Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json +30 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py +130 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py +608 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py +459 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth +1 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py +18 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py +342 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py +348 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py +7 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py +219 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py +304 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth +1 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py +161 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py +141 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py +18 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py +413 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd +1 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd +1 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README +76 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py +452 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py +753 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py +666 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py +549 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py +316 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py +62 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py +590 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py +728 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py +693 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py +1951 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py +856 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py +278 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py +732 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so +0 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py +475 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py +158 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py +543 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py +896 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py +716 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py +1434 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py +440 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py +1126 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py +572 -0
- emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py +661 -0
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc
ADDED
|
Binary file (9.62 kB). View file
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc
ADDED
|
Binary file (5.6 kB). View file
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc
ADDED
|
Binary file (2.48 kB). View file
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"unk_token": {
|
| 24 |
+
"content": "<unk>",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
}
|
| 30 |
+
}
|
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Patches that are applied at runtime to the virtual environment"""
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
VIRTUALENV_PATCH_FILE = os.path.join(__file__)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def patch_dist(dist):
|
| 11 |
+
"""
|
| 12 |
+
Distutils allows user to configure some arguments via a configuration file:
|
| 13 |
+
https://docs.python.org/3/install/index.html#distutils-configuration-files
|
| 14 |
+
|
| 15 |
+
Some of this arguments though don't make sense in context of the virtual environment files, let's fix them up.
|
| 16 |
+
"""
|
| 17 |
+
# we cannot allow some install config as that would get packages installed outside of the virtual environment
|
| 18 |
+
old_parse_config_files = dist.Distribution.parse_config_files
|
| 19 |
+
|
| 20 |
+
def parse_config_files(self, *args, **kwargs):
|
| 21 |
+
result = old_parse_config_files(self, *args, **kwargs)
|
| 22 |
+
install = self.get_option_dict("install")
|
| 23 |
+
|
| 24 |
+
if "prefix" in install: # the prefix governs where to install the libraries
|
| 25 |
+
install["prefix"] = VIRTUALENV_PATCH_FILE, os.path.abspath(sys.prefix)
|
| 26 |
+
for base in ("purelib", "platlib", "headers", "scripts", "data"):
|
| 27 |
+
key = "install_{}".format(base)
|
| 28 |
+
if key in install: # do not allow global configs to hijack venv paths
|
| 29 |
+
install.pop(key, None)
|
| 30 |
+
return result
|
| 31 |
+
|
| 32 |
+
dist.Distribution.parse_config_files = parse_config_files
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Import hook that patches some modules to ignore configuration values that break package installation in case
|
| 36 |
+
# of virtual environments.
|
| 37 |
+
_DISTUTILS_PATCH = "distutils.dist", "setuptools.dist"
|
| 38 |
+
if sys.version_info > (3, 4):
|
| 39 |
+
# https://docs.python.org/3/library/importlib.html#setting-up-an-importer
|
| 40 |
+
from functools import partial
|
| 41 |
+
from importlib.abc import MetaPathFinder
|
| 42 |
+
from importlib.util import find_spec
|
| 43 |
+
|
| 44 |
+
class _Finder(MetaPathFinder):
|
| 45 |
+
"""A meta path finder that allows patching the imported distutils modules"""
|
| 46 |
+
|
| 47 |
+
fullname = None
|
| 48 |
+
|
| 49 |
+
# lock[0] is threading.Lock(), but initialized lazily to avoid importing threading very early at startup,
|
| 50 |
+
# because there are gevent-based applications that need to be first to import threading by themselves.
|
| 51 |
+
# See https://github.com/pypa/virtualenv/issues/1895 for details.
|
| 52 |
+
lock = []
|
| 53 |
+
|
| 54 |
+
def find_spec(self, fullname, path, target=None):
|
| 55 |
+
if fullname in _DISTUTILS_PATCH and self.fullname is None:
|
| 56 |
+
# initialize lock[0] lazily
|
| 57 |
+
if len(self.lock) == 0:
|
| 58 |
+
import threading
|
| 59 |
+
|
| 60 |
+
lock = threading.Lock()
|
| 61 |
+
# there is possibility that two threads T1 and T2 are simultaneously running into find_spec,
|
| 62 |
+
# observing .lock as empty, and further going into hereby initialization. However due to the GIL,
|
| 63 |
+
# list.append() operation is atomic and this way only one of the threads will "win" to put the lock
|
| 64 |
+
# - that every thread will use - into .lock[0].
|
| 65 |
+
# https://docs.python.org/3/faq/library.html#what-kinds-of-global-value-mutation-are-thread-safe
|
| 66 |
+
self.lock.append(lock)
|
| 67 |
+
|
| 68 |
+
with self.lock[0]:
|
| 69 |
+
self.fullname = fullname
|
| 70 |
+
try:
|
| 71 |
+
spec = find_spec(fullname, path)
|
| 72 |
+
if spec is not None:
|
| 73 |
+
# https://www.python.org/dev/peps/pep-0451/#how-loading-will-work
|
| 74 |
+
is_new_api = hasattr(spec.loader, "exec_module")
|
| 75 |
+
func_name = "exec_module" if is_new_api else "load_module"
|
| 76 |
+
old = getattr(spec.loader, func_name)
|
| 77 |
+
func = self.exec_module if is_new_api else self.load_module
|
| 78 |
+
if old is not func:
|
| 79 |
+
try:
|
| 80 |
+
setattr(spec.loader, func_name, partial(func, old))
|
| 81 |
+
except AttributeError:
|
| 82 |
+
pass # C-Extension loaders are r/o such as zipimporter with <python 3.7
|
| 83 |
+
return spec
|
| 84 |
+
finally:
|
| 85 |
+
self.fullname = None
|
| 86 |
+
|
| 87 |
+
@staticmethod
|
| 88 |
+
def exec_module(old, module):
|
| 89 |
+
old(module)
|
| 90 |
+
if module.__name__ in _DISTUTILS_PATCH:
|
| 91 |
+
patch_dist(module)
|
| 92 |
+
|
| 93 |
+
@staticmethod
|
| 94 |
+
def load_module(old, name):
|
| 95 |
+
module = old(name)
|
| 96 |
+
if module.__name__ in _DISTUTILS_PATCH:
|
| 97 |
+
patch_dist(module)
|
| 98 |
+
return module
|
| 99 |
+
|
| 100 |
+
sys.meta_path.insert(0, _Finder())
|
| 101 |
+
else:
|
| 102 |
+
# https://www.python.org/dev/peps/pep-0302/
|
| 103 |
+
from imp import find_module
|
| 104 |
+
from pkgutil import ImpImporter, ImpLoader
|
| 105 |
+
|
| 106 |
+
class _VirtualenvImporter(object, ImpImporter):
|
| 107 |
+
def __init__(self, path=None):
|
| 108 |
+
object.__init__(self)
|
| 109 |
+
ImpImporter.__init__(self, path)
|
| 110 |
+
|
| 111 |
+
def find_module(self, fullname, path=None):
|
| 112 |
+
if fullname in _DISTUTILS_PATCH:
|
| 113 |
+
try:
|
| 114 |
+
return _VirtualenvLoader(fullname, *find_module(fullname.split(".")[-1], path))
|
| 115 |
+
except ImportError:
|
| 116 |
+
pass
|
| 117 |
+
return None
|
| 118 |
+
|
| 119 |
+
class _VirtualenvLoader(object, ImpLoader):
|
| 120 |
+
def __init__(self, fullname, file, filename, etc):
|
| 121 |
+
object.__init__(self)
|
| 122 |
+
ImpLoader.__init__(self, fullname, file, filename, etc)
|
| 123 |
+
|
| 124 |
+
def load_module(self, fullname):
|
| 125 |
+
module = super(_VirtualenvLoader, self).load_module(fullname)
|
| 126 |
+
patch_dist(module)
|
| 127 |
+
module.__loader__ = None # distlib fallback
|
| 128 |
+
return module
|
| 129 |
+
|
| 130 |
+
sys.meta_path.append(_VirtualenvImporter())
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py
ADDED
|
@@ -0,0 +1,608 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
# Copyright (c) 2005-2010 ActiveState Software Inc.
|
| 4 |
+
# Copyright (c) 2013 Eddy Petrișor
|
| 5 |
+
|
| 6 |
+
"""Utilities for determining application-specific dirs.
|
| 7 |
+
|
| 8 |
+
See <http://github.com/ActiveState/appdirs> for details and usage.
|
| 9 |
+
"""
|
| 10 |
+
# Dev Notes:
|
| 11 |
+
# - MSDN on where to store app data files:
|
| 12 |
+
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
|
| 13 |
+
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
|
| 14 |
+
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
| 15 |
+
|
| 16 |
+
__version__ = "1.4.4"
|
| 17 |
+
__version_info__ = tuple(int(segment) for segment in __version__.split("."))
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
import sys
|
| 21 |
+
import os
|
| 22 |
+
|
| 23 |
+
PY3 = sys.version_info[0] == 3
|
| 24 |
+
|
| 25 |
+
if PY3:
|
| 26 |
+
unicode = str
|
| 27 |
+
|
| 28 |
+
if sys.platform.startswith('java'):
|
| 29 |
+
import platform
|
| 30 |
+
os_name = platform.java_ver()[3][0]
|
| 31 |
+
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
|
| 32 |
+
system = 'win32'
|
| 33 |
+
elif os_name.startswith('Mac'): # "Mac OS X", etc.
|
| 34 |
+
system = 'darwin'
|
| 35 |
+
else: # "Linux", "SunOS", "FreeBSD", etc.
|
| 36 |
+
# Setting this to "linux2" is not ideal, but only Windows or Mac
|
| 37 |
+
# are actually checked for and the rest of the module expects
|
| 38 |
+
# *sys.platform* style strings.
|
| 39 |
+
system = 'linux2'
|
| 40 |
+
else:
|
| 41 |
+
system = sys.platform
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
|
| 46 |
+
r"""Return full path to the user-specific data dir for this application.
|
| 47 |
+
|
| 48 |
+
"appname" is the name of application.
|
| 49 |
+
If None, just the system directory is returned.
|
| 50 |
+
"appauthor" (only used on Windows) is the name of the
|
| 51 |
+
appauthor or distributing body for this application. Typically
|
| 52 |
+
it is the owning company name. This falls back to appname. You may
|
| 53 |
+
pass False to disable it.
|
| 54 |
+
"version" is an optional version path element to append to the
|
| 55 |
+
path. You might want to use this if you want multiple versions
|
| 56 |
+
of your app to be able to run independently. If used, this
|
| 57 |
+
would typically be "<major>.<minor>".
|
| 58 |
+
Only applied when appname is present.
|
| 59 |
+
"roaming" (boolean, default False) can be set True to use the Windows
|
| 60 |
+
roaming appdata directory. That means that for users on a Windows
|
| 61 |
+
network setup for roaming profiles, this user data will be
|
| 62 |
+
sync'd on login. See
|
| 63 |
+
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
| 64 |
+
for a discussion of issues.
|
| 65 |
+
|
| 66 |
+
Typical user data directories are:
|
| 67 |
+
Mac OS X: ~/Library/Application Support/<AppName>
|
| 68 |
+
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
|
| 69 |
+
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
|
| 70 |
+
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
|
| 71 |
+
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
|
| 72 |
+
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
|
| 73 |
+
|
| 74 |
+
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
|
| 75 |
+
That means, by default "~/.local/share/<AppName>".
|
| 76 |
+
"""
|
| 77 |
+
if system == "win32":
|
| 78 |
+
if appauthor is None:
|
| 79 |
+
appauthor = appname
|
| 80 |
+
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
|
| 81 |
+
path = os.path.normpath(_get_win_folder(const))
|
| 82 |
+
if appname:
|
| 83 |
+
if appauthor is not False:
|
| 84 |
+
path = os.path.join(path, appauthor, appname)
|
| 85 |
+
else:
|
| 86 |
+
path = os.path.join(path, appname)
|
| 87 |
+
elif system == 'darwin':
|
| 88 |
+
path = os.path.expanduser('~/Library/Application Support/')
|
| 89 |
+
if appname:
|
| 90 |
+
path = os.path.join(path, appname)
|
| 91 |
+
else:
|
| 92 |
+
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
|
| 93 |
+
if appname:
|
| 94 |
+
path = os.path.join(path, appname)
|
| 95 |
+
if appname and version:
|
| 96 |
+
path = os.path.join(path, version)
|
| 97 |
+
return path
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
|
| 101 |
+
r"""Return full path to the user-shared data dir for this application.
|
| 102 |
+
|
| 103 |
+
"appname" is the name of application.
|
| 104 |
+
If None, just the system directory is returned.
|
| 105 |
+
"appauthor" (only used on Windows) is the name of the
|
| 106 |
+
appauthor or distributing body for this application. Typically
|
| 107 |
+
it is the owning company name. This falls back to appname. You may
|
| 108 |
+
pass False to disable it.
|
| 109 |
+
"version" is an optional version path element to append to the
|
| 110 |
+
path. You might want to use this if you want multiple versions
|
| 111 |
+
of your app to be able to run independently. If used, this
|
| 112 |
+
would typically be "<major>.<minor>".
|
| 113 |
+
Only applied when appname is present.
|
| 114 |
+
"multipath" is an optional parameter only applicable to *nix
|
| 115 |
+
which indicates that the entire list of data dirs should be
|
| 116 |
+
returned. By default, the first item from XDG_DATA_DIRS is
|
| 117 |
+
returned, or '/usr/local/share/<AppName>',
|
| 118 |
+
if XDG_DATA_DIRS is not set
|
| 119 |
+
|
| 120 |
+
Typical site data directories are:
|
| 121 |
+
Mac OS X: /Library/Application Support/<AppName>
|
| 122 |
+
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
|
| 123 |
+
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
|
| 124 |
+
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
| 125 |
+
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
|
| 126 |
+
|
| 127 |
+
For Unix, this is using the $XDG_DATA_DIRS[0] default.
|
| 128 |
+
|
| 129 |
+
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
| 130 |
+
"""
|
| 131 |
+
if system == "win32":
|
| 132 |
+
if appauthor is None:
|
| 133 |
+
appauthor = appname
|
| 134 |
+
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
|
| 135 |
+
if appname:
|
| 136 |
+
if appauthor is not False:
|
| 137 |
+
path = os.path.join(path, appauthor, appname)
|
| 138 |
+
else:
|
| 139 |
+
path = os.path.join(path, appname)
|
| 140 |
+
elif system == 'darwin':
|
| 141 |
+
path = os.path.expanduser('/Library/Application Support')
|
| 142 |
+
if appname:
|
| 143 |
+
path = os.path.join(path, appname)
|
| 144 |
+
else:
|
| 145 |
+
# XDG default for $XDG_DATA_DIRS
|
| 146 |
+
# only first, if multipath is False
|
| 147 |
+
path = os.getenv('XDG_DATA_DIRS',
|
| 148 |
+
os.pathsep.join(['/usr/local/share', '/usr/share']))
|
| 149 |
+
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
| 150 |
+
if appname:
|
| 151 |
+
if version:
|
| 152 |
+
appname = os.path.join(appname, version)
|
| 153 |
+
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
| 154 |
+
|
| 155 |
+
if multipath:
|
| 156 |
+
path = os.pathsep.join(pathlist)
|
| 157 |
+
else:
|
| 158 |
+
path = pathlist[0]
|
| 159 |
+
return path
|
| 160 |
+
|
| 161 |
+
if appname and version:
|
| 162 |
+
path = os.path.join(path, version)
|
| 163 |
+
return path
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
|
| 167 |
+
r"""Return full path to the user-specific config dir for this application.
|
| 168 |
+
|
| 169 |
+
"appname" is the name of application.
|
| 170 |
+
If None, just the system directory is returned.
|
| 171 |
+
"appauthor" (only used on Windows) is the name of the
|
| 172 |
+
appauthor or distributing body for this application. Typically
|
| 173 |
+
it is the owning company name. This falls back to appname. You may
|
| 174 |
+
pass False to disable it.
|
| 175 |
+
"version" is an optional version path element to append to the
|
| 176 |
+
path. You might want to use this if you want multiple versions
|
| 177 |
+
of your app to be able to run independently. If used, this
|
| 178 |
+
would typically be "<major>.<minor>".
|
| 179 |
+
Only applied when appname is present.
|
| 180 |
+
"roaming" (boolean, default False) can be set True to use the Windows
|
| 181 |
+
roaming appdata directory. That means that for users on a Windows
|
| 182 |
+
network setup for roaming profiles, this user data will be
|
| 183 |
+
sync'd on login. See
|
| 184 |
+
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
| 185 |
+
for a discussion of issues.
|
| 186 |
+
|
| 187 |
+
Typical user config directories are:
|
| 188 |
+
Mac OS X: same as user_data_dir
|
| 189 |
+
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
|
| 190 |
+
Win *: same as user_data_dir
|
| 191 |
+
|
| 192 |
+
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
|
| 193 |
+
That means, by default "~/.config/<AppName>".
|
| 194 |
+
"""
|
| 195 |
+
if system in ["win32", "darwin"]:
|
| 196 |
+
path = user_data_dir(appname, appauthor, None, roaming)
|
| 197 |
+
else:
|
| 198 |
+
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
|
| 199 |
+
if appname:
|
| 200 |
+
path = os.path.join(path, appname)
|
| 201 |
+
if appname and version:
|
| 202 |
+
path = os.path.join(path, version)
|
| 203 |
+
return path
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
|
| 207 |
+
r"""Return full path to the user-shared data dir for this application.
|
| 208 |
+
|
| 209 |
+
"appname" is the name of application.
|
| 210 |
+
If None, just the system directory is returned.
|
| 211 |
+
"appauthor" (only used on Windows) is the name of the
|
| 212 |
+
appauthor or distributing body for this application. Typically
|
| 213 |
+
it is the owning company name. This falls back to appname. You may
|
| 214 |
+
pass False to disable it.
|
| 215 |
+
"version" is an optional version path element to append to the
|
| 216 |
+
path. You might want to use this if you want multiple versions
|
| 217 |
+
of your app to be able to run independently. If used, this
|
| 218 |
+
would typically be "<major>.<minor>".
|
| 219 |
+
Only applied when appname is present.
|
| 220 |
+
"multipath" is an optional parameter only applicable to *nix
|
| 221 |
+
which indicates that the entire list of config dirs should be
|
| 222 |
+
returned. By default, the first item from XDG_CONFIG_DIRS is
|
| 223 |
+
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
|
| 224 |
+
|
| 225 |
+
Typical site config directories are:
|
| 226 |
+
Mac OS X: same as site_data_dir
|
| 227 |
+
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
|
| 228 |
+
$XDG_CONFIG_DIRS
|
| 229 |
+
Win *: same as site_data_dir
|
| 230 |
+
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
| 231 |
+
|
| 232 |
+
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
|
| 233 |
+
|
| 234 |
+
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
| 235 |
+
"""
|
| 236 |
+
if system in ["win32", "darwin"]:
|
| 237 |
+
path = site_data_dir(appname, appauthor)
|
| 238 |
+
if appname and version:
|
| 239 |
+
path = os.path.join(path, version)
|
| 240 |
+
else:
|
| 241 |
+
# XDG default for $XDG_CONFIG_DIRS
|
| 242 |
+
# only first, if multipath is False
|
| 243 |
+
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
|
| 244 |
+
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
| 245 |
+
if appname:
|
| 246 |
+
if version:
|
| 247 |
+
appname = os.path.join(appname, version)
|
| 248 |
+
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
| 249 |
+
|
| 250 |
+
if multipath:
|
| 251 |
+
path = os.pathsep.join(pathlist)
|
| 252 |
+
else:
|
| 253 |
+
path = pathlist[0]
|
| 254 |
+
return path
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return the full path of the user-specific cache directory.

    "appname" is the name of the application; when None, only the bare
        system cache directory is returned.
    "appauthor" (used on Windows only) is the author or distributing
        body, typically the owning company name. It falls back to
        appname; pass False to disable it.
    "version" is an optional version path element appended to the path
        (only when appname is given), typically "<major>.<minor>", so
        multiple versions of the app can run independently.
    "opinion" (boolean) can be False to skip appending "Cache" to the
        base app data dir on Windows.

    Typical user cache directories are:
        Mac OS X: ~/Library/Caches/<AppName>
        Unix:     ~/.cache/<AppName> (XDG default)
        Win XP:   C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
        Vista:    C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache

    On Windows the MSDN docs only suggest that local settings go in the
    `CSIDL_LOCAL_APPDATA` directory, which is identical to the
    non-roaming app data dir returned by `user_data_dir`. Apps typically
    put cache data somewhere *under* that dir (e.g.
    ...\Acme\SuperApp\Cache\1.0), hence the opinionated "Cache" suffix;
    disable with `opinion=False`.
    """
    if system == "win32":
        author = appname if appauthor is None else appauthor
        result = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            if author is not False:
                result = os.path.join(result, author, appname)
            else:
                result = os.path.join(result, appname)
            if opinion:
                result = os.path.join(result, "Cache")
    elif system == 'darwin':
        result = os.path.expanduser('~/Library/Caches')
        if appname:
            result = os.path.join(result, appname)
    else:
        # XDG default when $XDG_CACHE_HOME is unset
        result = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
        if appname:
            result = os.path.join(result, appname)
    if appname and version:
        result = os.path.join(result, version)
    return result
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return the full path of the user-specific state directory.

    "appname" is the name of the application; when None, only the bare
        system directory is returned.
    "appauthor" (used on Windows only) is the author or distributing
        body, typically the owning company name. It falls back to
        appname; pass False to disable it.
    "version" is an optional version path element appended to the path
        (only when appname is given), typically "<major>.<minor>".
    "roaming" (boolean, default False) selects the Windows roaming
        appdata directory, so the data is sync'd on login for users on a
        roaming-profile network. See
        <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
        for a discussion of issues.

    Typical user state directories are:
        Mac OS X: same as user_data_dir
        Unix:     ~/.local/state/<AppName>  # or in $XDG_STATE_HOME, if defined
        Win *:    same as user_data_dir

    For Unix this follows the Debian proposal
    <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
    extending the XDG spec with $XDG_STATE_HOME, so the default is
    "~/.local/state/<AppName>".
    """
    if system in ("win32", "darwin"):
        result = user_data_dir(appname, appauthor, None, roaming)
    else:
        result = os.getenv('XDG_STATE_HOME',
                           os.path.expanduser("~/.local/state"))
        if appname:
            result = os.path.join(result, appname)
    if appname and version:
        result = os.path.join(result, version)
    return result
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return full path to the user-specific log dir for this application.

    "appname" is the name of application.
        If None, just the system directory is returned.
    "appauthor" (only used on Windows) is the name of the
        appauthor or distributing body for this application. Typically
        it is the owning company name. This falls back to appname. You may
        pass False to disable it.
    "version" is an optional version path element to append to the
        path. You might want to use this if you want multiple versions
        of your app to be able to run independently. If used, this
        would typically be "<major>.<minor>".
        Only applied when appname is present.
    "opinion" (boolean) can be False to disable the appending of
        "Logs" to the base app data dir for Windows, and "log" to the
        base cache dir for Unix. See discussion below.

    Typical user log directories are:
        Mac OS X:   ~/Library/Logs/<AppName>
        Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs

    On Windows the only suggestion in the MSDN docs is that local settings
    go in the `CSIDL_LOCAL_APPDATA` directory.

    OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
    value for Windows and appends "log" to the user cache dir for Unix.
    This can be disabled with the `opinion=False` option.
    """
    if system == "darwin":
        path = os.path.expanduser('~/Library/Logs')
        # BUGFIX: the old code did os.path.join(path, appname)
        # unconditionally, which raises TypeError when appname is None.
        # Every other branch of this module treats appname=None as "just
        # the system directory"; do the same here.
        if appname:
            path = os.path.join(path, appname)
    elif system == "win32":
        path = user_data_dir(appname, appauthor, version)
        version = False  # user_data_dir already applied the version
        if opinion:
            path = os.path.join(path, "Logs")
    else:
        path = user_cache_dir(appname, appauthor, version)
        version = False  # user_cache_dir already applied the version
        if opinion:
            path = os.path.join(path, "log")
    if appname and version:
        path = os.path.join(path, version)
    return path
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
class AppDirs(object):
    """Convenience wrapper bundling the module-level *_dir functions.

    Stores appname/appauthor/version/roaming/multipath once and exposes
    each directory kind as a read-only property.
    """

    def __init__(self, appname=None, appauthor=None, version=None,
                 roaming=False, multipath=False):
        self.appname = appname
        self.appauthor = appauthor
        self.version = version
        self.roaming = roaming
        self.multipath = multipath

    @property
    def user_data_dir(self):
        """Per-user data directory for this application."""
        return user_data_dir(self.appname, self.appauthor,
                             version=self.version, roaming=self.roaming)

    @property
    def site_data_dir(self):
        """Shared (site-wide) data directory for this application."""
        return site_data_dir(self.appname, self.appauthor,
                             version=self.version, multipath=self.multipath)

    @property
    def user_config_dir(self):
        """Per-user config directory for this application."""
        return user_config_dir(self.appname, self.appauthor,
                               version=self.version, roaming=self.roaming)

    @property
    def site_config_dir(self):
        """Shared (site-wide) config directory for this application."""
        return site_config_dir(self.appname, self.appauthor,
                               version=self.version, multipath=self.multipath)

    @property
    def user_cache_dir(self):
        """Per-user cache directory for this application."""
        return user_cache_dir(self.appname, self.appauthor,
                              version=self.version)

    @property
    def user_state_dir(self):
        """Per-user state directory for this application."""
        return user_state_dir(self.appname, self.appauthor,
                              version=self.version)

    @property
    def user_log_dir(self):
        """Per-user log directory for this application."""
        return user_log_dir(self.appname, self.appauthor,
                            version=self.version)
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
#---- internal support stuff
|
| 454 |
+
|
| 455 |
+
def _get_win_folder_from_registry(csidl_name):
    """Look a shell folder up in the per-user "Shell Folders" registry key.

    This is a fallback technique at best: it is not certain that the
    registry gives the correct answer for all CSIDL_* names.
    """
    if PY3:
        import winreg as _winreg
    else:
        import _winreg

    # Map the CSIDL constant name to its registry value name; raises
    # KeyError for unsupported constants, as before.
    folder_name = {
        "CSIDL_APPDATA": "AppData",
        "CSIDL_COMMON_APPDATA": "Common AppData",
        "CSIDL_LOCAL_APPDATA": "Local AppData",
    }[csidl_name]

    key = _winreg.OpenKey(
        _winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    )
    directory, _value_type = _winreg.QueryValueEx(key, folder_name)
    return directory
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
def _get_win_folder_with_pywin32(csidl_name):
    """Resolve a CSIDL_* shell folder through the pywin32 shell API."""
    from win32com.shell import shellcon, shell
    dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
    # Try to make this a unicode path because SHGetFolderPath does
    # not return unicode strings when there is unicode data in the
    # path.
    try:
        dir = unicode(dir)

        # Downgrade to short path name if have highbit chars. See
        # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
        has_high_char = False
        for c in dir:
            if ord(c) > 255:
                has_high_char = True
                break
        if has_high_char:
            try:
                import win32api
                dir = win32api.GetShortPathName(dir)
            except ImportError:
                pass
    except (UnicodeError, NameError):
        # BUGFIX: also catch NameError — on Python 3 the builtin
        # ``unicode`` does not exist (pywin32 already returns str there),
        # and the old code crashed with an uncaught NameError.
        pass
    return dir
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
def _get_win_folder_with_ctypes(csidl_name):
    """Resolve a CSIDL_* shell folder by calling SHGetFolderPathW via ctypes."""
    import ctypes

    # Numeric CSIDL constants; raises KeyError for unsupported names.
    csidl_const = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }[csidl_name]

    buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)

    # Downgrade to a short path name if the folder has high-bit chars.
    # See <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    if any(ord(ch) > 255 for ch in buf):
        short_buf = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, short_buf, 1024):
            buf = short_buf

    return buf.value
|
| 531 |
+
|
| 532 |
+
def _get_win_folder_with_jna(csidl_name):
    """Resolve a CSIDL_* shell folder through JNA (for Jython on Windows)."""
    import array
    from com.sun import jna
    from com.sun.jna.platform import win32

    buf_size = win32.WinDef.MAX_PATH * 2
    buf = array.zeros('c', buf_size)
    shell = win32.Shell32.INSTANCE
    shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None,
                          win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
    folder = jna.Native.toString(buf.tostring()).rstrip("\0")

    # Downgrade to a short path name if the folder has high-bit chars.
    # See <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    if any(ord(ch) > 255 for ch in folder):
        buf = array.zeros('c', buf_size)
        kernel = win32.Kernel32.INSTANCE
        if kernel.GetShortPathName(folder, buf, buf_size):
            folder = jna.Native.toString(buf.tostring()).rstrip("\0")

    return folder
|
| 557 |
+
|
| 558 |
+
# On Windows, bind _get_win_folder to the best available backend, tried in
# order of preference: pywin32 -> ctypes -> JNA (Jython) -> registry.
# Each import is attempted at module load time; the first that succeeds wins.
if system == "win32":
    try:
        import win32com.shell  # probe for pywin32
        _get_win_folder = _get_win_folder_with_pywin32
    except ImportError:
        try:
            from ctypes import windll  # windll only exists on Windows CPython
            _get_win_folder = _get_win_folder_with_ctypes
        except ImportError:
            try:
                import com.sun.jna  # probe for JNA under Jython
                _get_win_folder = _get_win_folder_with_jna
            except ImportError:
                # last resort: read the Shell Folders registry key
                _get_win_folder = _get_win_folder_from_registry
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
#---- self test code
|
| 575 |
+
|
| 576 |
+
if __name__ == "__main__":
    # Smoke test: print every directory kind for a few AppDirs configurations.
    appname = "MyApp"
    appauthor = "MyCompany"

    props = ("user_data_dir",
             "user_config_dir",
             "user_cache_dir",
             "user_state_dir",
             "user_log_dir",
             "site_data_dir",
             "site_config_dir")

    def _dump(dirs):
        # Print each directory property of the given AppDirs instance.
        for prop in props:
            print("%s: %s" % (prop, getattr(dirs, prop)))

    print("-- app dirs %s --" % __version__)

    print("-- app dirs (with optional 'version')")
    _dump(AppDirs(appname, appauthor, version="1.0"))

    print("\n-- app dirs (without optional 'version')")
    _dump(AppDirs(appname, appauthor))

    print("\n-- app dirs (without optional 'appauthor')")
    _dump(AppDirs(appname))

    print("\n-- app dirs (with disabled 'appauthor')")
    _dump(AppDirs(appname, appauthor=False))
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ######################### LICENSE ############################ #
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2005-2025, Michele Simionato
|
| 4 |
+
# All rights reserved.
|
| 5 |
+
|
| 6 |
+
# Redistribution and use in source and binary forms, with or without
|
| 7 |
+
# modification, are permitted provided that the following conditions are
|
| 8 |
+
# met:
|
| 9 |
+
|
| 10 |
+
# Redistributions of source code must retain the above copyright
|
| 11 |
+
# notice, this list of conditions and the following disclaimer.
|
| 12 |
+
# Redistributions in bytecode form must reproduce the above copyright
|
| 13 |
+
# notice, this list of conditions and the following disclaimer in
|
| 14 |
+
# the documentation and/or other materials provided with the
|
| 15 |
+
# distribution.
|
| 16 |
+
|
| 17 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 18 |
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 19 |
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 20 |
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 21 |
+
# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
| 22 |
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
| 23 |
+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
| 24 |
+
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
| 25 |
+
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
| 26 |
+
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
| 27 |
+
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
| 28 |
+
# DAMAGE.
|
| 29 |
+
|
| 30 |
+
"""
|
| 31 |
+
Decorator module, see
|
| 32 |
+
https://github.com/micheles/decorator/blob/master/docs/documentation.md
|
| 33 |
+
for the documentation.
|
| 34 |
+
"""
|
| 35 |
+
import re
|
| 36 |
+
import sys
|
| 37 |
+
import inspect
|
| 38 |
+
import operator
|
| 39 |
+
import itertools
|
| 40 |
+
import functools
|
| 41 |
+
from contextlib import _GeneratorContextManager
|
| 42 |
+
from inspect import getfullargspec, iscoroutinefunction, isgeneratorfunction
|
| 43 |
+
|
| 44 |
+
__version__ = '5.2.1'
|
| 45 |
+
|
| 46 |
+
DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(')
|
| 47 |
+
POS = inspect.Parameter.POSITIONAL_OR_KEYWORD
|
| 48 |
+
EMPTY = inspect.Parameter.empty
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# this is not used anymore in the core, but kept for backward compatibility
|
| 52 |
+
class FunctionMaker(object):
    """
    An object with the ability to create functions with a given signature.
    It has attributes name, doc, module, signature, defaults, dict and
    methods update and make.
    """

    # Atomic get-and-increment provided by the GIL; used to give each
    # generated function a unique pseudo-filename (see make()).
    _compile_count = itertools.count()

    # make pylint happy
    args = varargs = varkw = defaults = kwonlyargs = kwonlydefaults = ()

    def __init__(self, func=None, name=None, signature=None,
                 defaults=None, doc=None, module=None, funcdict=None):
        # Either introspect ``func`` for its metadata, or take the metadata
        # from the remaining explicit arguments (func=None happens when
        # decorating a caller given as a "name(signature)" string).
        self.shortsignature = signature
        if func:
            # func can be a class or a callable, but not an instance method
            self.name = func.__name__
            if self.name == '<lambda>':  # small hack for lambda functions
                self.name = '_lambda_'
            self.doc = func.__doc__
            self.module = func.__module__
            if inspect.isroutine(func) or isinstance(func, functools.partial):
                argspec = getfullargspec(func)
                self.annotations = getattr(func, '__annotations__', {})
                # copy args/varargs/varkw/defaults/kwonlyargs/kwonlydefaults
                # onto self so they are available to the template expansion
                for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs',
                          'kwonlydefaults'):
                    setattr(self, a, getattr(argspec, a))
                for i, arg in enumerate(self.args):
                    setattr(self, 'arg%d' % i, arg)
                # build both the full signature (used in the generated "def")
                # and the short signature (used in the generated call)
                allargs = list(self.args)
                allshortargs = list(self.args)
                if self.varargs:
                    allargs.append('*' + self.varargs)
                    allshortargs.append('*' + self.varargs)
                elif self.kwonlyargs:
                    allargs.append('*')  # single star syntax
                for a in self.kwonlyargs:
                    allargs.append('%s=None' % a)
                    allshortargs.append('%s=%s' % (a, a))
                if self.varkw:
                    allargs.append('**' + self.varkw)
                    allshortargs.append('**' + self.varkw)
                self.signature = ', '.join(allargs)
                self.shortsignature = ', '.join(allshortargs)
                self.dict = func.__dict__.copy()
        # func=None happens when decorating a caller
        if name:
            self.name = name
        if signature is not None:
            self.signature = signature
        if defaults:
            self.defaults = defaults
        if doc:
            self.doc = doc
        if module:
            self.module = module
        if funcdict:
            self.dict = funcdict
        # check existence required attributes
        assert hasattr(self, 'name')
        if not hasattr(self, 'signature'):
            raise TypeError('You are decorating a non function: %s' % func)

    def update(self, func, **kw):
        """
        Update the signature of func with the data in self
        """
        func.__name__ = self.name
        func.__doc__ = getattr(self, 'doc', None)
        func.__dict__ = getattr(self, 'dict', {})
        func.__defaults__ = self.defaults
        func.__kwdefaults__ = self.kwonlydefaults or None
        func.__annotations__ = getattr(self, 'annotations', None)
        try:
            # walk up the stack to guess the module of the original caller
            frame = sys._getframe(3)
        except AttributeError:  # for IronPython and similar implementations
            callermodule = '?'
        else:
            callermodule = frame.f_globals.get('__name__', '?')
        func.__module__ = getattr(self, 'module', callermodule)
        func.__dict__.update(kw)

    def make(self, src_templ, evaldict=None, addsource=False, **attrs):
        """
        Make a new function from a given template and update the signature
        """
        src = src_templ % vars(self)  # expand name and signature
        evaldict = evaldict or {}
        mo = DEF.search(src)
        if mo is None:
            raise SyntaxError('not a valid function template\n%s' % src)
        name = mo.group(1)  # extract the function name
        # every name appearing in the generated def must not collide with
        # the reserved names used to pass the caller and the function
        names = set([name] + [arg.strip(' *') for arg in
                              self.shortsignature.split(',')])
        for n in names:
            if n in ('_func_', '_call_'):
                raise NameError('%s is overridden in\n%s' % (n, src))

        if not src.endswith('\n'):  # add a newline for old Pythons
            src += '\n'

        # Ensure each generated function has a unique filename for profilers
        # (such as cProfile) that depend on the tuple of (<filename>,
        # <definition line>, <function name>) being unique.
        filename = '<decorator-gen-%d>' % next(self._compile_count)
        try:
            code = compile(src, filename, 'single')
            exec(code, evaldict)
        except Exception:
            print('Error in generated code:', file=sys.stderr)
            print(src, file=sys.stderr)
            raise
        func = evaldict[name]
        if addsource:
            attrs['__source__'] = src
        self.update(func, **attrs)
        return func

    @classmethod
    def create(cls, obj, body, evaldict, defaults=None,
               doc=None, module=None, addsource=True, **attrs):
        """
        Create a function from the strings name, signature and body.
        evaldict is the evaluation dictionary. If addsource is true an
        attribute __source__ is added to the result. The attributes attrs
        are added, if any.
        """
        if isinstance(obj, str):  # "name(signature)"
            name, rest = obj.strip().split('(', 1)
            signature = rest[:-1]  # strip a right parens
            func = None
        else:  # a function
            name = None
            signature = None
            func = obj
        self = cls(func, name, signature, defaults, doc, module)
        # indent the body one level to sit inside the generated "def"
        ibody = '\n'.join('    ' + line for line in body.splitlines())
        caller = evaldict.get('_call_')  # when called from `decorate`
        if caller and iscoroutinefunction(caller):
            # generate an async wrapper that awaits the caller's result
            body = ('async def %(name)s(%(signature)s):\n' + ibody).replace(
                'return', 'return await')
        else:
            body = 'def %(name)s(%(signature)s):\n' + ibody
        return self.make(body, evaldict, addsource, **attrs)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def fix(args, kwargs, sig):
    """
    Normalize args and kwargs to be consistent with the signature
    """
    bound = sig.bind(*args, **kwargs)
    bound.apply_defaults()  # needed for test_dan_schult
    return bound.args, bound.kwargs
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def decorate(func, caller, extras=(), kwsyntax=False):
    """
    Decorates a function/generator/coroutine using a caller.
    If kwsyntax is True calling the decorated functions with keyword
    syntax will pass the named arguments inside the ``kw`` dictionary,
    even if such argument are positional, similarly to what functools.wraps
    does. By default kwsyntax is False and the arguments are untouched.
    """
    sig = inspect.signature(func)
    if isinstance(func, functools.partial):
        func = functools.update_wrapper(func, func.func)
    # Build a wrapper of the right flavor so that coroutine and generator
    # callers produce coroutine/generator wrappers respectively.
    if iscoroutinefunction(caller):
        async def fun(*args, **kw):
            if not kwsyntax:
                args, kw = fix(args, kw, sig)
            return await caller(func, *(extras + args), **kw)
    elif isgeneratorfunction(caller):
        def fun(*args, **kw):
            if not kwsyntax:
                args, kw = fix(args, kw, sig)
            for res in caller(func, *(extras + args), **kw):
                yield res
    else:
        def fun(*args, **kw):
            if not kwsyntax:
                args, kw = fix(args, kw, sig)
            return caller(func, *(extras + args), **kw)

    fun.__name__ = func.__name__
    fun.__doc__ = func.__doc__
    fun.__wrapped__ = func
    fun.__signature__ = sig
    fun.__qualname__ = func.__qualname__
    # builtin functions like defaultdict.__setitem__ lack many attributes
    try:
        fun.__defaults__ = func.__defaults__
    except AttributeError:
        pass
    try:
        fun.__kwdefaults__ = func.__kwdefaults__
    except AttributeError:
        pass
    try:
        fun.__annotations__ = func.__annotations__
    except AttributeError:
        pass
    try:
        fun.__module__ = func.__module__
    except AttributeError:
        pass
    try:
        fun.__name__ = func.__name__
    except AttributeError:  # happens with old versions of numpy.vectorize
        # BUGFIX: the old code read ``func.__name__ == 'noname'`` — a no-op
        # comparison that itself re-raises AttributeError. Assign a fallback
        # name to the wrapper instead, as clearly intended.
        fun.__name__ = 'noname'
    try:
        fun.__dict__.update(func.__dict__)
    except AttributeError:
        pass
    return fun
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def decoratorx(caller):
    """
    A version of "decorator" implemented via "exec" rather than via the
    Signature object. Use this if you want to preserve the `.__code__`
    object properties (https://github.com/micheles/decorator/issues/129).
    """
    def dec(func):
        # generate a wrapper whose body forwards to the caller via exec
        evaldict = dict(_call_=caller, _func_=func)
        return FunctionMaker.create(
            func,
            "return _call_(_func_, %(shortsignature)s)",
            evaldict,
            __wrapped__=func, __qualname__=func.__qualname__)
    return dec
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def decorator(caller, _func=None, kwsyntax=False):
    """
    decorator(caller) converts a caller function into a decorator
    """
    if _func is not None:
        # obsolete behavior kept for backward compatibility;
        # you should use decorate instead
        return decorate(_func, caller, (), kwsyntax)
    # otherwise, build and return a decorator function
    sig = inspect.signature(caller)
    dec_params = [p for p in sig.parameters.values() if p.kind is POS]

    def dec(func=None, *args, **kw):
        # collect the caller's extra parameters (beyond func) from the
        # positional args and from keyword args with declared defaults
        na = len(args) + 1
        extras = args + tuple(kw.get(p.name, p.default)
                              for p in dec_params[na:]
                              if p.default is not EMPTY)
        if func is None:
            # called with arguments only: return a decorator
            return lambda func: decorate(func, caller, extras, kwsyntax)
        return decorate(func, caller, extras, kwsyntax)

    dec.__signature__ = sig.replace(parameters=dec_params)
    dec.__name__ = caller.__name__
    dec.__doc__ = caller.__doc__
    dec.__wrapped__ = caller
    dec.__qualname__ = caller.__qualname__
    dec.__kwdefaults__ = getattr(caller, '__kwdefaults__', None)
    dec.__dict__.update(caller.__dict__)
    return dec
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
# ####################### contextmanager ####################### #
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
class ContextManager(_GeneratorContextManager):
    """A generator-based context manager that is also usable as a decorator."""

    def __init__(self, g, *a, **k):
        _GeneratorContextManager.__init__(self, g, a, k)

    def __call__(self, func):
        # Wrap ``func`` so every call re-enters a fresh context manager
        # (a _GeneratorContextManager instance is single-use).
        def invoke(f, *a, **k):
            with self.__class__(self.func, *self.args, **self.kwds):
                return f(*a, **k)
        return decorate(func, invoke)
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
# Pre-built decorator turning a generator function into a ContextManager
# factory while preserving the generator's signature.
_contextmanager = decorator(ContextManager)


def contextmanager(func):
    """Signature-preserving replacement for ``contextlib.contextmanager``."""
    # Enable Pylint config: contextmanager-decorators=decorator.contextmanager
    return _contextmanager(func)
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
# ############################ dispatch_on ############################ #
|
| 338 |
+
|
| 339 |
+
def append(a, vancestors):
    """
    Add ``a`` to the list of virtual ancestors, unless it is redundant.

    If a subclass of ``a`` is already present, ``a`` is dropped; every
    superclass of ``a`` already in the list is replaced by ``a``.
    The list is modified in place.
    """
    should_add = True
    for idx, ancestor in enumerate(vancestors):
        if issubclass(ancestor, a):
            # a (more specific) descendant is already recorded: nothing to do
            should_add = False
            break
        if issubclass(a, ancestor):
            # a is more specific than this entry: take its place
            vancestors[idx] = a
            should_add = False
    if should_add:
        vancestors.append(a)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# inspired from simplegeneric by P.J. Eby and functools.singledispatch
|
| 357 |
+
# inspired from simplegeneric by P.J. Eby and functools.singledispatch
def dispatch_on(*dispatch_args):
    """
    Factory of decorators turning a function into a generic function
    dispatching on the given arguments (named by the strings in
    ``dispatch_args``, which must be argument names of the function).
    """
    assert dispatch_args, 'No dispatch args passed'
    dispatch_str = '(%s,)' % ', '.join(dispatch_args)

    def check(arguments, wrong=operator.ne, msg=''):
        """Make sure one passes the expected number of arguments"""
        if wrong(len(arguments), len(dispatch_args)):
            raise TypeError('Expected %d arguments, got %d%s' %
                            (len(dispatch_args), len(arguments), msg))

    def gen_func_dec(func):
        """Decorator turning a function into a generic function"""

        # first check the dispatch arguments
        argset = set(getfullargspec(func).args)
        if not set(dispatch_args) <= argset:
            raise NameError('Unknown dispatch arguments %s' % dispatch_str)

        # maps tuples of types -> registered implementations
        typemap = {}

        def vancestors(*types):
            """
            Get a list of sets of virtual ancestors for the given types
            """
            check(types)
            ras = [[] for _ in range(len(dispatch_args))]
            for types_ in typemap:
                for t, type_, ra in zip(types, types_, ras):
                    # a registered type is a *virtual* ancestor of t when t
                    # passes issubclass but the type is not in t's real MRO
                    # (e.g. ABC registration)
                    if issubclass(t, type_) and type_ not in t.mro():
                        append(type_, ra)
            return [set(ra) for ra in ras]

        def ancestors(*types):
            """
            Get a list of virtual MROs, one for each type
            """
            check(types)
            lists = []
            for t, vas in zip(types, vancestors(*types)):
                n_vas = len(vas)
                if n_vas > 1:
                    # two unrelated virtual ancestors: no linearization exists
                    raise RuntimeError(
                        'Ambiguous dispatch for %s: %s' % (t, vas))
                elif n_vas == 1:
                    va, = vas
                    # build a throwaway subclass to linearize t with its
                    # virtual ancestor via the standard C3 MRO
                    mro = type('t', (t, va), {}).mro()[1:]
                else:
                    mro = t.mro()
                lists.append(mro[:-1])  # discard t and object
            return lists

        def register(*types):
            """
            Decorator to register an implementation for the given types
            """
            check(types)

            def dec(f):
                # the implementation may accept extra args, never fewer
                check(getfullargspec(f).args, operator.lt, ' in ' + f.__name__)
                typemap[types] = f
                return f
            return dec

        def dispatch_info(*types):
            """
            A utility to introspect the dispatch algorithm
            """
            check(types)
            lst = []
            # enumerate every combination of ancestors in dispatch order
            for ancs in itertools.product(*ancestors(*types)):
                lst.append(tuple(a.__name__ for a in ancs))
            return lst

        def _dispatch(dispatch_args, *args, **kw):
            # dispatch_args here is the *tuple of runtime values* built by
            # the generated wrapper below (dispatch_str), not the outer names
            types = tuple(type(arg) for arg in dispatch_args)
            try:  # fast path
                f = typemap[types]
            except KeyError:
                pass
            else:
                return f(*args, **kw)
            # slow path: walk the cartesian product of the (virtual) MROs
            combinations = itertools.product(*ancestors(*types))
            next(combinations)  # the first one has been already tried
            for types_ in combinations:
                f = typemap.get(types_)
                if f is not None:
                    return f(*args, **kw)

            # else call the default implementation
            return func(*args, **kw)

        # generate a wrapper with func's exact signature that forwards the
        # dispatch values plus all original arguments to _dispatch
        return FunctionMaker.create(
            func, 'return _f_(%s, %%(shortsignature)s)' % dispatch_str,
            dict(_f_=_dispatch), register=register, default=func,
            typemap=typemap, vancestors=vancestors, ancestors=ancestors,
            dispatch_info=dispatch_info, __wrapped__=func)

    gen_func_dec.__name__ = 'dispatch_on' + dispatch_str
    return gen_func_dec
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Entry point for launching an IPython kernel.
|
| 2 |
+
|
| 3 |
+
This is separate from the ipykernel package so we can avoid doing imports until
|
| 4 |
+
after removing the cwd from sys.path.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
if __name__ == "__main__":
|
| 11 |
+
# Remove the CWD from sys.path while we load stuff.
|
| 12 |
+
# This is added back by InteractiveShellApp.init_path()
|
| 13 |
+
if sys.path[0] == "" or Path(sys.path[0]) == Path.cwd():
|
| 14 |
+
del sys.path[0]
|
| 15 |
+
|
| 16 |
+
from ipykernel import kernelapp as app
|
| 17 |
+
|
| 18 |
+
app.launch_new_instance()
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Python shell for SymPy.
|
| 3 |
+
|
| 4 |
+
This is just a normal Python shell (IPython shell if you have the
|
| 5 |
+
IPython package installed), that executes the following commands for
|
| 6 |
+
the user:
|
| 7 |
+
|
| 8 |
+
>>> from __future__ import division
|
| 9 |
+
>>> from sympy import *
|
| 10 |
+
>>> x, y, z, t = symbols('x y z t')
|
| 11 |
+
>>> k, m, n = symbols('k m n', integer=True)
|
| 12 |
+
>>> f, g, h = symbols('f g h', cls=Function)
|
| 13 |
+
>>> init_printing()
|
| 14 |
+
|
| 15 |
+
So starting 'isympy' is equivalent to starting Python (or IPython) and
|
| 16 |
+
executing the above commands by hand. It is intended for easy and quick
|
| 17 |
+
experimentation with SymPy. isympy is a good way to use SymPy as an
|
| 18 |
+
interactive calculator. If you have IPython and Matplotlib installed, then
|
| 19 |
+
interactive plotting is enabled by default.
|
| 20 |
+
|
| 21 |
+
COMMAND LINE OPTIONS
|
| 22 |
+
--------------------
|
| 23 |
+
|
| 24 |
+
-c CONSOLE, --console=CONSOLE
|
| 25 |
+
|
| 26 |
+
Use the specified shell (Python or IPython) shell as the console
|
| 27 |
+
backend instead of the default one (IPython if present, Python
|
| 28 |
+
otherwise), e.g.:
|
| 29 |
+
|
| 30 |
+
$isympy -c python
|
| 31 |
+
|
| 32 |
+
CONSOLE must be one of 'ipython' or 'python'
|
| 33 |
+
|
| 34 |
+
-p PRETTY, --pretty PRETTY
|
| 35 |
+
|
| 36 |
+
Setup pretty-printing in SymPy. When pretty-printing is enabled,
|
| 37 |
+
expressions can be printed with Unicode or ASCII. The default is
|
| 38 |
+
to use pretty-printing (with Unicode if the terminal supports it).
|
| 39 |
+
When this option is 'no', expressions will not be pretty-printed
|
| 40 |
+
and ASCII will be used:
|
| 41 |
+
|
| 42 |
+
$isympy -p no
|
| 43 |
+
|
| 44 |
+
PRETTY must be one of 'unicode', 'ascii', or 'no'
|
| 45 |
+
|
| 46 |
+
-t TYPES, --types=TYPES
|
| 47 |
+
|
| 48 |
+
Setup the ground types for the polys. By default, gmpy ground types
|
| 49 |
+
are used if gmpy2 or gmpy is installed, otherwise it falls back to python
|
| 50 |
+
ground types, which are a little bit slower. You can manually
|
| 51 |
+
choose python ground types even if gmpy is installed (e.g., for
|
| 52 |
+
testing purposes):
|
| 53 |
+
|
| 54 |
+
$isympy -t python
|
| 55 |
+
|
| 56 |
+
TYPES must be one of 'gmpy', 'gmpy1' or 'python'
|
| 57 |
+
|
| 58 |
+
Note that the ground type gmpy1 is primarily intended for testing; it
|
| 59 |
+
forces the use of gmpy version 1 even if gmpy2 is available.
|
| 60 |
+
|
| 61 |
+
This is the same as setting the environment variable
|
| 62 |
+
SYMPY_GROUND_TYPES to the given ground type (e.g.,
|
| 63 |
+
SYMPY_GROUND_TYPES='gmpy')
|
| 64 |
+
|
| 65 |
+
The ground types can be determined interactively from the variable
|
| 66 |
+
sympy.polys.domains.GROUND_TYPES.
|
| 67 |
+
|
| 68 |
+
-o ORDER, --order ORDER
|
| 69 |
+
|
| 70 |
+
Setup the ordering of terms for printing. The default is lex, which
|
| 71 |
+
orders terms lexicographically (e.g., x**2 + x + 1). You can choose
|
| 72 |
+
other orderings, such as rev-lex, which will use reverse
|
| 73 |
+
lexicographic ordering (e.g., 1 + x + x**2):
|
| 74 |
+
|
| 75 |
+
$isympy -o rev-lex
|
| 76 |
+
|
| 77 |
+
ORDER must be one of 'lex', 'rev-lex', 'grlex', 'rev-grlex',
|
| 78 |
+
'grevlex', 'rev-grevlex', 'old', or 'none'.
|
| 79 |
+
|
| 80 |
+
Note that for very large expressions, ORDER='none' may speed up
|
| 81 |
+
printing considerably but the terms will have no canonical order.
|
| 82 |
+
|
| 83 |
+
-q, --quiet
|
| 84 |
+
|
| 85 |
+
Print only Python's and SymPy's versions to stdout at startup.
|
| 86 |
+
|
| 87 |
+
-d, --doctest
|
| 88 |
+
|
| 89 |
+
Use the same format that should be used for doctests. This is
|
| 90 |
+
equivalent to -c python -p no.
|
| 91 |
+
|
| 92 |
+
-C, --no-cache
|
| 93 |
+
|
| 94 |
+
Disable the caching mechanism. Disabling the cache may slow certain
|
| 95 |
+
operations down considerably. This is useful for testing the cache,
|
| 96 |
+
or for benchmarking, as the cache can result in deceptive timings.
|
| 97 |
+
|
| 98 |
+
This is equivalent to setting the environment variable
|
| 99 |
+
SYMPY_USE_CACHE to 'no'.
|
| 100 |
+
|
| 101 |
+
-a, --auto-symbols (requires at least IPython 0.11)
|
| 102 |
+
|
| 103 |
+
Automatically create missing symbols. Normally, typing a name of a
|
| 104 |
+
Symbol that has not been instantiated first would raise NameError,
|
| 105 |
+
but with this option enabled, any undefined name will be
|
| 106 |
+
automatically created as a Symbol.
|
| 107 |
+
|
| 108 |
+
Note that this is intended only for interactive, calculator style
|
| 109 |
+
usage. In a script that uses SymPy, Symbols should be instantiated
|
| 110 |
+
at the top, so that it's clear what they are.
|
| 111 |
+
|
| 112 |
+
This will not override any names that are already defined, which
|
| 113 |
+
includes the single character letters represented by the mnemonic
|
| 114 |
+
QCOSINE (see the "Gotchas and Pitfalls" document in the
|
| 115 |
+
documentation). You can delete existing names by executing "del
|
| 116 |
+
name". If a name is defined, typing "'name' in dir()" will return True.
|
| 117 |
+
|
| 118 |
+
The Symbols that are created using this have default assumptions.
|
| 119 |
+
If you want to place assumptions on symbols, you should create them
|
| 120 |
+
using symbols() or var().
|
| 121 |
+
|
| 122 |
+
Finally, this only works in the top level namespace. So, for
|
| 123 |
+
example, if you define a function in isympy with an undefined
|
| 124 |
+
Symbol, it will not work.
|
| 125 |
+
|
| 126 |
+
See also the -i and -I options.
|
| 127 |
+
|
| 128 |
+
-i, --int-to-Integer (requires at least IPython 0.11)
|
| 129 |
+
|
| 130 |
+
Automatically wrap int literals with Integer. This makes it so that
|
| 131 |
+
things like 1/2 will come out as Rational(1, 2), rather than 0.5. This
|
| 132 |
+
works by preprocessing the source and wrapping all int literals with
|
| 133 |
+
Integer. Note that this will not change the behavior of int literals
|
| 134 |
+
assigned to variables, and it also won't change the behavior of functions
|
| 135 |
+
that return int literals.
|
| 136 |
+
|
| 137 |
+
If you want an int, you can wrap the literal in int(), e.g. int(3)/int(2)
|
| 138 |
+
gives 1.5 (with division imported from __future__).
|
| 139 |
+
|
| 140 |
+
-I, --interactive (requires at least IPython 0.11)
|
| 141 |
+
|
| 142 |
+
This is equivalent to --auto-symbols --int-to-Integer. Future options
|
| 143 |
+
designed for ease of interactive use may be added to this.
|
| 144 |
+
|
| 145 |
+
-D, --debug
|
| 146 |
+
|
| 147 |
+
Enable debugging output. This is the same as setting the
|
| 148 |
+
environment variable SYMPY_DEBUG to 'True'. The debug status is set
|
| 149 |
+
in the variable SYMPY_DEBUG within isympy.
|
| 150 |
+
|
| 151 |
+
-- IPython options
|
| 152 |
+
|
| 153 |
+
Additionally you can pass command line options directly to the IPython
|
| 154 |
+
interpreter (the standard Python shell is not supported). However you
|
| 155 |
+
need to add the '--' separator between two types of options, e.g the
|
| 156 |
+
startup banner option and the colors option. You need to enter the
|
| 157 |
+
options as required by the version of IPython that you are using, too:
|
| 158 |
+
|
| 159 |
+
in IPython 0.11,
|
| 160 |
+
|
| 161 |
+
$isympy -q -- --colors=NoColor
|
| 162 |
+
|
| 163 |
+
or older versions of IPython,
|
| 164 |
+
|
| 165 |
+
$isympy -q -- -colors NoColor
|
| 166 |
+
|
| 167 |
+
See also isympy --help.
|
| 168 |
+
"""
|
| 169 |
+
|
| 170 |
+
import os
|
| 171 |
+
import sys
|
| 172 |
+
|
| 173 |
+
# DO NOT IMPORT SYMPY HERE! Or the setting of the sympy environment variables
|
| 174 |
+
# by the command line will break.
|
| 175 |
+
|
| 176 |
+
def main() -> None:
    """Parse isympy's command-line options and start an interactive session."""
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    VERSION = None
    if '--version' in sys.argv:
        # We cannot import sympy before this is run, because flags like -C and
        # -t set environment variables that must be set before SymPy is
        # imported. The only thing we need to import it for is to get the
        # version, which only matters with the --version flag.
        import sympy
        VERSION = sympy.__version__

    usage = 'isympy [options] -- [ipython options]'
    parser = ArgumentParser(
        usage=usage,
        description=__doc__,
        formatter_class=RawDescriptionHelpFormatter,
    )

    parser.add_argument('--version', action='version', version=VERSION)

    parser.add_argument(
        '-c', '--console',
        dest='console',
        action='store',
        default=None,
        choices=['ipython', 'python'],
        metavar='CONSOLE',
        help='select type of interactive session: ipython | python; defaults '
        'to ipython if IPython is installed, otherwise python')

    parser.add_argument(
        '-p', '--pretty',
        dest='pretty',
        action='store',
        default=None,
        metavar='PRETTY',
        choices=['unicode', 'ascii', 'no'],
        help='setup pretty printing: unicode | ascii | no; defaults to '
        'unicode printing if the terminal supports it, otherwise ascii')

    parser.add_argument(
        '-t', '--types',
        dest='types',
        action='store',
        default=None,
        metavar='TYPES',
        choices=['gmpy', 'gmpy1', 'python'],
        help='setup ground types: gmpy | gmpy1 | python; defaults to gmpy if gmpy2 '
        'or gmpy is installed, otherwise python')

    parser.add_argument(
        '-o', '--order',
        dest='order',
        action='store',
        default=None,
        metavar='ORDER',
        choices=['lex', 'grlex', 'grevlex', 'rev-lex', 'rev-grlex', 'rev-grevlex', 'old', 'none'],
        help='setup ordering of terms: [rev-]lex | [rev-]grlex | [rev-]grevlex | old | none; defaults to lex')

    parser.add_argument(
        '-q', '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='print only version information at startup')

    parser.add_argument(
        '-d', '--doctest',
        dest='doctest',
        action='store_true',
        default=False,
        help='use the doctest format for output (you can just copy and paste it)')

    parser.add_argument(
        '-C', '--no-cache',
        dest='cache',
        action='store_false',
        default=True,
        help='disable caching mechanism')

    parser.add_argument(
        '-a', '--auto-symbols',
        dest='auto_symbols',
        action='store_true',
        default=False,
        help='automatically construct missing symbols')

    parser.add_argument(
        '-i', '--int-to-Integer',
        dest='auto_int_to_Integer',
        action='store_true',
        default=False,
        help="automatically wrap int literals with Integer")

    parser.add_argument(
        '-I', '--interactive',
        dest='interactive',
        action='store_true',
        default=False,
        help="equivalent to -a -i")

    parser.add_argument(
        '-D', '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='enable debugging output')

    # Unrecognized options (after '--') are handed through to IPython.
    (options, ipy_args) = parser.parse_known_args()
    if '--' in ipy_args:
        ipy_args.remove('--')

    # Environment variables must be set before sympy is imported below.
    if not options.cache:
        os.environ['SYMPY_USE_CACHE'] = 'no'

    if options.types:
        os.environ['SYMPY_GROUND_TYPES'] = options.types

    if options.debug:
        os.environ['SYMPY_DEBUG'] = str(options.debug)

    # Doctest mode implies the plain Python console with no pretty printing.
    if options.doctest:
        options.pretty = 'no'
        options.console = 'python'

    session = options.console

    if session is not None:
        ipython = session == 'ipython'
    else:
        # No explicit console requested: prefer IPython when available.
        try:
            import IPython
            ipython = True
        except ImportError:
            if not options.quiet:
                from sympy.interactive.session import no_ipython
                print(no_ipython)
            ipython = False

    args = {
        'pretty_print': True,
        'use_unicode': None,
        'use_latex': None,
        'order': None,
        'argv': ipy_args,
    }

    if options.pretty == 'unicode':
        args['use_unicode'] = True
    elif options.pretty == 'ascii':
        args['use_unicode'] = False
    elif options.pretty == 'no':
        args['pretty_print'] = False

    if options.order is not None:
        args['order'] = options.order

    args['quiet'] = options.quiet
    # -I is shorthand for both -a and -i
    args['auto_symbols'] = options.auto_symbols or options.interactive
    args['auto_int_to_Integer'] = options.auto_int_to_Integer or options.interactive

    # Deferred so the environment variables above take effect first.
    from sympy.interactive import init_session
    init_session(ipython, **args)

if __name__ == "__main__":
    main()
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# python-json-pointer - An implementation of the JSON Pointer syntax
|
| 4 |
+
# https://github.com/stefankoegl/python-json-pointer
|
| 5 |
+
#
|
| 6 |
+
# Copyright (c) 2011 Stefan Kögl <stefan@skoegl.net>
|
| 7 |
+
# All rights reserved.
|
| 8 |
+
#
|
| 9 |
+
# Redistribution and use in source and binary forms, with or without
|
| 10 |
+
# modification, are permitted provided that the following conditions
|
| 11 |
+
# are met:
|
| 12 |
+
#
|
| 13 |
+
# 1. Redistributions of source code must retain the above copyright
|
| 14 |
+
# notice, this list of conditions and the following disclaimer.
|
| 15 |
+
# 2. Redistributions in binary form must reproduce the above copyright
|
| 16 |
+
# notice, this list of conditions and the following disclaimer in the
|
| 17 |
+
# documentation and/or other materials provided with the distribution.
|
| 18 |
+
# 3. The name of the author may not be used to endorse or promote products
|
| 19 |
+
# derived from this software without specific prior written permission.
|
| 20 |
+
#
|
| 21 |
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
| 22 |
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
| 23 |
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
| 24 |
+
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
| 25 |
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
| 26 |
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 27 |
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 28 |
+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 29 |
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
| 30 |
+
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 31 |
+
#
|
| 32 |
+
|
| 33 |
+
""" Identify specific nodes in a JSON document (RFC 6901) """
|
| 34 |
+
|
| 35 |
+
# Will be parsed by setup.py to determine package metadata
|
| 36 |
+
__author__ = 'Stefan Kögl <stefan@skoegl.net>'
|
| 37 |
+
__version__ = '3.0.0'
|
| 38 |
+
__website__ = 'https://github.com/stefankoegl/python-json-pointer'
|
| 39 |
+
__license__ = 'Modified BSD License'
|
| 40 |
+
|
| 41 |
+
import copy
|
| 42 |
+
import re
|
| 43 |
+
from collections.abc import Mapping, Sequence
|
| 44 |
+
from itertools import tee, chain
|
| 45 |
+
|
| 46 |
+
_nothing = object()
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def set_pointer(doc, pointer, value, inplace=True):
    """Set the value referenced by ``pointer`` inside ``doc``.

    The string ``pointer`` is parsed as a JSON Pointer and resolved against
    ``doc``; the target it names is replaced with ``value``. When
    ``inplace`` is true, ``doc`` itself is modified as long as the pointer
    is not the root.

    >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}}

    >>> set_pointer(obj, '/foo/anArray/0/prop', 55) == \
    {'foo': {'another prop': {'baz': 'A string'}, 'anArray': [{'prop': 55}]}}
    True

    >>> set_pointer(obj, '/foo/yet another prop', 'added prop') == \
    {'foo': {'another prop': {'baz': 'A string'}, 'yet another prop': 'added prop', 'anArray': [{'prop': 55}]}}
    True

    >>> obj = {'foo': {}}
    >>> set_pointer(obj, '/foo/a%20b', 'x') == \
    {'foo': {'a%20b': 'x' }}
    True
    """
    return JsonPointer(pointer).set(doc, value, inplace)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def resolve_pointer(doc, pointer, default=_nothing):
    """Return the object referenced by ``pointer`` within ``doc``.

    ``pointer`` is parsed as a JSON Pointer string. If it cannot be
    resolved and ``default`` is supplied, ``default`` is returned instead
    of raising.

    >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}, 'a%20b': 1, 'c d': 2}

    >>> resolve_pointer(obj, '') == obj
    True

    >>> resolve_pointer(obj, '/foo') == obj['foo']
    True

    >>> resolve_pointer(obj, '/foo/another prop') == obj['foo']['another prop']
    True

    >>> resolve_pointer(obj, '/foo/another prop/baz') == obj['foo']['another prop']['baz']
    True

    >>> resolve_pointer(obj, '/foo/anArray/0') == obj['foo']['anArray'][0]
    True

    >>> resolve_pointer(obj, '/some/path', None) == None
    True

    >>> resolve_pointer(obj, '/a b', None) == None
    True

    >>> resolve_pointer(obj, '/a%20b') == 1
    True

    >>> resolve_pointer(obj, '/c d') == 2
    True

    >>> resolve_pointer(obj, '/c%20d', None) == None
    True
    """
    return JsonPointer(pointer).resolve(doc, default)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def pairwise(iterable):
    """Yield tuples of adjacent items from ``iterable``.

    s -> (s0,s1), (s1,s2), (s2, s3), ...

    >>> list(pairwise([]))
    []

    >>> list(pairwise([1]))
    []

    >>> list(pairwise([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]
    """
    first, second = tee(iterable)
    # advance the second copy by one element (no-op when empty)
    next(second, None)
    return zip(first, second)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class JsonPointerException(Exception):
    """Raised when a JSON Pointer cannot be parsed or resolved."""
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class EndOfList(object):
    """Result of accessing element "-" of a list"""

    def __init__(self, list_):
        # keep a reference to the list whose end was addressed
        self.list_ = list_

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.list_)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
class JsonPointer(object):
|
| 151 |
+
"""A JSON Pointer that can reference parts of a JSON document"""
|
| 152 |
+
|
| 153 |
+
# Array indices must not contain:
|
| 154 |
+
# leading zeros, signs, spaces, decimals, etc
|
| 155 |
+
_RE_ARRAY_INDEX = re.compile('0|[1-9][0-9]*$')
|
| 156 |
+
_RE_INVALID_ESCAPE = re.compile('(~[^01]|~$)')
|
| 157 |
+
|
| 158 |
+
def __init__(self, pointer):
|
| 159 |
+
|
| 160 |
+
# validate escapes
|
| 161 |
+
invalid_escape = self._RE_INVALID_ESCAPE.search(pointer)
|
| 162 |
+
if invalid_escape:
|
| 163 |
+
raise JsonPointerException('Found invalid escape {}'.format(
|
| 164 |
+
invalid_escape.group()))
|
| 165 |
+
|
| 166 |
+
parts = pointer.split('/')
|
| 167 |
+
if parts.pop(0) != '':
|
| 168 |
+
raise JsonPointerException('Location must start with /')
|
| 169 |
+
|
| 170 |
+
parts = [unescape(part) for part in parts]
|
| 171 |
+
self.parts = parts
|
| 172 |
+
|
| 173 |
+
def to_last(self, doc):
|
| 174 |
+
"""Resolves ptr until the last step, returns (sub-doc, last-step)"""
|
| 175 |
+
|
| 176 |
+
if not self.parts:
|
| 177 |
+
return doc, None
|
| 178 |
+
|
| 179 |
+
for part in self.parts[:-1]:
|
| 180 |
+
doc = self.walk(doc, part)
|
| 181 |
+
|
| 182 |
+
return doc, JsonPointer.get_part(doc, self.parts[-1])
|
| 183 |
+
|
| 184 |
+
def resolve(self, doc, default=_nothing):
|
| 185 |
+
"""Resolves the pointer against doc and returns the referenced object"""
|
| 186 |
+
|
| 187 |
+
for part in self.parts:
|
| 188 |
+
|
| 189 |
+
try:
|
| 190 |
+
doc = self.walk(doc, part)
|
| 191 |
+
except JsonPointerException:
|
| 192 |
+
if default is _nothing:
|
| 193 |
+
raise
|
| 194 |
+
else:
|
| 195 |
+
return default
|
| 196 |
+
|
| 197 |
+
return doc
|
| 198 |
+
|
| 199 |
+
get = resolve
|
| 200 |
+
|
| 201 |
+
def set(self, doc, value, inplace=True):
|
| 202 |
+
"""Resolve the pointer against the doc and replace the target with value."""
|
| 203 |
+
|
| 204 |
+
if len(self.parts) == 0:
|
| 205 |
+
if inplace:
|
| 206 |
+
raise JsonPointerException('Cannot set root in place')
|
| 207 |
+
return value
|
| 208 |
+
|
| 209 |
+
if not inplace:
|
| 210 |
+
doc = copy.deepcopy(doc)
|
| 211 |
+
|
| 212 |
+
(parent, part) = self.to_last(doc)
|
| 213 |
+
|
| 214 |
+
if isinstance(parent, Sequence) and part == '-':
|
| 215 |
+
parent.append(value)
|
| 216 |
+
else:
|
| 217 |
+
parent[part] = value
|
| 218 |
+
|
| 219 |
+
return doc
|
| 220 |
+
|
| 221 |
+
@classmethod
|
| 222 |
+
def get_part(cls, doc, part):
|
| 223 |
+
"""Returns the next step in the correct type"""
|
| 224 |
+
|
| 225 |
+
if isinstance(doc, Mapping):
|
| 226 |
+
return part
|
| 227 |
+
|
| 228 |
+
elif isinstance(doc, Sequence):
|
| 229 |
+
|
| 230 |
+
if part == '-':
|
| 231 |
+
return part
|
| 232 |
+
|
| 233 |
+
if not JsonPointer._RE_ARRAY_INDEX.match(str(part)):
|
| 234 |
+
raise JsonPointerException("'%s' is not a valid sequence index" % part)
|
| 235 |
+
|
| 236 |
+
return int(part)
|
| 237 |
+
|
| 238 |
+
elif hasattr(doc, '__getitem__'):
|
| 239 |
+
# Allow indexing via ducktyping
|
| 240 |
+
# if the target has defined __getitem__
|
| 241 |
+
return part
|
| 242 |
+
|
| 243 |
+
else:
|
| 244 |
+
raise JsonPointerException("Document '%s' does not support indexing, "
|
| 245 |
+
"must be mapping/sequence or support __getitem__" % type(doc))
|
| 246 |
+
|
| 247 |
+
def get_parts(self):
|
| 248 |
+
"""Returns the list of the parts. For example, JsonPointer('/a/b').get_parts() == ['a', 'b']"""
|
| 249 |
+
|
| 250 |
+
return self.parts
|
| 251 |
+
|
| 252 |
+
def walk(self, doc, part):
|
| 253 |
+
""" Walks one step in doc and returns the referenced part """
|
| 254 |
+
|
| 255 |
+
part = JsonPointer.get_part(doc, part)
|
| 256 |
+
|
| 257 |
+
assert hasattr(doc, '__getitem__'), "invalid document type %s" % (type(doc),)
|
| 258 |
+
|
| 259 |
+
if isinstance(doc, Sequence):
|
| 260 |
+
if part == '-':
|
| 261 |
+
return EndOfList(doc)
|
| 262 |
+
|
| 263 |
+
try:
|
| 264 |
+
return doc[part]
|
| 265 |
+
|
| 266 |
+
except IndexError:
|
| 267 |
+
raise JsonPointerException("index '%s' is out of bounds" % (part,))
|
| 268 |
+
|
| 269 |
+
# Else the object is a mapping or supports __getitem__(so assume custom indexing)
|
| 270 |
+
try:
|
| 271 |
+
return doc[part]
|
| 272 |
+
|
| 273 |
+
except KeyError:
|
| 274 |
+
raise JsonPointerException("member '%s' not found in %s" % (part, doc))
|
| 275 |
+
|
| 276 |
+
def contains(self, ptr):
|
| 277 |
+
""" Returns True if self contains the given ptr """
|
| 278 |
+
return self.parts[:len(ptr.parts)] == ptr.parts
|
| 279 |
+
|
| 280 |
+
def __contains__(self, item):
|
| 281 |
+
""" Returns True if self contains the given ptr """
|
| 282 |
+
return self.contains(item)
|
| 283 |
+
|
| 284 |
+
def join(self, suffix):
|
| 285 |
+
""" Returns a new JsonPointer with the given suffix append to this ptr """
|
| 286 |
+
if isinstance(suffix, JsonPointer):
|
| 287 |
+
suffix_parts = suffix.parts
|
| 288 |
+
elif isinstance(suffix, str):
|
| 289 |
+
suffix_parts = JsonPointer(suffix).parts
|
| 290 |
+
else:
|
| 291 |
+
suffix_parts = suffix
|
| 292 |
+
try:
|
| 293 |
+
return JsonPointer.from_parts(chain(self.parts, suffix_parts))
|
| 294 |
+
except: # noqa E722
|
| 295 |
+
raise JsonPointerException("Invalid suffix")
|
| 296 |
+
|
| 297 |
+
def __truediv__(self, suffix): # Python 3
|
| 298 |
+
return self.join(suffix)
|
| 299 |
+
|
| 300 |
+
@property
|
| 301 |
+
def path(self):
|
| 302 |
+
"""Returns the string representation of the pointer
|
| 303 |
+
|
| 304 |
+
>>> ptr = JsonPointer('/~0/0/~1').path == '/~0/0/~1'
|
| 305 |
+
"""
|
| 306 |
+
parts = [escape(part) for part in self.parts]
|
| 307 |
+
return ''.join('/' + part for part in parts)
|
| 308 |
+
|
| 309 |
+
def __eq__(self, other):
|
| 310 |
+
"""Compares a pointer to another object
|
| 311 |
+
|
| 312 |
+
Pointers can be compared by comparing their strings (or splitted
|
| 313 |
+
strings), because no two different parts can point to the same
|
| 314 |
+
structure in an object (eg no different number representations)
|
| 315 |
+
"""
|
| 316 |
+
|
| 317 |
+
if not isinstance(other, JsonPointer):
|
| 318 |
+
return False
|
| 319 |
+
|
| 320 |
+
return self.parts == other.parts
|
| 321 |
+
|
| 322 |
+
def __hash__(self):
|
| 323 |
+
return hash(tuple(self.parts))
|
| 324 |
+
|
| 325 |
+
def __str__(self):
|
| 326 |
+
return self.path
|
| 327 |
+
|
| 328 |
+
def __repr__(self):
|
| 329 |
+
return type(self).__name__ + "(" + repr(self.path) + ")"
|
| 330 |
+
|
| 331 |
+
@classmethod
|
| 332 |
+
def from_parts(cls, parts):
|
| 333 |
+
"""Constructs a JsonPointer from a list of (unescaped) paths
|
| 334 |
+
|
| 335 |
+
>>> JsonPointer.from_parts(['a', '~', '/', 0]).path == '/a/~0/~1/0'
|
| 336 |
+
True
|
| 337 |
+
"""
|
| 338 |
+
parts = [escape(str(part)) for part in parts]
|
| 339 |
+
ptr = cls(''.join('/' + part for part in parts))
|
| 340 |
+
return ptr
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def escape(s):
|
| 344 |
+
return s.replace('~', '~0').replace('/', '~1')
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def unescape(s):
|
| 348 |
+
return s.replace('~1', '/').replace('~0', '~')
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Launch the root jupyter command"""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
if __name__ == "__main__":
|
| 5 |
+
from jupyter_core.command import main
|
| 6 |
+
|
| 7 |
+
main()
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Patch asyncio to allow nested event loops."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import asyncio.events as events
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
import threading
|
| 8 |
+
from contextlib import contextmanager, suppress
|
| 9 |
+
from heapq import heappop
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def apply(loop=None):
|
| 13 |
+
"""Patch asyncio to make its event loop reentrant."""
|
| 14 |
+
_patch_asyncio()
|
| 15 |
+
_patch_policy()
|
| 16 |
+
_patch_tornado()
|
| 17 |
+
|
| 18 |
+
loop = loop or asyncio.get_event_loop()
|
| 19 |
+
_patch_loop(loop)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _patch_asyncio():
|
| 23 |
+
"""Patch asyncio module to use pure Python tasks and futures."""
|
| 24 |
+
|
| 25 |
+
def run(main, *, debug=False):
|
| 26 |
+
loop = asyncio.get_event_loop()
|
| 27 |
+
loop.set_debug(debug)
|
| 28 |
+
task = asyncio.ensure_future(main)
|
| 29 |
+
try:
|
| 30 |
+
return loop.run_until_complete(task)
|
| 31 |
+
finally:
|
| 32 |
+
if not task.done():
|
| 33 |
+
task.cancel()
|
| 34 |
+
with suppress(asyncio.CancelledError):
|
| 35 |
+
loop.run_until_complete(task)
|
| 36 |
+
|
| 37 |
+
def _get_event_loop(stacklevel=3):
|
| 38 |
+
loop = events._get_running_loop()
|
| 39 |
+
if loop is None:
|
| 40 |
+
loop = events.get_event_loop_policy().get_event_loop()
|
| 41 |
+
return loop
|
| 42 |
+
|
| 43 |
+
# Use module level _current_tasks, all_tasks and patch run method.
|
| 44 |
+
if hasattr(asyncio, '_nest_patched'):
|
| 45 |
+
return
|
| 46 |
+
if sys.version_info >= (3, 6, 0):
|
| 47 |
+
asyncio.Task = asyncio.tasks._CTask = asyncio.tasks.Task = \
|
| 48 |
+
asyncio.tasks._PyTask
|
| 49 |
+
asyncio.Future = asyncio.futures._CFuture = asyncio.futures.Future = \
|
| 50 |
+
asyncio.futures._PyFuture
|
| 51 |
+
if sys.version_info < (3, 7, 0):
|
| 52 |
+
asyncio.tasks._current_tasks = asyncio.tasks.Task._current_tasks
|
| 53 |
+
asyncio.all_tasks = asyncio.tasks.Task.all_tasks
|
| 54 |
+
if sys.version_info >= (3, 9, 0):
|
| 55 |
+
events._get_event_loop = events.get_event_loop = \
|
| 56 |
+
asyncio.get_event_loop = _get_event_loop
|
| 57 |
+
asyncio.run = run
|
| 58 |
+
asyncio._nest_patched = True
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _patch_policy():
|
| 62 |
+
"""Patch the policy to always return a patched loop."""
|
| 63 |
+
|
| 64 |
+
def get_event_loop(self):
|
| 65 |
+
if self._local._loop is None:
|
| 66 |
+
loop = self.new_event_loop()
|
| 67 |
+
_patch_loop(loop)
|
| 68 |
+
self.set_event_loop(loop)
|
| 69 |
+
return self._local._loop
|
| 70 |
+
|
| 71 |
+
policy = events.get_event_loop_policy()
|
| 72 |
+
policy.__class__.get_event_loop = get_event_loop
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _patch_loop(loop):
|
| 76 |
+
"""Patch loop to make it reentrant."""
|
| 77 |
+
|
| 78 |
+
def run_forever(self):
|
| 79 |
+
with manage_run(self), manage_asyncgens(self):
|
| 80 |
+
while True:
|
| 81 |
+
self._run_once()
|
| 82 |
+
if self._stopping:
|
| 83 |
+
break
|
| 84 |
+
self._stopping = False
|
| 85 |
+
|
| 86 |
+
def run_until_complete(self, future):
|
| 87 |
+
with manage_run(self):
|
| 88 |
+
f = asyncio.ensure_future(future, loop=self)
|
| 89 |
+
if f is not future:
|
| 90 |
+
f._log_destroy_pending = False
|
| 91 |
+
while not f.done():
|
| 92 |
+
self._run_once()
|
| 93 |
+
if self._stopping:
|
| 94 |
+
break
|
| 95 |
+
if not f.done():
|
| 96 |
+
raise RuntimeError(
|
| 97 |
+
'Event loop stopped before Future completed.')
|
| 98 |
+
return f.result()
|
| 99 |
+
|
| 100 |
+
def _run_once(self):
|
| 101 |
+
"""
|
| 102 |
+
Simplified re-implementation of asyncio's _run_once that
|
| 103 |
+
runs handles as they become ready.
|
| 104 |
+
"""
|
| 105 |
+
ready = self._ready
|
| 106 |
+
scheduled = self._scheduled
|
| 107 |
+
while scheduled and scheduled[0]._cancelled:
|
| 108 |
+
heappop(scheduled)
|
| 109 |
+
|
| 110 |
+
timeout = (
|
| 111 |
+
0 if ready or self._stopping
|
| 112 |
+
else min(max(
|
| 113 |
+
scheduled[0]._when - self.time(), 0), 86400) if scheduled
|
| 114 |
+
else None)
|
| 115 |
+
event_list = self._selector.select(timeout)
|
| 116 |
+
self._process_events(event_list)
|
| 117 |
+
|
| 118 |
+
end_time = self.time() + self._clock_resolution
|
| 119 |
+
while scheduled and scheduled[0]._when < end_time:
|
| 120 |
+
handle = heappop(scheduled)
|
| 121 |
+
ready.append(handle)
|
| 122 |
+
|
| 123 |
+
for _ in range(len(ready)):
|
| 124 |
+
if not ready:
|
| 125 |
+
break
|
| 126 |
+
handle = ready.popleft()
|
| 127 |
+
if not handle._cancelled:
|
| 128 |
+
# preempt the current task so that that checks in
|
| 129 |
+
# Task.__step do not raise
|
| 130 |
+
curr_task = curr_tasks.pop(self, None)
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
handle._run()
|
| 134 |
+
finally:
|
| 135 |
+
# restore the current task
|
| 136 |
+
if curr_task is not None:
|
| 137 |
+
curr_tasks[self] = curr_task
|
| 138 |
+
|
| 139 |
+
handle = None
|
| 140 |
+
|
| 141 |
+
@contextmanager
|
| 142 |
+
def manage_run(self):
|
| 143 |
+
"""Set up the loop for running."""
|
| 144 |
+
self._check_closed()
|
| 145 |
+
old_thread_id = self._thread_id
|
| 146 |
+
old_running_loop = events._get_running_loop()
|
| 147 |
+
try:
|
| 148 |
+
self._thread_id = threading.get_ident()
|
| 149 |
+
events._set_running_loop(self)
|
| 150 |
+
self._num_runs_pending += 1
|
| 151 |
+
if self._is_proactorloop:
|
| 152 |
+
if self._self_reading_future is None:
|
| 153 |
+
self.call_soon(self._loop_self_reading)
|
| 154 |
+
yield
|
| 155 |
+
finally:
|
| 156 |
+
self._thread_id = old_thread_id
|
| 157 |
+
events._set_running_loop(old_running_loop)
|
| 158 |
+
self._num_runs_pending -= 1
|
| 159 |
+
if self._is_proactorloop:
|
| 160 |
+
if (self._num_runs_pending == 0
|
| 161 |
+
and self._self_reading_future is not None):
|
| 162 |
+
ov = self._self_reading_future._ov
|
| 163 |
+
self._self_reading_future.cancel()
|
| 164 |
+
if ov is not None:
|
| 165 |
+
self._proactor._unregister(ov)
|
| 166 |
+
self._self_reading_future = None
|
| 167 |
+
|
| 168 |
+
@contextmanager
|
| 169 |
+
def manage_asyncgens(self):
|
| 170 |
+
if not hasattr(sys, 'get_asyncgen_hooks'):
|
| 171 |
+
# Python version is too old.
|
| 172 |
+
return
|
| 173 |
+
old_agen_hooks = sys.get_asyncgen_hooks()
|
| 174 |
+
try:
|
| 175 |
+
self._set_coroutine_origin_tracking(self._debug)
|
| 176 |
+
if self._asyncgens is not None:
|
| 177 |
+
sys.set_asyncgen_hooks(
|
| 178 |
+
firstiter=self._asyncgen_firstiter_hook,
|
| 179 |
+
finalizer=self._asyncgen_finalizer_hook)
|
| 180 |
+
yield
|
| 181 |
+
finally:
|
| 182 |
+
self._set_coroutine_origin_tracking(False)
|
| 183 |
+
if self._asyncgens is not None:
|
| 184 |
+
sys.set_asyncgen_hooks(*old_agen_hooks)
|
| 185 |
+
|
| 186 |
+
def _check_running(self):
|
| 187 |
+
"""Do not throw exception if loop is already running."""
|
| 188 |
+
pass
|
| 189 |
+
|
| 190 |
+
if hasattr(loop, '_nest_patched'):
|
| 191 |
+
return
|
| 192 |
+
if not isinstance(loop, asyncio.BaseEventLoop):
|
| 193 |
+
raise ValueError('Can\'t patch loop of type %s' % type(loop))
|
| 194 |
+
cls = loop.__class__
|
| 195 |
+
cls.run_forever = run_forever
|
| 196 |
+
cls.run_until_complete = run_until_complete
|
| 197 |
+
cls._run_once = _run_once
|
| 198 |
+
cls._check_running = _check_running
|
| 199 |
+
cls._check_runnung = _check_running # typo in Python 3.7 source
|
| 200 |
+
cls._num_runs_pending = 1 if loop.is_running() else 0
|
| 201 |
+
cls._is_proactorloop = (
|
| 202 |
+
os.name == 'nt' and issubclass(cls, asyncio.ProactorEventLoop))
|
| 203 |
+
if sys.version_info < (3, 7, 0):
|
| 204 |
+
cls._set_coroutine_origin_tracking = cls._set_coroutine_wrapper
|
| 205 |
+
curr_tasks = asyncio.tasks._current_tasks \
|
| 206 |
+
if sys.version_info >= (3, 7, 0) else asyncio.Task._current_tasks
|
| 207 |
+
cls._nest_patched = True
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def _patch_tornado():
|
| 211 |
+
"""
|
| 212 |
+
If tornado is imported before nest_asyncio, make tornado aware of
|
| 213 |
+
the pure-Python asyncio Future.
|
| 214 |
+
"""
|
| 215 |
+
if 'tornado' in sys.modules:
|
| 216 |
+
import tornado.concurrent as tc # type: ignore
|
| 217 |
+
tc.Future = asyncio.Future
|
| 218 |
+
if asyncio.Future not in tc.FUTURES:
|
| 219 |
+
tc.FUTURES += (asyncio.Future,)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Author: John MacFarlane <jgm@berkeley.edu>
|
| 2 |
+
# Copyright: (C) 2013 John MacFarlane
|
| 3 |
+
# License: BSD3
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
Functions to aid writing python scripts that process the pandoc
|
| 7 |
+
AST serialized as JSON.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import codecs
|
| 11 |
+
import hashlib
|
| 12 |
+
import io
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
import atexit
|
| 17 |
+
import shutil
|
| 18 |
+
import tempfile
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# some utility-functions: make it easier to create your own filters
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_filename4code(module, content, ext=None):
|
| 25 |
+
"""Generate filename based on content
|
| 26 |
+
|
| 27 |
+
The function ensures that the (temporary) directory exists, so that the
|
| 28 |
+
file can be written.
|
| 29 |
+
|
| 30 |
+
By default, the directory won't be cleaned up,
|
| 31 |
+
so a filter can use the directory as a cache and
|
| 32 |
+
decide not to regenerate if there's no change.
|
| 33 |
+
|
| 34 |
+
In case the user preferres the files to be temporary files,
|
| 35 |
+
an environment variable `PANDOCFILTER_CLEANUP` can be set to
|
| 36 |
+
any non-empty value such as `1` to
|
| 37 |
+
make sure the directory is created in a temporary location and removed
|
| 38 |
+
after finishing the filter. In this case there's no caching and files
|
| 39 |
+
will be regenerated each time the filter is run.
|
| 40 |
+
|
| 41 |
+
Example:
|
| 42 |
+
filename = get_filename4code("myfilter", code)
|
| 43 |
+
"""
|
| 44 |
+
if os.getenv('PANDOCFILTER_CLEANUP'):
|
| 45 |
+
imagedir = tempfile.mkdtemp(prefix=module)
|
| 46 |
+
atexit.register(lambda: shutil.rmtree(imagedir))
|
| 47 |
+
else:
|
| 48 |
+
imagedir = module + "-images"
|
| 49 |
+
fn = hashlib.sha1(content.encode(sys.getfilesystemencoding())).hexdigest()
|
| 50 |
+
try:
|
| 51 |
+
os.makedirs(imagedir, exist_ok=True)
|
| 52 |
+
sys.stderr.write('Created directory ' + imagedir + '\n')
|
| 53 |
+
except OSError:
|
| 54 |
+
sys.stderr.write('Could not create directory "' + imagedir + '"\n')
|
| 55 |
+
if ext:
|
| 56 |
+
fn += "." + ext
|
| 57 |
+
return os.path.join(imagedir, fn)
|
| 58 |
+
|
| 59 |
+
def get_value(kv, key, value = None):
|
| 60 |
+
"""get value from the keyvalues (options)"""
|
| 61 |
+
res = []
|
| 62 |
+
for k, v in kv:
|
| 63 |
+
if k == key:
|
| 64 |
+
value = v
|
| 65 |
+
else:
|
| 66 |
+
res.append([k, v])
|
| 67 |
+
return value, res
|
| 68 |
+
|
| 69 |
+
def get_caption(kv):
|
| 70 |
+
"""get caption from the keyvalues (options)
|
| 71 |
+
|
| 72 |
+
Example:
|
| 73 |
+
if key == 'CodeBlock':
|
| 74 |
+
[[ident, classes, keyvals], code] = value
|
| 75 |
+
caption, typef, keyvals = get_caption(keyvals)
|
| 76 |
+
...
|
| 77 |
+
return Para([Image([ident, [], keyvals], caption, [filename, typef])])
|
| 78 |
+
"""
|
| 79 |
+
caption = []
|
| 80 |
+
typef = ""
|
| 81 |
+
value, res = get_value(kv, u"caption")
|
| 82 |
+
if value is not None:
|
| 83 |
+
caption = [Str(value)]
|
| 84 |
+
typef = "fig:"
|
| 85 |
+
|
| 86 |
+
return caption, typef, res
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_extension(format, default, **alternates):
|
| 90 |
+
"""get the extension for the result, needs a default and some specialisations
|
| 91 |
+
|
| 92 |
+
Example:
|
| 93 |
+
filetype = get_extension(format, "png", html="svg", latex="eps")
|
| 94 |
+
"""
|
| 95 |
+
try:
|
| 96 |
+
return alternates[format]
|
| 97 |
+
except KeyError:
|
| 98 |
+
return default
|
| 99 |
+
|
| 100 |
+
# end of utilities
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def walk(x, action, format, meta):
|
| 104 |
+
"""Walk a tree, applying an action to every object.
|
| 105 |
+
Returns a modified tree. An action is a function of the form
|
| 106 |
+
`action(key, value, format, meta)`, where:
|
| 107 |
+
|
| 108 |
+
* `key` is the type of the pandoc object (e.g. 'Str', 'Para') `value` is
|
| 109 |
+
* the contents of the object (e.g. a string for 'Str', a list of
|
| 110 |
+
inline elements for 'Para')
|
| 111 |
+
* `format` is the target output format (as supplied by the
|
| 112 |
+
`format` argument of `walk`)
|
| 113 |
+
* `meta` is the document's metadata
|
| 114 |
+
|
| 115 |
+
The return of an action is either:
|
| 116 |
+
|
| 117 |
+
* `None`: this means that the object should remain unchanged
|
| 118 |
+
* a pandoc object: this will replace the original object
|
| 119 |
+
* a list of pandoc objects: these will replace the original object; the
|
| 120 |
+
list is merged with the neighbors of the orignal objects (spliced into
|
| 121 |
+
the list the original object belongs to); returning an empty list deletes
|
| 122 |
+
the object
|
| 123 |
+
"""
|
| 124 |
+
if isinstance(x, list):
|
| 125 |
+
array = []
|
| 126 |
+
for item in x:
|
| 127 |
+
if isinstance(item, dict) and 't' in item:
|
| 128 |
+
res = action(item['t'],
|
| 129 |
+
item['c'] if 'c' in item else None, format, meta)
|
| 130 |
+
if res is None:
|
| 131 |
+
array.append(walk(item, action, format, meta))
|
| 132 |
+
elif isinstance(res, list):
|
| 133 |
+
for z in res:
|
| 134 |
+
array.append(walk(z, action, format, meta))
|
| 135 |
+
else:
|
| 136 |
+
array.append(walk(res, action, format, meta))
|
| 137 |
+
else:
|
| 138 |
+
array.append(walk(item, action, format, meta))
|
| 139 |
+
return array
|
| 140 |
+
elif isinstance(x, dict):
|
| 141 |
+
return {k: walk(v, action, format, meta) for k, v in x.items()}
|
| 142 |
+
else:
|
| 143 |
+
return x
|
| 144 |
+
|
| 145 |
+
def toJSONFilter(action):
|
| 146 |
+
"""Like `toJSONFilters`, but takes a single action as argument.
|
| 147 |
+
"""
|
| 148 |
+
toJSONFilters([action])
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def toJSONFilters(actions):
|
| 152 |
+
"""Generate a JSON-to-JSON filter from stdin to stdout
|
| 153 |
+
|
| 154 |
+
The filter:
|
| 155 |
+
|
| 156 |
+
* reads a JSON-formatted pandoc document from stdin
|
| 157 |
+
* transforms it by walking the tree and performing the actions
|
| 158 |
+
* returns a new JSON-formatted pandoc document to stdout
|
| 159 |
+
|
| 160 |
+
The argument `actions` is a list of functions of the form
|
| 161 |
+
`action(key, value, format, meta)`, as described in more
|
| 162 |
+
detail under `walk`.
|
| 163 |
+
|
| 164 |
+
This function calls `applyJSONFilters`, with the `format`
|
| 165 |
+
argument provided by the first command-line argument,
|
| 166 |
+
if present. (Pandoc sets this by default when calling
|
| 167 |
+
filters.)
|
| 168 |
+
"""
|
| 169 |
+
try:
|
| 170 |
+
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
|
| 171 |
+
except AttributeError:
|
| 172 |
+
# Python 2 does not have sys.stdin.buffer.
|
| 173 |
+
# REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
|
| 174 |
+
input_stream = codecs.getreader("utf-8")(sys.stdin)
|
| 175 |
+
|
| 176 |
+
source = input_stream.read()
|
| 177 |
+
if len(sys.argv) > 1:
|
| 178 |
+
format = sys.argv[1]
|
| 179 |
+
else:
|
| 180 |
+
format = ""
|
| 181 |
+
|
| 182 |
+
sys.stdout.write(applyJSONFilters(actions, source, format))
|
| 183 |
+
|
| 184 |
+
def applyJSONFilters(actions, source, format=""):
|
| 185 |
+
"""Walk through JSON structure and apply filters
|
| 186 |
+
|
| 187 |
+
This:
|
| 188 |
+
|
| 189 |
+
* reads a JSON-formatted pandoc document from a source string
|
| 190 |
+
* transforms it by walking the tree and performing the actions
|
| 191 |
+
* returns a new JSON-formatted pandoc document as a string
|
| 192 |
+
|
| 193 |
+
The `actions` argument is a list of functions (see `walk`
|
| 194 |
+
for a full description).
|
| 195 |
+
|
| 196 |
+
The argument `source` is a string encoded JSON object.
|
| 197 |
+
|
| 198 |
+
The argument `format` is a string describing the output format.
|
| 199 |
+
|
| 200 |
+
Returns a the new JSON-formatted pandoc document.
|
| 201 |
+
"""
|
| 202 |
+
|
| 203 |
+
doc = json.loads(source)
|
| 204 |
+
|
| 205 |
+
if 'meta' in doc:
|
| 206 |
+
meta = doc['meta']
|
| 207 |
+
elif doc[0]: # old API
|
| 208 |
+
meta = doc[0]['unMeta']
|
| 209 |
+
else:
|
| 210 |
+
meta = {}
|
| 211 |
+
altered = doc
|
| 212 |
+
for action in actions:
|
| 213 |
+
altered = walk(altered, action, format, meta)
|
| 214 |
+
|
| 215 |
+
return json.dumps(altered)
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def stringify(x):
|
| 219 |
+
"""Walks the tree x and returns concatenated string content,
|
| 220 |
+
leaving out all formatting.
|
| 221 |
+
"""
|
| 222 |
+
result = []
|
| 223 |
+
|
| 224 |
+
def go(key, val, format, meta):
|
| 225 |
+
if key in ['Str', 'MetaString']:
|
| 226 |
+
result.append(val)
|
| 227 |
+
elif key == 'Code':
|
| 228 |
+
result.append(val[1])
|
| 229 |
+
elif key == 'Math':
|
| 230 |
+
result.append(val[1])
|
| 231 |
+
elif key == 'LineBreak':
|
| 232 |
+
result.append(" ")
|
| 233 |
+
elif key == 'SoftBreak':
|
| 234 |
+
result.append(" ")
|
| 235 |
+
elif key == 'Space':
|
| 236 |
+
result.append(" ")
|
| 237 |
+
|
| 238 |
+
walk(x, go, "", {})
|
| 239 |
+
return ''.join(result)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def attributes(attrs):
|
| 243 |
+
"""Returns an attribute list, constructed from the
|
| 244 |
+
dictionary attrs.
|
| 245 |
+
"""
|
| 246 |
+
attrs = attrs or {}
|
| 247 |
+
ident = attrs.get("id", "")
|
| 248 |
+
classes = attrs.get("classes", [])
|
| 249 |
+
keyvals = [[x, attrs[x]] for x in attrs if (x != "classes" and x != "id")]
|
| 250 |
+
return [ident, classes, keyvals]
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def elt(eltType, numargs):
|
| 254 |
+
def fun(*args):
|
| 255 |
+
lenargs = len(args)
|
| 256 |
+
if lenargs != numargs:
|
| 257 |
+
raise ValueError(eltType + ' expects ' + str(numargs) +
|
| 258 |
+
' arguments, but given ' + str(lenargs))
|
| 259 |
+
if numargs == 0:
|
| 260 |
+
xs = []
|
| 261 |
+
elif len(args) == 1:
|
| 262 |
+
xs = args[0]
|
| 263 |
+
else:
|
| 264 |
+
xs = list(args)
|
| 265 |
+
return {'t': eltType, 'c': xs}
|
| 266 |
+
return fun
|
| 267 |
+
|
| 268 |
+
# Constructors for block elements
|
| 269 |
+
|
| 270 |
+
Plain = elt('Plain', 1)
|
| 271 |
+
Para = elt('Para', 1)
|
| 272 |
+
CodeBlock = elt('CodeBlock', 2)
|
| 273 |
+
RawBlock = elt('RawBlock', 2)
|
| 274 |
+
BlockQuote = elt('BlockQuote', 1)
|
| 275 |
+
OrderedList = elt('OrderedList', 2)
|
| 276 |
+
BulletList = elt('BulletList', 1)
|
| 277 |
+
DefinitionList = elt('DefinitionList', 1)
|
| 278 |
+
Header = elt('Header', 3)
|
| 279 |
+
HorizontalRule = elt('HorizontalRule', 0)
|
| 280 |
+
Table = elt('Table', 5)
|
| 281 |
+
Div = elt('Div', 2)
|
| 282 |
+
Null = elt('Null', 0)
|
| 283 |
+
|
| 284 |
+
# Constructors for inline elements
|
| 285 |
+
|
| 286 |
+
Str = elt('Str', 1)
|
| 287 |
+
Emph = elt('Emph', 1)
|
| 288 |
+
Strong = elt('Strong', 1)
|
| 289 |
+
Strikeout = elt('Strikeout', 1)
|
| 290 |
+
Superscript = elt('Superscript', 1)
|
| 291 |
+
Subscript = elt('Subscript', 1)
|
| 292 |
+
SmallCaps = elt('SmallCaps', 1)
|
| 293 |
+
Quoted = elt('Quoted', 2)
|
| 294 |
+
Cite = elt('Cite', 2)
|
| 295 |
+
Code = elt('Code', 2)
|
| 296 |
+
Space = elt('Space', 0)
|
| 297 |
+
LineBreak = elt('LineBreak', 0)
|
| 298 |
+
Math = elt('Math', 2)
|
| 299 |
+
RawInline = elt('RawInline', 2)
|
| 300 |
+
Link = elt('Link', 3)
|
| 301 |
+
Image = elt('Image', 3)
|
| 302 |
+
Note = elt('Note', 1)
|
| 303 |
+
SoftBreak = elt('SoftBreak', 0)
|
| 304 |
+
Span = elt('Span', 2)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv
ADDED
|
File without changes
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
import sys, types, os;has_mfs = sys.version_info > (3, 5);p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('google',));importlib = has_mfs and __import__('importlib.util');has_mfs and __import__('importlib.machinery');m = has_mfs and sys.modules.setdefault('google', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('google', [os.path.dirname(p)])));m = m or sys.modules.setdefault('google', types.ModuleType('google'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is generated by SciPy's build process
|
| 2 |
+
# It contains system_info results at the time of building this package.
|
| 3 |
+
from enum import Enum
|
| 4 |
+
|
| 5 |
+
__all__ = ["show"]
|
| 6 |
+
_built_with_meson = True
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DisplayModes(Enum):
|
| 10 |
+
stdout = "stdout"
|
| 11 |
+
dicts = "dicts"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _cleanup(d):
|
| 15 |
+
"""
|
| 16 |
+
Removes empty values in a `dict` recursively
|
| 17 |
+
This ensures we remove values that Meson could not provide to CONFIG
|
| 18 |
+
"""
|
| 19 |
+
if isinstance(d, dict):
|
| 20 |
+
return { k: _cleanup(v) for k, v in d.items() if v != '' and _cleanup(v) != '' }
|
| 21 |
+
else:
|
| 22 |
+
return d
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
CONFIG = _cleanup(
|
| 26 |
+
{
|
| 27 |
+
"Compilers": {
|
| 28 |
+
"c": {
|
| 29 |
+
"name": "gcc",
|
| 30 |
+
"linker": r"ld.bfd",
|
| 31 |
+
"version": "10.2.1",
|
| 32 |
+
"commands": r"cc",
|
| 33 |
+
"args": r"",
|
| 34 |
+
"linker args": r"",
|
| 35 |
+
},
|
| 36 |
+
"cython": {
|
| 37 |
+
"name": r"cython",
|
| 38 |
+
"linker": r"cython",
|
| 39 |
+
"version": r"3.0.11",
|
| 40 |
+
"commands": r"cython",
|
| 41 |
+
"args": r"",
|
| 42 |
+
"linker args": r"",
|
| 43 |
+
},
|
| 44 |
+
"c++": {
|
| 45 |
+
"name": "gcc",
|
| 46 |
+
"linker": r"ld.bfd",
|
| 47 |
+
"version": "10.2.1",
|
| 48 |
+
"commands": r"c++",
|
| 49 |
+
"args": r"",
|
| 50 |
+
"linker args": r"",
|
| 51 |
+
},
|
| 52 |
+
"fortran": {
|
| 53 |
+
"name": "gcc",
|
| 54 |
+
"linker": r"ld.bfd",
|
| 55 |
+
"version": "10.2.1",
|
| 56 |
+
"commands": r"gfortran",
|
| 57 |
+
"args": r"",
|
| 58 |
+
"linker args": r"",
|
| 59 |
+
},
|
| 60 |
+
"pythran": {
|
| 61 |
+
"version": r"0.16.1",
|
| 62 |
+
"include directory": r"../../tmp/pip-build-env-h_xz8lfs/overlay/lib/python3.10/site-packages/pythran"
|
| 63 |
+
},
|
| 64 |
+
},
|
| 65 |
+
"Machine Information": {
|
| 66 |
+
"host": {
|
| 67 |
+
"cpu": r"x86_64",
|
| 68 |
+
"family": r"x86_64",
|
| 69 |
+
"endian": r"little",
|
| 70 |
+
"system": r"linux",
|
| 71 |
+
},
|
| 72 |
+
"build": {
|
| 73 |
+
"cpu": r"x86_64",
|
| 74 |
+
"family": r"x86_64",
|
| 75 |
+
"endian": r"little",
|
| 76 |
+
"system": r"linux",
|
| 77 |
+
},
|
| 78 |
+
"cross-compiled": bool("False".lower().replace('false', '')),
|
| 79 |
+
},
|
| 80 |
+
"Build Dependencies": {
|
| 81 |
+
"blas": {
|
| 82 |
+
"name": "scipy-openblas",
|
| 83 |
+
"found": bool("True".lower().replace('false', '')),
|
| 84 |
+
"version": "0.3.27.dev",
|
| 85 |
+
"detection method": "pkgconfig",
|
| 86 |
+
"include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
|
| 87 |
+
"lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
|
| 88 |
+
"openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
|
| 89 |
+
"pc file directory": r"/project",
|
| 90 |
+
},
|
| 91 |
+
"lapack": {
|
| 92 |
+
"name": "scipy-openblas",
|
| 93 |
+
"found": bool("True".lower().replace('false', '')),
|
| 94 |
+
"version": "0.3.27.dev",
|
| 95 |
+
"detection method": "pkgconfig",
|
| 96 |
+
"include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
|
| 97 |
+
"lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
|
| 98 |
+
"openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
|
| 99 |
+
"pc file directory": r"/project",
|
| 100 |
+
},
|
| 101 |
+
"pybind11": {
|
| 102 |
+
"name": "pybind11",
|
| 103 |
+
"version": "2.12.0",
|
| 104 |
+
"detection method": "config-tool",
|
| 105 |
+
"include directory": r"unknown",
|
| 106 |
+
},
|
| 107 |
+
},
|
| 108 |
+
"Python Information": {
|
| 109 |
+
"path": r"/opt/python/cp310-cp310/bin/python",
|
| 110 |
+
"version": "3.10",
|
| 111 |
+
},
|
| 112 |
+
}
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _check_pyyaml():
|
| 117 |
+
import yaml
|
| 118 |
+
|
| 119 |
+
return yaml
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def show(mode=DisplayModes.stdout.value):
|
| 123 |
+
"""
|
| 124 |
+
Show libraries and system information on which SciPy was built
|
| 125 |
+
and is being used
|
| 126 |
+
|
| 127 |
+
Parameters
|
| 128 |
+
----------
|
| 129 |
+
mode : {`'stdout'`, `'dicts'`}, optional.
|
| 130 |
+
Indicates how to display the config information.
|
| 131 |
+
`'stdout'` prints to console, `'dicts'` returns a dictionary
|
| 132 |
+
of the configuration.
|
| 133 |
+
|
| 134 |
+
Returns
|
| 135 |
+
-------
|
| 136 |
+
out : {`dict`, `None`}
|
| 137 |
+
If mode is `'dicts'`, a dict is returned, else None
|
| 138 |
+
|
| 139 |
+
Notes
|
| 140 |
+
-----
|
| 141 |
+
1. The `'stdout'` mode will give more readable
|
| 142 |
+
output if ``pyyaml`` is installed
|
| 143 |
+
|
| 144 |
+
"""
|
| 145 |
+
if mode == DisplayModes.stdout.value:
|
| 146 |
+
try: # Non-standard library, check import
|
| 147 |
+
yaml = _check_pyyaml()
|
| 148 |
+
|
| 149 |
+
print(yaml.dump(CONFIG))
|
| 150 |
+
except ModuleNotFoundError:
|
| 151 |
+
import warnings
|
| 152 |
+
import json
|
| 153 |
+
|
| 154 |
+
warnings.warn("Install `pyyaml` for better output", stacklevel=1)
|
| 155 |
+
print(json.dumps(CONFIG, indent=2))
|
| 156 |
+
elif mode == DisplayModes.dicts.value:
|
| 157 |
+
return CONFIG
|
| 158 |
+
else:
|
| 159 |
+
raise AttributeError(
|
| 160 |
+
f"Invalid `mode`, use one of: {', '.join([e.value for e in DisplayModes])}"
|
| 161 |
+
)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SciPy: A scientific computing package for Python
|
| 3 |
+
================================================
|
| 4 |
+
|
| 5 |
+
Documentation is available in the docstrings and
|
| 6 |
+
online at https://docs.scipy.org.
|
| 7 |
+
|
| 8 |
+
Subpackages
|
| 9 |
+
-----------
|
| 10 |
+
Using any of these subpackages requires an explicit import. For example,
|
| 11 |
+
``import scipy.cluster``.
|
| 12 |
+
|
| 13 |
+
::
|
| 14 |
+
|
| 15 |
+
cluster --- Vector Quantization / Kmeans
|
| 16 |
+
constants --- Physical and mathematical constants and units
|
| 17 |
+
datasets --- Dataset methods
|
| 18 |
+
fft --- Discrete Fourier transforms
|
| 19 |
+
fftpack --- Legacy discrete Fourier transforms
|
| 20 |
+
integrate --- Integration routines
|
| 21 |
+
interpolate --- Interpolation Tools
|
| 22 |
+
io --- Data input and output
|
| 23 |
+
linalg --- Linear algebra routines
|
| 24 |
+
misc --- Utilities that don't have another home.
|
| 25 |
+
ndimage --- N-D image package
|
| 26 |
+
odr --- Orthogonal Distance Regression
|
| 27 |
+
optimize --- Optimization Tools
|
| 28 |
+
signal --- Signal Processing Tools
|
| 29 |
+
sparse --- Sparse Matrices
|
| 30 |
+
spatial --- Spatial data structures and algorithms
|
| 31 |
+
special --- Special functions
|
| 32 |
+
stats --- Statistical Functions
|
| 33 |
+
|
| 34 |
+
Public API in the main SciPy namespace
|
| 35 |
+
--------------------------------------
|
| 36 |
+
::
|
| 37 |
+
|
| 38 |
+
__version__ --- SciPy version string
|
| 39 |
+
LowLevelCallable --- Low-level callback function
|
| 40 |
+
show_config --- Show scipy build configuration
|
| 41 |
+
test --- Run scipy unittests
|
| 42 |
+
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
import importlib as _importlib
|
| 46 |
+
|
| 47 |
+
from numpy import __version__ as __numpy_version__
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
try:
|
| 51 |
+
from scipy.__config__ import show as show_config
|
| 52 |
+
except ImportError as e:
|
| 53 |
+
msg = """Error importing SciPy: you cannot import SciPy while
|
| 54 |
+
being in scipy source directory; please exit the SciPy source
|
| 55 |
+
tree first and relaunch your Python interpreter."""
|
| 56 |
+
raise ImportError(msg) from e
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
from scipy.version import version as __version__
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# Allow distributors to run custom init code
|
| 63 |
+
from . import _distributor_init
|
| 64 |
+
del _distributor_init
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
from scipy._lib import _pep440
|
| 68 |
+
# In maintenance branch, change to np_maxversion N+3 if numpy is at N
|
| 69 |
+
np_minversion = '1.23.5'
|
| 70 |
+
np_maxversion = '2.3.0'
|
| 71 |
+
if (_pep440.parse(__numpy_version__) < _pep440.Version(np_minversion) or
|
| 72 |
+
_pep440.parse(__numpy_version__) >= _pep440.Version(np_maxversion)):
|
| 73 |
+
import warnings
|
| 74 |
+
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
|
| 75 |
+
f" is required for this version of SciPy (detected "
|
| 76 |
+
f"version {__numpy_version__})",
|
| 77 |
+
UserWarning, stacklevel=2)
|
| 78 |
+
del _pep440
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# This is the first import of an extension module within SciPy. If there's
|
| 82 |
+
# a general issue with the install, such that extension modules are missing
|
| 83 |
+
# or cannot be imported, this is where we'll get a failure - so give an
|
| 84 |
+
# informative error message.
|
| 85 |
+
try:
|
| 86 |
+
from scipy._lib._ccallback import LowLevelCallable
|
| 87 |
+
except ImportError as e:
|
| 88 |
+
msg = "The `scipy` install you are using seems to be broken, " + \
|
| 89 |
+
"(extension modules cannot be imported), " + \
|
| 90 |
+
"please try reinstalling."
|
| 91 |
+
raise ImportError(msg) from e
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
from scipy._lib._testutils import PytestTester
|
| 95 |
+
test = PytestTester(__name__)
|
| 96 |
+
del PytestTester
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
submodules = [
|
| 100 |
+
'cluster',
|
| 101 |
+
'constants',
|
| 102 |
+
'datasets',
|
| 103 |
+
'fft',
|
| 104 |
+
'fftpack',
|
| 105 |
+
'integrate',
|
| 106 |
+
'interpolate',
|
| 107 |
+
'io',
|
| 108 |
+
'linalg',
|
| 109 |
+
'misc',
|
| 110 |
+
'ndimage',
|
| 111 |
+
'odr',
|
| 112 |
+
'optimize',
|
| 113 |
+
'signal',
|
| 114 |
+
'sparse',
|
| 115 |
+
'spatial',
|
| 116 |
+
'special',
|
| 117 |
+
'stats'
|
| 118 |
+
]
|
| 119 |
+
|
| 120 |
+
__all__ = submodules + [
|
| 121 |
+
'LowLevelCallable',
|
| 122 |
+
'test',
|
| 123 |
+
'show_config',
|
| 124 |
+
'__version__',
|
| 125 |
+
]
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def __dir__():
|
| 129 |
+
return __all__
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def __getattr__(name):
|
| 133 |
+
if name in submodules:
|
| 134 |
+
return _importlib.import_module(f'scipy.{name}')
|
| 135 |
+
else:
|
| 136 |
+
try:
|
| 137 |
+
return globals()[name]
|
| 138 |
+
except KeyError:
|
| 139 |
+
raise AttributeError(
|
| 140 |
+
f"Module 'scipy' has no attribute '{name}'"
|
| 141 |
+
)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Distributor init file
|
| 2 |
+
|
| 3 |
+
Distributors: you can replace the contents of this file with your own custom
|
| 4 |
+
code to support particular distributions of SciPy.
|
| 5 |
+
|
| 6 |
+
For example, this is a good place to put any checks for hardware requirements
|
| 7 |
+
or BLAS/LAPACK library initialization.
|
| 8 |
+
|
| 9 |
+
The SciPy standard source distribution will not put code in this file beyond
|
| 10 |
+
the try-except import of `_distributor_init_local` (which is not part of a
|
| 11 |
+
standard source distribution), so you can safely replace this file with your
|
| 12 |
+
own version.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
from . import _distributor_init_local # noqa: F401
|
| 17 |
+
except ImportError:
|
| 18 |
+
pass
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pytest customization
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import warnings
|
| 5 |
+
import tempfile
|
| 6 |
+
from contextlib import contextmanager
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import numpy.testing as npt
|
| 10 |
+
import pytest
|
| 11 |
+
import hypothesis
|
| 12 |
+
|
| 13 |
+
from scipy._lib._fpumode import get_fpu_mode
|
| 14 |
+
from scipy._lib._testutils import FPUModeChangeWarning
|
| 15 |
+
from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_DEVICE
|
| 16 |
+
from scipy._lib import _pep440
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
from scipy_doctest.conftest import dt_config
|
| 20 |
+
HAVE_SCPDT = True
|
| 21 |
+
except ModuleNotFoundError:
|
| 22 |
+
HAVE_SCPDT = False
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def pytest_configure(config):
|
| 26 |
+
config.addinivalue_line("markers",
|
| 27 |
+
"slow: Tests that are very slow.")
|
| 28 |
+
config.addinivalue_line("markers",
|
| 29 |
+
"xslow: mark test as extremely slow (not run unless explicitly requested)")
|
| 30 |
+
config.addinivalue_line("markers",
|
| 31 |
+
"xfail_on_32bit: mark test as failing on 32-bit platforms")
|
| 32 |
+
try:
|
| 33 |
+
import pytest_timeout # noqa:F401
|
| 34 |
+
except Exception:
|
| 35 |
+
config.addinivalue_line(
|
| 36 |
+
"markers", 'timeout: mark a test for a non-default timeout')
|
| 37 |
+
try:
|
| 38 |
+
# This is a more reliable test of whether pytest_fail_slow is installed
|
| 39 |
+
# When I uninstalled it, `import pytest_fail_slow` didn't fail!
|
| 40 |
+
from pytest_fail_slow import parse_duration # type: ignore[import-not-found] # noqa:F401,E501
|
| 41 |
+
except Exception:
|
| 42 |
+
config.addinivalue_line(
|
| 43 |
+
"markers", 'fail_slow: mark a test for a non-default timeout failure')
|
| 44 |
+
config.addinivalue_line("markers",
|
| 45 |
+
"skip_xp_backends(*backends, reasons=None, np_only=False, cpu_only=False): "
|
| 46 |
+
"mark the desired skip configuration for the `skip_xp_backends` fixture.")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def pytest_runtest_setup(item):
|
| 50 |
+
mark = item.get_closest_marker("xslow")
|
| 51 |
+
if mark is not None:
|
| 52 |
+
try:
|
| 53 |
+
v = int(os.environ.get('SCIPY_XSLOW', '0'))
|
| 54 |
+
except ValueError:
|
| 55 |
+
v = False
|
| 56 |
+
if not v:
|
| 57 |
+
pytest.skip("very slow test; "
|
| 58 |
+
"set environment variable SCIPY_XSLOW=1 to run it")
|
| 59 |
+
mark = item.get_closest_marker("xfail_on_32bit")
|
| 60 |
+
if mark is not None and np.intp(0).itemsize < 8:
|
| 61 |
+
pytest.xfail(f'Fails on our 32-bit test platform(s): {mark.args[0]}')
|
| 62 |
+
|
| 63 |
+
# Older versions of threadpoolctl have an issue that may lead to this
|
| 64 |
+
# warning being emitted, see gh-14441
|
| 65 |
+
with npt.suppress_warnings() as sup:
|
| 66 |
+
sup.filter(pytest.PytestUnraisableExceptionWarning)
|
| 67 |
+
|
| 68 |
+
try:
|
| 69 |
+
from threadpoolctl import threadpool_limits
|
| 70 |
+
|
| 71 |
+
HAS_THREADPOOLCTL = True
|
| 72 |
+
except Exception: # observed in gh-14441: (ImportError, AttributeError)
|
| 73 |
+
# Optional dependency only. All exceptions are caught, for robustness
|
| 74 |
+
HAS_THREADPOOLCTL = False
|
| 75 |
+
|
| 76 |
+
if HAS_THREADPOOLCTL:
|
| 77 |
+
# Set the number of openmp threads based on the number of workers
|
| 78 |
+
# xdist is using to prevent oversubscription. Simplified version of what
|
| 79 |
+
# sklearn does (it can rely on threadpoolctl and its builtin OpenMP helper
|
| 80 |
+
# functions)
|
| 81 |
+
try:
|
| 82 |
+
xdist_worker_count = int(os.environ['PYTEST_XDIST_WORKER_COUNT'])
|
| 83 |
+
except KeyError:
|
| 84 |
+
# raises when pytest-xdist is not installed
|
| 85 |
+
return
|
| 86 |
+
|
| 87 |
+
if not os.getenv('OMP_NUM_THREADS'):
|
| 88 |
+
max_openmp_threads = os.cpu_count() // 2 # use nr of physical cores
|
| 89 |
+
threads_per_worker = max(max_openmp_threads // xdist_worker_count, 1)
|
| 90 |
+
try:
|
| 91 |
+
threadpool_limits(threads_per_worker, user_api='blas')
|
| 92 |
+
except Exception:
|
| 93 |
+
# May raise AttributeError for older versions of OpenBLAS.
|
| 94 |
+
# Catch any error for robustness.
|
| 95 |
+
return
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@pytest.fixture(scope="function", autouse=True)
|
| 99 |
+
def check_fpu_mode(request):
|
| 100 |
+
"""
|
| 101 |
+
Check FPU mode was not changed during the test.
|
| 102 |
+
"""
|
| 103 |
+
old_mode = get_fpu_mode()
|
| 104 |
+
yield
|
| 105 |
+
new_mode = get_fpu_mode()
|
| 106 |
+
|
| 107 |
+
if old_mode != new_mode:
|
| 108 |
+
warnings.warn(f"FPU mode changed from {old_mode:#x} to {new_mode:#x} during "
|
| 109 |
+
"the test",
|
| 110 |
+
category=FPUModeChangeWarning, stacklevel=0)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# Array API backend handling
|
| 114 |
+
xp_available_backends = {'numpy': np}
|
| 115 |
+
|
| 116 |
+
if SCIPY_ARRAY_API and isinstance(SCIPY_ARRAY_API, str):
|
| 117 |
+
# fill the dict of backends with available libraries
|
| 118 |
+
try:
|
| 119 |
+
import array_api_strict
|
| 120 |
+
xp_available_backends.update({'array_api_strict': array_api_strict})
|
| 121 |
+
if _pep440.parse(array_api_strict.__version__) < _pep440.Version('2.0'):
|
| 122 |
+
raise ImportError("array-api-strict must be >= version 2.0")
|
| 123 |
+
array_api_strict.set_array_api_strict_flags(
|
| 124 |
+
api_version='2023.12'
|
| 125 |
+
)
|
| 126 |
+
except ImportError:
|
| 127 |
+
pass
|
| 128 |
+
|
| 129 |
+
try:
|
| 130 |
+
import torch # type: ignore[import-not-found]
|
| 131 |
+
xp_available_backends.update({'pytorch': torch})
|
| 132 |
+
# can use `mps` or `cpu`
|
| 133 |
+
torch.set_default_device(SCIPY_DEVICE)
|
| 134 |
+
except ImportError:
|
| 135 |
+
pass
|
| 136 |
+
|
| 137 |
+
try:
|
| 138 |
+
import cupy # type: ignore[import-not-found]
|
| 139 |
+
xp_available_backends.update({'cupy': cupy})
|
| 140 |
+
except ImportError:
|
| 141 |
+
pass
|
| 142 |
+
|
| 143 |
+
try:
|
| 144 |
+
import jax.numpy # type: ignore[import-not-found]
|
| 145 |
+
xp_available_backends.update({'jax.numpy': jax.numpy})
|
| 146 |
+
jax.config.update("jax_enable_x64", True)
|
| 147 |
+
jax.config.update("jax_default_device", jax.devices(SCIPY_DEVICE)[0])
|
| 148 |
+
except ImportError:
|
| 149 |
+
pass
|
| 150 |
+
|
| 151 |
+
# by default, use all available backends
|
| 152 |
+
if SCIPY_ARRAY_API.lower() not in ("1", "true"):
|
| 153 |
+
SCIPY_ARRAY_API_ = json.loads(SCIPY_ARRAY_API)
|
| 154 |
+
|
| 155 |
+
if 'all' in SCIPY_ARRAY_API_:
|
| 156 |
+
pass # same as True
|
| 157 |
+
else:
|
| 158 |
+
# only select a subset of backend by filtering out the dict
|
| 159 |
+
try:
|
| 160 |
+
xp_available_backends = {
|
| 161 |
+
backend: xp_available_backends[backend]
|
| 162 |
+
for backend in SCIPY_ARRAY_API_
|
| 163 |
+
}
|
| 164 |
+
except KeyError:
|
| 165 |
+
msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}"
|
| 166 |
+
raise ValueError(msg)
|
| 167 |
+
|
| 168 |
+
if 'cupy' in xp_available_backends:
|
| 169 |
+
SCIPY_DEVICE = 'cuda'
|
| 170 |
+
|
| 171 |
+
array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values())
|
| 172 |
+
|
| 173 |
+
skip_xp_invalid_arg = pytest.mark.skipif(SCIPY_ARRAY_API,
|
| 174 |
+
reason = ('Test involves masked arrays, object arrays, or other types '
|
| 175 |
+
'that are not valid input when `SCIPY_ARRAY_API` is used.'))
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@pytest.fixture
|
| 179 |
+
def skip_xp_backends(xp, request):
|
| 180 |
+
"""
|
| 181 |
+
Skip based on the ``skip_xp_backends`` marker.
|
| 182 |
+
|
| 183 |
+
Parameters
|
| 184 |
+
----------
|
| 185 |
+
*backends : tuple
|
| 186 |
+
Backends to skip, e.g. ``("array_api_strict", "torch")``.
|
| 187 |
+
These are overriden when ``np_only`` is ``True``, and are not
|
| 188 |
+
necessary to provide for non-CPU backends when ``cpu_only`` is ``True``.
|
| 189 |
+
reasons : list, optional
|
| 190 |
+
A list of reasons for each skip. When ``np_only`` is ``True``,
|
| 191 |
+
this should be a singleton list. Otherwise, this should be a list
|
| 192 |
+
of reasons, one for each corresponding backend in ``backends``.
|
| 193 |
+
If unprovided, default reasons are used. Note that it is not possible
|
| 194 |
+
to specify a custom reason with ``cpu_only``. Default: ``None``.
|
| 195 |
+
np_only : bool, optional
|
| 196 |
+
When ``True``, the test is skipped for all backends other
|
| 197 |
+
than the default NumPy backend. There is no need to provide
|
| 198 |
+
any ``backends`` in this case. To specify a reason, pass a
|
| 199 |
+
singleton list to ``reasons``. Default: ``False``.
|
| 200 |
+
cpu_only : bool, optional
|
| 201 |
+
When ``True``, the test is skipped on non-CPU devices.
|
| 202 |
+
There is no need to provide any ``backends`` in this case,
|
| 203 |
+
but any ``backends`` will also be skipped on the CPU.
|
| 204 |
+
Default: ``False``.
|
| 205 |
+
"""
|
| 206 |
+
if "skip_xp_backends" not in request.keywords:
|
| 207 |
+
return
|
| 208 |
+
backends = request.keywords["skip_xp_backends"].args
|
| 209 |
+
kwargs = request.keywords["skip_xp_backends"].kwargs
|
| 210 |
+
np_only = kwargs.get("np_only", False)
|
| 211 |
+
cpu_only = kwargs.get("cpu_only", False)
|
| 212 |
+
if np_only:
|
| 213 |
+
reasons = kwargs.get("reasons", ["do not run with non-NumPy backends."])
|
| 214 |
+
reason = reasons[0]
|
| 215 |
+
if xp.__name__ != 'numpy':
|
| 216 |
+
pytest.skip(reason=reason)
|
| 217 |
+
return
|
| 218 |
+
if cpu_only:
|
| 219 |
+
reason = "do not run with `SCIPY_ARRAY_API` set and not on CPU"
|
| 220 |
+
if SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu':
|
| 221 |
+
if xp.__name__ == 'cupy':
|
| 222 |
+
pytest.skip(reason=reason)
|
| 223 |
+
elif xp.__name__ == 'torch':
|
| 224 |
+
if 'cpu' not in xp.empty(0).device.type:
|
| 225 |
+
pytest.skip(reason=reason)
|
| 226 |
+
elif xp.__name__ == 'jax.numpy':
|
| 227 |
+
for d in xp.empty(0).devices():
|
| 228 |
+
if 'cpu' not in d.device_kind:
|
| 229 |
+
pytest.skip(reason=reason)
|
| 230 |
+
|
| 231 |
+
if backends is not None:
|
| 232 |
+
reasons = kwargs.get("reasons", False)
|
| 233 |
+
for i, backend in enumerate(backends):
|
| 234 |
+
if xp.__name__ == backend:
|
| 235 |
+
if not reasons:
|
| 236 |
+
reason = f"do not run with array API backend: {backend}"
|
| 237 |
+
else:
|
| 238 |
+
reason = reasons[i]
|
| 239 |
+
pytest.skip(reason=reason)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# Following the approach of NumPy's conftest.py...
|
| 243 |
+
# Use a known and persistent tmpdir for hypothesis' caches, which
|
| 244 |
+
# can be automatically cleared by the OS or user.
|
| 245 |
+
hypothesis.configuration.set_hypothesis_home_dir(
|
| 246 |
+
os.path.join(tempfile.gettempdir(), ".hypothesis")
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
# We register two custom profiles for SciPy - for details see
|
| 250 |
+
# https://hypothesis.readthedocs.io/en/latest/settings.html
|
| 251 |
+
# The first is designed for our own CI runs; the latter also
|
| 252 |
+
# forces determinism and is designed for use via scipy.test()
|
| 253 |
+
hypothesis.settings.register_profile(
|
| 254 |
+
name="nondeterministic", deadline=None, print_blob=True,
|
| 255 |
+
)
|
| 256 |
+
hypothesis.settings.register_profile(
|
| 257 |
+
name="deterministic",
|
| 258 |
+
deadline=None, print_blob=True, database=None, derandomize=True,
|
| 259 |
+
suppress_health_check=list(hypothesis.HealthCheck),
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
# Profile is currently set by environment variable `SCIPY_HYPOTHESIS_PROFILE`
|
| 263 |
+
# In the future, it would be good to work the choice into dev.py.
|
| 264 |
+
SCIPY_HYPOTHESIS_PROFILE = os.environ.get("SCIPY_HYPOTHESIS_PROFILE",
|
| 265 |
+
"deterministic")
|
| 266 |
+
hypothesis.settings.load_profile(SCIPY_HYPOTHESIS_PROFILE)
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
############################################################################
|
| 270 |
+
# doctesting stuff
|
| 271 |
+
|
| 272 |
+
if HAVE_SCPDT:
|
| 273 |
+
|
| 274 |
+
# FIXME: populate the dict once
|
| 275 |
+
@contextmanager
|
| 276 |
+
def warnings_errors_and_rng(test=None):
|
| 277 |
+
"""Temporarily turn (almost) all warnings to errors.
|
| 278 |
+
|
| 279 |
+
Filter out known warnings which we allow.
|
| 280 |
+
"""
|
| 281 |
+
known_warnings = dict()
|
| 282 |
+
|
| 283 |
+
# these functions are known to emit "divide by zero" RuntimeWarnings
|
| 284 |
+
divide_by_zero = [
|
| 285 |
+
'scipy.linalg.norm', 'scipy.ndimage.center_of_mass',
|
| 286 |
+
]
|
| 287 |
+
for name in divide_by_zero:
|
| 288 |
+
known_warnings[name] = dict(category=RuntimeWarning,
|
| 289 |
+
message='divide by zero')
|
| 290 |
+
|
| 291 |
+
# Deprecated stuff in scipy.signal and elsewhere
|
| 292 |
+
deprecated = [
|
| 293 |
+
'scipy.signal.cwt', 'scipy.signal.morlet', 'scipy.signal.morlet2',
|
| 294 |
+
'scipy.signal.ricker',
|
| 295 |
+
'scipy.integrate.simpson',
|
| 296 |
+
'scipy.interpolate.interp2d',
|
| 297 |
+
]
|
| 298 |
+
for name in deprecated:
|
| 299 |
+
known_warnings[name] = dict(category=DeprecationWarning)
|
| 300 |
+
|
| 301 |
+
from scipy import integrate
|
| 302 |
+
# the funcions are known to emit IntergrationWarnings
|
| 303 |
+
integration_w = ['scipy.special.ellip_normal',
|
| 304 |
+
'scipy.special.ellip_harm_2',
|
| 305 |
+
]
|
| 306 |
+
for name in integration_w:
|
| 307 |
+
known_warnings[name] = dict(category=integrate.IntegrationWarning,
|
| 308 |
+
message='The occurrence of roundoff')
|
| 309 |
+
|
| 310 |
+
# scipy.stats deliberately emits UserWarnings sometimes
|
| 311 |
+
user_w = ['scipy.stats.anderson_ksamp', 'scipy.stats.kurtosistest',
|
| 312 |
+
'scipy.stats.normaltest', 'scipy.sparse.linalg.norm']
|
| 313 |
+
for name in user_w:
|
| 314 |
+
known_warnings[name] = dict(category=UserWarning)
|
| 315 |
+
|
| 316 |
+
# additional one-off warnings to filter
|
| 317 |
+
dct = {
|
| 318 |
+
'scipy.sparse.linalg.norm':
|
| 319 |
+
dict(category=UserWarning, message="Exited at iteration"),
|
| 320 |
+
# tutorials
|
| 321 |
+
'linalg.rst':
|
| 322 |
+
dict(message='the matrix subclass is not',
|
| 323 |
+
category=PendingDeprecationWarning),
|
| 324 |
+
'stats.rst':
|
| 325 |
+
dict(message='The maximum number of subdivisions',
|
| 326 |
+
category=integrate.IntegrationWarning),
|
| 327 |
+
}
|
| 328 |
+
known_warnings.update(dct)
|
| 329 |
+
|
| 330 |
+
# these legitimately emit warnings in examples
|
| 331 |
+
legit = set('scipy.signal.normalize')
|
| 332 |
+
|
| 333 |
+
# Now, the meat of the matter: filter warnings,
|
| 334 |
+
# also control the random seed for each doctest.
|
| 335 |
+
|
| 336 |
+
# XXX: this matches the refguide-check behavior, but is a tad strange:
|
| 337 |
+
# makes sure that the seed the old-fashioned np.random* methods is
|
| 338 |
+
# *NOT* reproducible but the new-style `default_rng()` *IS* repoducible.
|
| 339 |
+
# Should these two be either both repro or both not repro?
|
| 340 |
+
|
| 341 |
+
from scipy._lib._util import _fixed_default_rng
|
| 342 |
+
import numpy as np
|
| 343 |
+
with _fixed_default_rng():
|
| 344 |
+
np.random.seed(None)
|
| 345 |
+
with warnings.catch_warnings():
|
| 346 |
+
if test and test.name in known_warnings:
|
| 347 |
+
warnings.filterwarnings('ignore',
|
| 348 |
+
**known_warnings[test.name])
|
| 349 |
+
yield
|
| 350 |
+
elif test and test.name in legit:
|
| 351 |
+
yield
|
| 352 |
+
else:
|
| 353 |
+
warnings.simplefilter('error', Warning)
|
| 354 |
+
yield
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
dt_config.user_context_mgr = warnings_errors_and_rng
|
| 358 |
+
dt_config.skiplist = set([
|
| 359 |
+
'scipy.linalg.LinAlgError', # comes from numpy
|
| 360 |
+
'scipy.fftpack.fftshift', # fftpack stuff is also from numpy
|
| 361 |
+
'scipy.fftpack.ifftshift',
|
| 362 |
+
'scipy.fftpack.fftfreq',
|
| 363 |
+
'scipy.special.sinc', # sinc is from numpy
|
| 364 |
+
'scipy.optimize.show_options', # does not have much to doctest
|
| 365 |
+
'scipy.signal.normalize', # manipulates warnings (XXX temp skip)
|
| 366 |
+
'scipy.sparse.linalg.norm', # XXX temp skip
|
| 367 |
+
])
|
| 368 |
+
|
| 369 |
+
# these are affected by NumPy 2.0 scalar repr: rely on string comparison
|
| 370 |
+
if np.__version__ < "2":
|
| 371 |
+
dt_config.skiplist.update(set([
|
| 372 |
+
'scipy.io.hb_read',
|
| 373 |
+
'scipy.io.hb_write',
|
| 374 |
+
'scipy.sparse.csgraph.connected_components',
|
| 375 |
+
'scipy.sparse.csgraph.depth_first_order',
|
| 376 |
+
'scipy.sparse.csgraph.shortest_path',
|
| 377 |
+
'scipy.sparse.csgraph.floyd_warshall',
|
| 378 |
+
'scipy.sparse.csgraph.dijkstra',
|
| 379 |
+
'scipy.sparse.csgraph.bellman_ford',
|
| 380 |
+
'scipy.sparse.csgraph.johnson',
|
| 381 |
+
'scipy.sparse.csgraph.yen',
|
| 382 |
+
'scipy.sparse.csgraph.breadth_first_order',
|
| 383 |
+
'scipy.sparse.csgraph.reverse_cuthill_mckee',
|
| 384 |
+
'scipy.sparse.csgraph.structural_rank',
|
| 385 |
+
'scipy.sparse.csgraph.construct_dist_matrix',
|
| 386 |
+
'scipy.sparse.csgraph.reconstruct_path',
|
| 387 |
+
'scipy.ndimage.value_indices',
|
| 388 |
+
'scipy.stats.mstats.describe',
|
| 389 |
+
]))
|
| 390 |
+
|
| 391 |
+
# help pytest collection a bit: these names are either private
|
| 392 |
+
# (distributions), or just do not need doctesting.
|
| 393 |
+
dt_config.pytest_extra_ignore = [
|
| 394 |
+
"scipy.stats.distributions",
|
| 395 |
+
"scipy.optimize.cython_optimize",
|
| 396 |
+
"scipy.test",
|
| 397 |
+
"scipy.show_config",
|
| 398 |
+
]
|
| 399 |
+
|
| 400 |
+
dt_config.pytest_extra_xfail = {
|
| 401 |
+
# name: reason
|
| 402 |
+
"io.rst": "",
|
| 403 |
+
"ND_regular_grid.rst": "ReST parser limitation",
|
| 404 |
+
"extrapolation_examples.rst": "ReST parser limitation",
|
| 405 |
+
"sampling_pinv.rst": "__cinit__ unexpected argument",
|
| 406 |
+
"sampling_srou.rst": "nan in scalar_power",
|
| 407 |
+
"probability_distributions.rst": "integration warning",
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
# tutorials
|
| 411 |
+
dt_config.pseudocode = set(['integrate.nquad(func,'])
|
| 412 |
+
dt_config.local_resources = {'io.rst': ["octave_a.mat"]}
|
| 413 |
+
############################################################################
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from scipy.linalg cimport cython_blas, cython_lapack
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .optimize cimport cython_optimize
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
From the website for the L-BFGS-B code (from at
|
| 2 |
+
http://www.ece.northwestern.edu/~nocedal/lbfgsb.html):
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
L-BFGS-B is a limited-memory quasi-Newton code for bound-constrained
|
| 6 |
+
optimization, i.e. for problems where the only constraints are of the
|
| 7 |
+
form l<= x <= u.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
This is a Python wrapper (using F2PY) written by David M. Cooke
|
| 11 |
+
<cookedm@physics.mcmaster.ca> and released as version 0.9 on April 9, 2004.
|
| 12 |
+
The wrapper was slightly modified by Joonas Paalasmaa for the 3.0 version
|
| 13 |
+
in March 2012.
|
| 14 |
+
|
| 15 |
+
License of L-BFGS-B (Fortran code)
|
| 16 |
+
==================================
|
| 17 |
+
|
| 18 |
+
The version included here (in lbfgsb.f) is 3.0 (released April 25, 2011). It was
|
| 19 |
+
written by Ciyou Zhu, Richard Byrd, and Jorge Nocedal <nocedal@ece.nwu.edu>. It
|
| 20 |
+
carries the following condition for use:
|
| 21 |
+
|
| 22 |
+
"""
|
| 23 |
+
This software is freely available, but we expect that all publications
|
| 24 |
+
describing work using this software, or all commercial products using it,
|
| 25 |
+
quote at least one of the references given below. This software is released
|
| 26 |
+
under the BSD License.
|
| 27 |
+
|
| 28 |
+
References
|
| 29 |
+
* R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
|
| 30 |
+
Constrained Optimization, (1995), SIAM Journal on Scientific and
|
| 31 |
+
Statistical Computing, 16, 5, pp. 1190-1208.
|
| 32 |
+
* C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
|
| 33 |
+
FORTRAN routines for large scale bound constrained optimization (1997),
|
| 34 |
+
ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
|
| 35 |
+
* J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
|
| 36 |
+
FORTRAN routines for large scale bound constrained optimization (2011),
|
| 37 |
+
ACM Transactions on Mathematical Software, 38, 1.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
The Python wrapper
|
| 41 |
+
==================
|
| 42 |
+
|
| 43 |
+
This code uses F2PY (http://cens.ioc.ee/projects/f2py2e/) to generate
|
| 44 |
+
the wrapper around the Fortran code.
|
| 45 |
+
|
| 46 |
+
The Python code and wrapper are copyrighted 2004 by David M. Cooke
|
| 47 |
+
<cookedm@physics.mcmaster.ca>.
|
| 48 |
+
|
| 49 |
+
Example usage
|
| 50 |
+
=============
|
| 51 |
+
|
| 52 |
+
An example of the usage is given at the bottom of the lbfgsb.py file.
|
| 53 |
+
Run it with 'python lbfgsb.py'.
|
| 54 |
+
|
| 55 |
+
License for the Python wrapper
|
| 56 |
+
==============================
|
| 57 |
+
|
| 58 |
+
Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
|
| 59 |
+
|
| 60 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
| 61 |
+
this software and associated documentation files (the "Software"), to deal in
|
| 62 |
+
the Software without restriction, including without limitation the rights to
|
| 63 |
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
| 64 |
+
of the Software, and to permit persons to whom the Software is furnished to do
|
| 65 |
+
so, subject to the following conditions:
|
| 66 |
+
|
| 67 |
+
The above copyright notice and this permission notice shall be included in all
|
| 68 |
+
copies or substantial portions of the Software.
|
| 69 |
+
|
| 70 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 71 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 72 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 73 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 74 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 75 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 76 |
+
SOFTWARE.
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
=====================================================
|
| 3 |
+
Optimization and root finding (:mod:`scipy.optimize`)
|
| 4 |
+
=====================================================
|
| 5 |
+
|
| 6 |
+
.. currentmodule:: scipy.optimize
|
| 7 |
+
|
| 8 |
+
.. toctree::
|
| 9 |
+
:hidden:
|
| 10 |
+
|
| 11 |
+
optimize.cython_optimize
|
| 12 |
+
|
| 13 |
+
SciPy ``optimize`` provides functions for minimizing (or maximizing)
|
| 14 |
+
objective functions, possibly subject to constraints. It includes
|
| 15 |
+
solvers for nonlinear problems (with support for both local and global
|
| 16 |
+
optimization algorithms), linear programming, constrained
|
| 17 |
+
and nonlinear least-squares, root finding, and curve fitting.
|
| 18 |
+
|
| 19 |
+
Common functions and objects, shared across different solvers, are:
|
| 20 |
+
|
| 21 |
+
.. autosummary::
|
| 22 |
+
:toctree: generated/
|
| 23 |
+
|
| 24 |
+
show_options - Show specific options optimization solvers.
|
| 25 |
+
OptimizeResult - The optimization result returned by some optimizers.
|
| 26 |
+
OptimizeWarning - The optimization encountered problems.
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
Optimization
|
| 30 |
+
============
|
| 31 |
+
|
| 32 |
+
Scalar functions optimization
|
| 33 |
+
-----------------------------
|
| 34 |
+
|
| 35 |
+
.. autosummary::
|
| 36 |
+
:toctree: generated/
|
| 37 |
+
|
| 38 |
+
minimize_scalar - Interface for minimizers of univariate functions
|
| 39 |
+
|
| 40 |
+
The `minimize_scalar` function supports the following methods:
|
| 41 |
+
|
| 42 |
+
.. toctree::
|
| 43 |
+
|
| 44 |
+
optimize.minimize_scalar-brent
|
| 45 |
+
optimize.minimize_scalar-bounded
|
| 46 |
+
optimize.minimize_scalar-golden
|
| 47 |
+
|
| 48 |
+
Local (multivariate) optimization
|
| 49 |
+
---------------------------------
|
| 50 |
+
|
| 51 |
+
.. autosummary::
|
| 52 |
+
:toctree: generated/
|
| 53 |
+
|
| 54 |
+
minimize - Interface for minimizers of multivariate functions.
|
| 55 |
+
|
| 56 |
+
The `minimize` function supports the following methods:
|
| 57 |
+
|
| 58 |
+
.. toctree::
|
| 59 |
+
|
| 60 |
+
optimize.minimize-neldermead
|
| 61 |
+
optimize.minimize-powell
|
| 62 |
+
optimize.minimize-cg
|
| 63 |
+
optimize.minimize-bfgs
|
| 64 |
+
optimize.minimize-newtoncg
|
| 65 |
+
optimize.minimize-lbfgsb
|
| 66 |
+
optimize.minimize-tnc
|
| 67 |
+
optimize.minimize-cobyla
|
| 68 |
+
optimize.minimize-cobyqa
|
| 69 |
+
optimize.minimize-slsqp
|
| 70 |
+
optimize.minimize-trustconstr
|
| 71 |
+
optimize.minimize-dogleg
|
| 72 |
+
optimize.minimize-trustncg
|
| 73 |
+
optimize.minimize-trustkrylov
|
| 74 |
+
optimize.minimize-trustexact
|
| 75 |
+
|
| 76 |
+
Constraints are passed to `minimize` function as a single object or
|
| 77 |
+
as a list of objects from the following classes:
|
| 78 |
+
|
| 79 |
+
.. autosummary::
|
| 80 |
+
:toctree: generated/
|
| 81 |
+
|
| 82 |
+
NonlinearConstraint - Class defining general nonlinear constraints.
|
| 83 |
+
LinearConstraint - Class defining general linear constraints.
|
| 84 |
+
|
| 85 |
+
Simple bound constraints are handled separately and there is a special class
|
| 86 |
+
for them:
|
| 87 |
+
|
| 88 |
+
.. autosummary::
|
| 89 |
+
:toctree: generated/
|
| 90 |
+
|
| 91 |
+
Bounds - Bound constraints.
|
| 92 |
+
|
| 93 |
+
Quasi-Newton strategies implementing `HessianUpdateStrategy`
|
| 94 |
+
interface can be used to approximate the Hessian in `minimize`
|
| 95 |
+
function (available only for the 'trust-constr' method). Available
|
| 96 |
+
quasi-Newton methods implementing this interface are:
|
| 97 |
+
|
| 98 |
+
.. autosummary::
|
| 99 |
+
:toctree: generated/
|
| 100 |
+
|
| 101 |
+
BFGS - Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
|
| 102 |
+
SR1 - Symmetric-rank-1 Hessian update strategy.
|
| 103 |
+
|
| 104 |
+
.. _global_optimization:
|
| 105 |
+
|
| 106 |
+
Global optimization
|
| 107 |
+
-------------------
|
| 108 |
+
|
| 109 |
+
.. autosummary::
|
| 110 |
+
:toctree: generated/
|
| 111 |
+
|
| 112 |
+
basinhopping - Basinhopping stochastic optimizer.
|
| 113 |
+
brute - Brute force searching optimizer.
|
| 114 |
+
differential_evolution - Stochastic optimizer using differential evolution.
|
| 115 |
+
|
| 116 |
+
shgo - Simplicial homology global optimizer.
|
| 117 |
+
dual_annealing - Dual annealing stochastic optimizer.
|
| 118 |
+
direct - DIRECT (Dividing Rectangles) optimizer.
|
| 119 |
+
|
| 120 |
+
Least-squares and curve fitting
|
| 121 |
+
===============================
|
| 122 |
+
|
| 123 |
+
Nonlinear least-squares
|
| 124 |
+
-----------------------
|
| 125 |
+
|
| 126 |
+
.. autosummary::
|
| 127 |
+
:toctree: generated/
|
| 128 |
+
|
| 129 |
+
least_squares - Solve a nonlinear least-squares problem with bounds on the variables.
|
| 130 |
+
|
| 131 |
+
Linear least-squares
|
| 132 |
+
--------------------
|
| 133 |
+
|
| 134 |
+
.. autosummary::
|
| 135 |
+
:toctree: generated/
|
| 136 |
+
|
| 137 |
+
nnls - Linear least-squares problem with non-negativity constraint.
|
| 138 |
+
lsq_linear - Linear least-squares problem with bound constraints.
|
| 139 |
+
isotonic_regression - Least squares problem of isotonic regression via PAVA.
|
| 140 |
+
|
| 141 |
+
Curve fitting
|
| 142 |
+
-------------
|
| 143 |
+
|
| 144 |
+
.. autosummary::
|
| 145 |
+
:toctree: generated/
|
| 146 |
+
|
| 147 |
+
curve_fit -- Fit curve to a set of points.
|
| 148 |
+
|
| 149 |
+
Root finding
|
| 150 |
+
============
|
| 151 |
+
|
| 152 |
+
Scalar functions
|
| 153 |
+
----------------
|
| 154 |
+
.. autosummary::
|
| 155 |
+
:toctree: generated/
|
| 156 |
+
|
| 157 |
+
root_scalar - Unified interface for nonlinear solvers of scalar functions.
|
| 158 |
+
brentq - quadratic interpolation Brent method.
|
| 159 |
+
brenth - Brent method, modified by Harris with hyperbolic extrapolation.
|
| 160 |
+
ridder - Ridder's method.
|
| 161 |
+
bisect - Bisection method.
|
| 162 |
+
newton - Newton's method (also Secant and Halley's methods).
|
| 163 |
+
toms748 - Alefeld, Potra & Shi Algorithm 748.
|
| 164 |
+
RootResults - The root finding result returned by some root finders.
|
| 165 |
+
|
| 166 |
+
The `root_scalar` function supports the following methods:
|
| 167 |
+
|
| 168 |
+
.. toctree::
|
| 169 |
+
|
| 170 |
+
optimize.root_scalar-brentq
|
| 171 |
+
optimize.root_scalar-brenth
|
| 172 |
+
optimize.root_scalar-bisect
|
| 173 |
+
optimize.root_scalar-ridder
|
| 174 |
+
optimize.root_scalar-newton
|
| 175 |
+
optimize.root_scalar-toms748
|
| 176 |
+
optimize.root_scalar-secant
|
| 177 |
+
optimize.root_scalar-halley
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
The table below lists situations and appropriate methods, along with
|
| 182 |
+
*asymptotic* convergence rates per iteration (and per function evaluation)
|
| 183 |
+
for successful convergence to a simple root(*).
|
| 184 |
+
Bisection is the slowest of them all, adding one bit of accuracy for each
|
| 185 |
+
function evaluation, but is guaranteed to converge.
|
| 186 |
+
The other bracketing methods all (eventually) increase the number of accurate
|
| 187 |
+
bits by about 50% for every function evaluation.
|
| 188 |
+
The derivative-based methods, all built on `newton`, can converge quite quickly
|
| 189 |
+
if the initial value is close to the root. They can also be applied to
|
| 190 |
+
functions defined on (a subset of) the complex plane.
|
| 191 |
+
|
| 192 |
+
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
| 193 |
+
| Domain of f | Bracket? | Derivatives? | Solvers | Convergence |
|
| 194 |
+
+ + +----------+-----------+ +-------------+----------------+
|
| 195 |
+
| | | `fprime` | `fprime2` | | Guaranteed? | Rate(s)(*) |
|
| 196 |
+
+=============+==========+==========+===========+=============+=============+================+
|
| 197 |
+
| `R` | Yes | N/A | N/A | - bisection | - Yes | - 1 "Linear" |
|
| 198 |
+
| | | | | - brentq | - Yes | - >=1, <= 1.62 |
|
| 199 |
+
| | | | | - brenth | - Yes | - >=1, <= 1.62 |
|
| 200 |
+
| | | | | - ridder | - Yes | - 2.0 (1.41) |
|
| 201 |
+
| | | | | - toms748 | - Yes | - 2.7 (1.65) |
|
| 202 |
+
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
| 203 |
+
| `R` or `C` | No | No | No | secant | No | 1.62 (1.62) |
|
| 204 |
+
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
| 205 |
+
| `R` or `C` | No | Yes | No | newton | No | 2.00 (1.41) |
|
| 206 |
+
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
| 207 |
+
| `R` or `C` | No | Yes | Yes | halley | No | 3.00 (1.44) |
|
| 208 |
+
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
| 209 |
+
|
| 210 |
+
.. seealso::
|
| 211 |
+
|
| 212 |
+
`scipy.optimize.cython_optimize` -- Typed Cython versions of root finding functions
|
| 213 |
+
|
| 214 |
+
Fixed point finding:
|
| 215 |
+
|
| 216 |
+
.. autosummary::
|
| 217 |
+
:toctree: generated/
|
| 218 |
+
|
| 219 |
+
fixed_point - Single-variable fixed-point solver.
|
| 220 |
+
|
| 221 |
+
Multidimensional
|
| 222 |
+
----------------
|
| 223 |
+
|
| 224 |
+
.. autosummary::
|
| 225 |
+
:toctree: generated/
|
| 226 |
+
|
| 227 |
+
root - Unified interface for nonlinear solvers of multivariate functions.
|
| 228 |
+
|
| 229 |
+
The `root` function supports the following methods:
|
| 230 |
+
|
| 231 |
+
.. toctree::
|
| 232 |
+
|
| 233 |
+
optimize.root-hybr
|
| 234 |
+
optimize.root-lm
|
| 235 |
+
optimize.root-broyden1
|
| 236 |
+
optimize.root-broyden2
|
| 237 |
+
optimize.root-anderson
|
| 238 |
+
optimize.root-linearmixing
|
| 239 |
+
optimize.root-diagbroyden
|
| 240 |
+
optimize.root-excitingmixing
|
| 241 |
+
optimize.root-krylov
|
| 242 |
+
optimize.root-dfsane
|
| 243 |
+
|
| 244 |
+
Linear programming / MILP
|
| 245 |
+
=========================
|
| 246 |
+
|
| 247 |
+
.. autosummary::
|
| 248 |
+
:toctree: generated/
|
| 249 |
+
|
| 250 |
+
milp -- Mixed integer linear programming.
|
| 251 |
+
linprog -- Unified interface for minimizers of linear programming problems.
|
| 252 |
+
|
| 253 |
+
The `linprog` function supports the following methods:
|
| 254 |
+
|
| 255 |
+
.. toctree::
|
| 256 |
+
|
| 257 |
+
optimize.linprog-simplex
|
| 258 |
+
optimize.linprog-interior-point
|
| 259 |
+
optimize.linprog-revised_simplex
|
| 260 |
+
optimize.linprog-highs-ipm
|
| 261 |
+
optimize.linprog-highs-ds
|
| 262 |
+
optimize.linprog-highs
|
| 263 |
+
|
| 264 |
+
The simplex, interior-point, and revised simplex methods support callback
|
| 265 |
+
functions, such as:
|
| 266 |
+
|
| 267 |
+
.. autosummary::
|
| 268 |
+
:toctree: generated/
|
| 269 |
+
|
| 270 |
+
linprog_verbose_callback -- Sample callback function for linprog (simplex).
|
| 271 |
+
|
| 272 |
+
Assignment problems
|
| 273 |
+
===================
|
| 274 |
+
|
| 275 |
+
.. autosummary::
|
| 276 |
+
:toctree: generated/
|
| 277 |
+
|
| 278 |
+
linear_sum_assignment -- Solves the linear-sum assignment problem.
|
| 279 |
+
quadratic_assignment -- Solves the quadratic assignment problem.
|
| 280 |
+
|
| 281 |
+
The `quadratic_assignment` function supports the following methods:
|
| 282 |
+
|
| 283 |
+
.. toctree::
|
| 284 |
+
|
| 285 |
+
optimize.qap-faq
|
| 286 |
+
optimize.qap-2opt
|
| 287 |
+
|
| 288 |
+
Utilities
|
| 289 |
+
=========
|
| 290 |
+
|
| 291 |
+
Finite-difference approximation
|
| 292 |
+
-------------------------------
|
| 293 |
+
|
| 294 |
+
.. autosummary::
|
| 295 |
+
:toctree: generated/
|
| 296 |
+
|
| 297 |
+
approx_fprime - Approximate the gradient of a scalar function.
|
| 298 |
+
check_grad - Check the supplied derivative using finite differences.
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
Line search
|
| 302 |
+
-----------
|
| 303 |
+
|
| 304 |
+
.. autosummary::
|
| 305 |
+
:toctree: generated/
|
| 306 |
+
|
| 307 |
+
bracket - Bracket a minimum, given two starting points.
|
| 308 |
+
line_search - Return a step that satisfies the strong Wolfe conditions.
|
| 309 |
+
|
| 310 |
+
Hessian approximation
|
| 311 |
+
---------------------
|
| 312 |
+
|
| 313 |
+
.. autosummary::
|
| 314 |
+
:toctree: generated/
|
| 315 |
+
|
| 316 |
+
LbfgsInvHessProduct - Linear operator for L-BFGS approximate inverse Hessian.
|
| 317 |
+
HessianUpdateStrategy - Interface for implementing Hessian update strategies
|
| 318 |
+
|
| 319 |
+
Benchmark problems
|
| 320 |
+
------------------
|
| 321 |
+
|
| 322 |
+
.. autosummary::
|
| 323 |
+
:toctree: generated/
|
| 324 |
+
|
| 325 |
+
rosen - The Rosenbrock function.
|
| 326 |
+
rosen_der - The derivative of the Rosenbrock function.
|
| 327 |
+
rosen_hess - The Hessian matrix of the Rosenbrock function.
|
| 328 |
+
rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
|
| 329 |
+
|
| 330 |
+
Legacy functions
|
| 331 |
+
================
|
| 332 |
+
|
| 333 |
+
The functions below are not recommended for use in new scripts;
|
| 334 |
+
all of these methods are accessible via a newer, more consistent
|
| 335 |
+
interfaces, provided by the interfaces above.
|
| 336 |
+
|
| 337 |
+
Optimization
|
| 338 |
+
------------
|
| 339 |
+
|
| 340 |
+
General-purpose multivariate methods:
|
| 341 |
+
|
| 342 |
+
.. autosummary::
|
| 343 |
+
:toctree: generated/
|
| 344 |
+
|
| 345 |
+
fmin - Nelder-Mead Simplex algorithm.
|
| 346 |
+
fmin_powell - Powell's (modified) conjugate direction method.
|
| 347 |
+
fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm.
|
| 348 |
+
fmin_bfgs - Quasi-Newton method (Broydon-Fletcher-Goldfarb-Shanno).
|
| 349 |
+
fmin_ncg - Line-search Newton Conjugate Gradient.
|
| 350 |
+
|
| 351 |
+
Constrained multivariate methods:
|
| 352 |
+
|
| 353 |
+
.. autosummary::
|
| 354 |
+
:toctree: generated/
|
| 355 |
+
|
| 356 |
+
fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer.
|
| 357 |
+
fmin_tnc - Truncated Newton code.
|
| 358 |
+
fmin_cobyla - Constrained optimization by linear approximation.
|
| 359 |
+
fmin_slsqp - Minimization using sequential least-squares programming.
|
| 360 |
+
|
| 361 |
+
Univariate (scalar) minimization methods:
|
| 362 |
+
|
| 363 |
+
.. autosummary::
|
| 364 |
+
:toctree: generated/
|
| 365 |
+
|
| 366 |
+
fminbound - Bounded minimization of a scalar function.
|
| 367 |
+
brent - 1-D function minimization using Brent method.
|
| 368 |
+
golden - 1-D function minimization using Golden Section method.
|
| 369 |
+
|
| 370 |
+
Least-squares
|
| 371 |
+
-------------
|
| 372 |
+
|
| 373 |
+
.. autosummary::
|
| 374 |
+
:toctree: generated/
|
| 375 |
+
|
| 376 |
+
leastsq - Minimize the sum of squares of M equations in N unknowns.
|
| 377 |
+
|
| 378 |
+
Root finding
|
| 379 |
+
------------
|
| 380 |
+
|
| 381 |
+
General nonlinear solvers:
|
| 382 |
+
|
| 383 |
+
.. autosummary::
|
| 384 |
+
:toctree: generated/
|
| 385 |
+
|
| 386 |
+
fsolve - Non-linear multivariable equation solver.
|
| 387 |
+
broyden1 - Broyden's first method.
|
| 388 |
+
broyden2 - Broyden's second method.
|
| 389 |
+
NoConvergence - Exception raised when nonlinear solver does not converge.
|
| 390 |
+
|
| 391 |
+
Large-scale nonlinear solvers:
|
| 392 |
+
|
| 393 |
+
.. autosummary::
|
| 394 |
+
:toctree: generated/
|
| 395 |
+
|
| 396 |
+
newton_krylov
|
| 397 |
+
anderson
|
| 398 |
+
|
| 399 |
+
BroydenFirst
|
| 400 |
+
InverseJacobian
|
| 401 |
+
KrylovJacobian
|
| 402 |
+
|
| 403 |
+
Simple iteration solvers:
|
| 404 |
+
|
| 405 |
+
.. autosummary::
|
| 406 |
+
:toctree: generated/
|
| 407 |
+
|
| 408 |
+
excitingmixing
|
| 409 |
+
linearmixing
|
| 410 |
+
diagbroyden
|
| 411 |
+
|
| 412 |
+
""" # noqa: E501
|
| 413 |
+
|
| 414 |
+
from ._optimize import *
|
| 415 |
+
from ._minimize import *
|
| 416 |
+
from ._root import *
|
| 417 |
+
from ._root_scalar import *
|
| 418 |
+
from ._minpack_py import *
|
| 419 |
+
from ._zeros_py import *
|
| 420 |
+
from ._lbfgsb_py import fmin_l_bfgs_b, LbfgsInvHessProduct
|
| 421 |
+
from ._tnc import fmin_tnc
|
| 422 |
+
from ._cobyla_py import fmin_cobyla
|
| 423 |
+
from ._nonlin import *
|
| 424 |
+
from ._slsqp_py import fmin_slsqp
|
| 425 |
+
from ._nnls import nnls
|
| 426 |
+
from ._basinhopping import basinhopping
|
| 427 |
+
from ._linprog import linprog, linprog_verbose_callback
|
| 428 |
+
from ._lsap import linear_sum_assignment
|
| 429 |
+
from ._differentialevolution import differential_evolution
|
| 430 |
+
from ._lsq import least_squares, lsq_linear
|
| 431 |
+
from ._isotonic import isotonic_regression
|
| 432 |
+
from ._constraints import (NonlinearConstraint,
|
| 433 |
+
LinearConstraint,
|
| 434 |
+
Bounds)
|
| 435 |
+
from ._hessian_update_strategy import HessianUpdateStrategy, BFGS, SR1
|
| 436 |
+
from ._shgo import shgo
|
| 437 |
+
from ._dual_annealing import dual_annealing
|
| 438 |
+
from ._qap import quadratic_assignment
|
| 439 |
+
from ._direct_py import direct
|
| 440 |
+
from ._milp import milp
|
| 441 |
+
|
| 442 |
+
# Deprecated namespaces, to be removed in v2.0.0
|
| 443 |
+
from . import (
|
| 444 |
+
cobyla, lbfgsb, linesearch, minpack, minpack2, moduleTNC, nonlin, optimize,
|
| 445 |
+
slsqp, tnc, zeros
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
__all__ = [s for s in dir() if not s.startswith('_')]
|
| 449 |
+
|
| 450 |
+
from scipy._lib._testutils import PytestTester
|
| 451 |
+
test = PytestTester(__name__)
|
| 452 |
+
del PytestTester
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py
ADDED
|
@@ -0,0 +1,753 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
basinhopping: The basinhopping global optimization algorithm
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import math
|
| 6 |
+
import inspect
|
| 7 |
+
import scipy.optimize
|
| 8 |
+
from scipy._lib._util import check_random_state
|
| 9 |
+
|
| 10 |
+
__all__ = ['basinhopping']
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
_params = (inspect.Parameter('res_new', kind=inspect.Parameter.KEYWORD_ONLY),
|
| 14 |
+
inspect.Parameter('res_old', kind=inspect.Parameter.KEYWORD_ONLY))
|
| 15 |
+
_new_accept_test_signature = inspect.Signature(parameters=_params)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Storage:
|
| 19 |
+
"""
|
| 20 |
+
Class used to store the lowest energy structure
|
| 21 |
+
"""
|
| 22 |
+
def __init__(self, minres):
|
| 23 |
+
self._add(minres)
|
| 24 |
+
|
| 25 |
+
def _add(self, minres):
|
| 26 |
+
self.minres = minres
|
| 27 |
+
self.minres.x = np.copy(minres.x)
|
| 28 |
+
|
| 29 |
+
def update(self, minres):
|
| 30 |
+
if minres.success and (minres.fun < self.minres.fun
|
| 31 |
+
or not self.minres.success):
|
| 32 |
+
self._add(minres)
|
| 33 |
+
return True
|
| 34 |
+
else:
|
| 35 |
+
return False
|
| 36 |
+
|
| 37 |
+
def get_lowest(self):
|
| 38 |
+
return self.minres
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class BasinHoppingRunner:
    """This class implements the core of the basinhopping algorithm.

    Parameters
    ----------
    x0 : ndarray
        The starting coordinates.
    minimizer : callable
        The local minimizer, with signature ``result = minimizer(x)``.
        The return value is an `optimize.OptimizeResult` object.
    step_taking : callable
        This function displaces the coordinates randomly. Signature should
        be ``x_new = step_taking(x)``. Note that `x` may be modified in-place.
    accept_tests : list of callables
        Each test is passed the kwargs `f_new`, `x_new`, `f_old` and
        `x_old`. These tests will be used to judge whether or not to accept
        the step. The acceptable return values are True, False, or ``"force
        accept"``. If any of the tests return False then the step is rejected.
        If ``"force accept"``, then this will override any other tests in
        order to accept the step. This can be used, for example, to forcefully
        escape from a local minimum that ``basinhopping`` is trapped in.
    disp : bool, optional
        Display status messages.

    """
    def __init__(self, x0, minimizer, step_taking, accept_tests, disp=False):
        # Copy x0 so the caller's array is never mutated by step taking.
        self.x = np.copy(x0)
        self.minimizer = minimizer
        self.step_taking = step_taking
        self.accept_tests = accept_tests
        self.disp = disp

        # Number of basinhopping cycles performed so far.
        self.nstep = 0

        # initialize return object
        self.res = scipy.optimize.OptimizeResult()
        self.res.minimization_failures = 0

        # do initial minimization
        minres = minimizer(self.x)
        if not minres.success:
            # An unsuccessful local minimization is counted but not fatal;
            # the run continues from the (possibly unconverged) result.
            self.res.minimization_failures += 1
            if self.disp:
                print("warning: basinhopping: local minimization failure")
        self.x = np.copy(minres.x)
        self.energy = minres.fun
        self.incumbent_minres = minres  # best minimize result found so far
        if self.disp:
            print("basinhopping step %d: f %g" % (self.nstep, self.energy))

        # initialize storage class
        self.storage = Storage(minres)

        # Seed the cumulative evaluation counters with the ones reported by
        # the initial minimization (if the minimizer provides them).
        if hasattr(minres, "nfev"):
            self.res.nfev = minres.nfev
        if hasattr(minres, "njev"):
            self.res.njev = minres.njev
        if hasattr(minres, "nhev"):
            self.res.nhev = minres.nhev

    def _monte_carlo_step(self):
        """Do one Monte Carlo iteration

        Randomly displace the coordinates, minimize, and decide whether
        or not to accept the new coordinates.

        Returns
        -------
        accept : bool
            Whether the trial step was accepted.
        minres : OptimizeResult
            The local minimization result for the trial point.
        """
        # Take a random step. Make a copy of x because the step_taking
        # algorithm might change x in place
        x_after_step = np.copy(self.x)
        x_after_step = self.step_taking(x_after_step)

        # do a local minimization
        minres = self.minimizer(x_after_step)
        x_after_quench = minres.x
        energy_after_quench = minres.fun
        if not minres.success:
            self.res.minimization_failures += 1
            if self.disp:
                print("warning: basinhopping: local minimization failure")
        # Accumulate evaluation counts across all local minimizations.
        if hasattr(minres, "nfev"):
            self.res.nfev += minres.nfev
        if hasattr(minres, "njev"):
            self.res.njev += minres.njev
        if hasattr(minres, "nhev"):
            self.res.nhev += minres.nhev

        # accept the move based on self.accept_tests. If any test is False,
        # then reject the step. If any test returns the special string
        # 'force accept', then accept the step regardless. This can be used
        # to forcefully escape from a local minimum if normal basin hopping
        # steps are not sufficient.
        accept = True
        for test in self.accept_tests:
            # Tests with the new-style keyword-only (res_new, res_old)
            # signature receive full OptimizeResult objects; legacy tests
            # receive the f_new/x_new/f_old/x_old kwargs.
            if inspect.signature(test) == _new_accept_test_signature:
                testres = test(res_new=minres, res_old=self.incumbent_minres)
            else:
                testres = test(f_new=energy_after_quench, x_new=x_after_quench,
                               f_old=self.energy, x_old=self.x)

            if testres == 'force accept':
                accept = True
                break
            elif testres is None:
                raise ValueError("accept_tests must return True, False, or "
                                 "'force accept'")
            elif not testres:
                # Do not break here: a later test may still 'force accept'.
                accept = False

        # Report the result of the acceptance test to the take step class.
        # This is for adaptive step taking
        if hasattr(self.step_taking, "report"):
            self.step_taking.report(accept, f_new=energy_after_quench,
                                    x_new=x_after_quench, f_old=self.energy,
                                    x_old=self.x)

        return accept, minres

    def one_cycle(self):
        """Do one cycle of the basinhopping algorithm

        Returns True if a new global minimum candidate was found.
        """
        self.nstep += 1
        new_global_min = False

        accept, minres = self._monte_carlo_step()

        if accept:
            # Accepted step becomes the incumbent state for the next cycle.
            self.energy = minres.fun
            self.x = np.copy(minres.x)
            self.incumbent_minres = minres  # best minimize result found so far
            new_global_min = self.storage.update(minres)

        # print some information
        if self.disp:
            self.print_report(minres.fun, accept)
            if new_global_min:
                print("found new global minimum on step %d with function"
                      " value %g" % (self.nstep, self.energy))

        # save some variables as BasinHoppingRunner attributes
        self.xtrial = minres.x
        self.energy_trial = minres.fun
        self.accept = accept

        return new_global_min

    def print_report(self, energy_trial, accept):
        """print a status update"""
        minres = self.storage.get_lowest()
        print("basinhopping step %d: f %g trial_f %g accepted %d "
              " lowest_f %g" % (self.nstep, self.energy, energy_trial,
                                accept, minres.fun))
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class AdaptiveStepsize:
    """
    Class to implement adaptive stepsize.

    Wraps a step-taking callable and periodically rescales its ``stepsize``
    attribute so that the observed acceptance rate tracks a target value.

    Parameters
    ----------
    takestep : callable
        The step taking routine. Must contain modifiable attribute
        takestep.stepsize
    accept_rate : float, optional
        The target step acceptance rate
    interval : int, optional
        Interval for how often to update the stepsize
    factor : float, optional
        The step size is multiplied or divided by this factor upon each
        update.
    verbose : bool, optional
        Print information about each update

    """
    def __init__(self, takestep, accept_rate=0.5, interval=50, factor=0.9,
                 verbose=True):
        self.takestep = takestep
        self.target_accept_rate = accept_rate
        self.interval = interval
        self.factor = factor
        self.verbose = verbose
        # Running counters: steps since construction, total steps taken,
        # and number of steps reported as accepted.
        self.nstep = 0
        self.nstep_tot = 0
        self.naccept = 0

    def __call__(self, x):
        return self.take_step(x)

    def _adjust_step_size(self):
        # A too-high acceptance rate usually means we are trapped inside a
        # single basin, so enlarge the step; too low, shrink it.
        old_stepsize = self.takestep.stepsize
        accept_rate = float(self.naccept) / self.nstep
        if accept_rate > self.target_accept_rate:
            self.takestep.stepsize /= self.factor
        else:
            self.takestep.stepsize *= self.factor
        if self.verbose:
            print(f"adaptive stepsize: acceptance rate {accept_rate:f} target "
                  f"{self.target_accept_rate:f} new stepsize "
                  f"{self.takestep.stepsize:g} old stepsize {old_stepsize:g}")

    def take_step(self, x):
        """Advance the counters, adjust stepsize when due, and take one step."""
        self.nstep += 1
        self.nstep_tot += 1
        if self.nstep % self.interval == 0:
            self._adjust_step_size()
        return self.takestep(x)

    def report(self, accept, **kwargs):
        """Called by basinhopping to report the result of the step."""
        if accept:
            self.naccept += 1
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
class RandomDisplacement:
    """Add a random displacement of maximum size `stepsize` to each coordinate.

    Calling this updates `x` in-place.

    Parameters
    ----------
    stepsize : float, optional
        Maximum stepsize in any dimension
    random_gen : {None, int, `numpy.random.Generator`,
                  `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    """

    def __init__(self, stepsize=0.5, random_gen=None):
        self.stepsize = stepsize
        # Normalize the seed/generator argument into a usable RNG instance.
        self.random_gen = check_random_state(random_gen)

    def __call__(self, x):
        # Draw one uniform perturbation per coordinate and apply it in-place.
        perturbation = self.random_gen.uniform(-self.stepsize, self.stepsize,
                                               np.shape(x))
        x += perturbation
        return x
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
class MinimizerWrapper:
    """
    Present a bare minimizer function as a callable minimizer object.

    If `func` is None the wrapped minimizer is assumed to already know the
    objective and is called as ``minimizer(x0, **kwargs)``; otherwise the
    objective is passed through as the first argument.
    """
    def __init__(self, minimizer, func=None, **kwargs):
        self.minimizer = minimizer
        self.func = func
        self.kwargs = kwargs

    def __call__(self, x0):
        if self.func is not None:
            return self.minimizer(self.func, x0, **self.kwargs)
        return self.minimizer(x0, **self.kwargs)
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
class Metropolis:
    """Metropolis acceptance criterion.

    Parameters
    ----------
    T : float
        The "temperature" parameter for the accept or reject criterion.
    random_gen : {None, int, `numpy.random.Generator`,
                  `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
        Random number generator used for acceptance test.

    """

    def __init__(self, T, random_gen=None):
        # Avoid ZeroDivisionError since "MBH can be regarded as a special case
        # of the BH framework with the Metropolis criterion, where temperature
        # T = 0." (Reject all steps that increase energy.)
        self.beta = 1.0 / T if T != 0 else float('inf')
        self.random_gen = check_random_state(random_gen)

    def accept_reject(self, res_new, res_old):
        """
        Assuming the local search underlying res_new was successful:
        If new energy is lower than old, it will always be accepted.
        If new is higher than old, there is a chance it will be accepted,
        less likely for larger differences.
        """
        with np.errstate(invalid='ignore'):
            # The energy values being fed to Metropolis are 1-length arrays, and if
            # they are equal, their difference is 0, which gets multiplied by beta,
            # which is inf, and array([0]) * float('inf') causes
            #
            # RuntimeWarning: invalid value encountered in multiply
            #
            # Ignore this warning so when the algorithm is on a flat plane, it always
            # accepts the step, to try to move off the plane.
            exponent = -(res_new.fun - res_old.fun) * self.beta
            weight = math.exp(min(0, exponent))

        draw = self.random_gen.uniform()
        # A successful new result (or a previously unsuccessful old one) is
        # required in addition to passing the Metropolis draw.
        return weight >= draw and (res_new.success or not res_old.success)

    def __call__(self, *, res_new, res_old):
        """Return the acceptance decision as a plain bool.

        `res_new` and `res_old` are mandatory keyword arguments.
        """
        return bool(self.accept_reject(res_new, res_old))
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5,
                 minimizer_kwargs=None, take_step=None, accept_test=None,
                 callback=None, interval=50, disp=False, niter_success=None,
                 seed=None, *, target_accept_rate=0.5, stepwise_factor=0.9):
    """Find the global minimum of a function using the basin-hopping algorithm.

    Basin-hopping is a two-phase method that combines a global stepping
    algorithm with local minimization at each step. Designed to mimic
    the natural process of energy minimization of clusters of atoms, it works
    well for similar problems with "funnel-like, but rugged" energy landscapes
    [5]_.

    As the step-taking, step acceptance, and minimization methods are all
    customizable, this function can also be used to implement other two-phase
    methods.

    Parameters
    ----------
    func : callable ``f(x, *args)``
        Function to be optimized. ``args`` can be passed as an optional item
        in the dict `minimizer_kwargs`
    x0 : array_like
        Initial guess.
    niter : integer, optional
        The number of basin-hopping iterations. There will be a total of
        ``niter + 1`` runs of the local minimizer.
    T : float, optional
        The "temperature" parameter for the acceptance or rejection criterion.
        Higher "temperatures" mean that larger jumps in function value will be
        accepted. For best results `T` should be comparable to the
        separation (in function value) between local minima.
    stepsize : float, optional
        Maximum step size for use in the random displacement.
    minimizer_kwargs : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        `scipy.optimize.minimize` Some important options could be:

            method : str
                The minimization method (e.g. ``"L-BFGS-B"``)
            args : tuple
                Extra arguments passed to the objective function (`func`) and
                its derivatives (Jacobian, Hessian).

    take_step : callable ``take_step(x)``, optional
        Replace the default step-taking routine with this routine. The default
        step-taking routine is a random displacement of the coordinates, but
        other step-taking algorithms may be better for some systems.
        `take_step` can optionally have the attribute ``take_step.stepsize``.
        If this attribute exists, then `basinhopping` will adjust
        ``take_step.stepsize`` in order to try to optimize the global minimum
        search.
    accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=f_old, x_old=x_old)``, optional
        Define a test which will be used to judge whether to accept the
        step. This will be used in addition to the Metropolis test based on
        "temperature" `T`. The acceptable return values are True,
        False, or ``"force accept"``. If any of the tests return False
        then the step is rejected. If the latter, then this will override any
        other tests in order to accept the step. This can be used, for example,
        to forcefully escape from a local minimum that `basinhopping` is
        trapped in.
    callback : callable, ``callback(x, f, accept)``, optional
        A callback function which will be called for all minima found. ``x``
        and ``f`` are the coordinates and function value of the trial minimum,
        and ``accept`` is whether that minimum was accepted. This can
        be used, for example, to save the lowest N minima found. Also,
        `callback` can be used to specify a user defined stop criterion by
        optionally returning True to stop the `basinhopping` routine.
    interval : integer, optional
        interval for how often to update the `stepsize`
    disp : bool, optional
        Set to True to print status messages
    niter_success : integer, optional
        Stop the run if the global minimum candidate remains the same for this
        number of iterations.
    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the default Metropolis
        `accept_test` and the default `take_step`. If you supply your own
        `take_step` and `accept_test`, and these functions use random
        number generation, then those functions are responsible for the state
        of their random number generator.
    target_accept_rate : float, optional
        The target acceptance rate that is used to adjust the `stepsize`.
        If the current acceptance rate is greater than the target,
        then the `stepsize` is increased. Otherwise, it is decreased.
        Range is (0, 1). Default is 0.5.

        .. versionadded:: 1.8.0

    stepwise_factor : float, optional
        The `stepsize` is multiplied or divided by this stepwise factor upon
        each update. Range is (0, 1). Default is 0.9.

        .. versionadded:: 1.8.0

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination. The ``OptimizeResult`` object returned by the
        selected minimizer at the lowest minimum is also contained within this
        object and can be accessed through the ``lowest_optimization_result``
        attribute. See `OptimizeResult` for a description of other attributes.

    See Also
    --------
    minimize :
        The local minimization function called once for each basinhopping step.
        `minimizer_kwargs` is passed to this routine.

    Notes
    -----
    Basin-hopping is a stochastic algorithm which attempts to find the global
    minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
    [4]_. The algorithm in its current form was described by David Wales and
    Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.

    The algorithm is iterative with each cycle composed of the following
    features

    1) random perturbation of the coordinates

    2) local minimization

    3) accept or reject the new coordinates based on the minimized function
       value

    The acceptance test used here is the Metropolis criterion of standard Monte
    Carlo algorithms, although there are many other possibilities [3]_.

    This global minimization method has been shown to be extremely efficient
    for a wide variety of problems in physics and chemistry. It is
    particularly useful when the function has many minima separated by large
    barriers. See the `Cambridge Cluster Database
    <https://www-wales.ch.cam.ac.uk/CCD.html>`_ for databases of molecular
    systems that have been optimized primarily using basin-hopping. This
    database includes minimization problems exceeding 300 degrees of freedom.

    See the free software program `GMIN <https://www-wales.ch.cam.ac.uk/GMIN>`_
    for a Fortran implementation of basin-hopping. This implementation has many
    variations of the procedure described above, including more
    advanced step taking algorithms and alternate acceptance criterion.

    For stochastic global optimization there is no way to determine if the true
    global minimum has actually been found. Instead, as a consistency check,
    the algorithm can be run from a number of different random starting points
    to ensure the lowest minimum found in each example has converged to the
    global minimum. For this reason, `basinhopping` will by default simply
    run for the number of iterations `niter` and return the lowest minimum
    found. It is left to the user to ensure that this is in fact the global
    minimum.

    Choosing `stepsize`: This is a crucial parameter in `basinhopping` and
    depends on the problem being solved. The step is chosen uniformly in the
    region from x0-stepsize to x0+stepsize, in each dimension. Ideally, it
    should be comparable to the typical separation (in argument values) between
    local minima of the function being optimized. `basinhopping` will, by
    default, adjust `stepsize` to find an optimal value, but this may take
    many iterations. You will get quicker results if you set a sensible
    initial value for ``stepsize``.

    Choosing `T`: The parameter `T` is the "temperature" used in the
    Metropolis criterion. Basinhopping steps are always accepted if
    ``func(xnew) < func(xold)``. Otherwise, they are accepted with
    probability::

        exp( -(func(xnew) - func(xold)) / T )

    So, for best results, `T` should to be comparable to the typical
    difference (in function values) between local minima. (The height of
    "walls" between local minima is irrelevant.)

    If `T` is 0, the algorithm becomes Monotonic Basin-Hopping, in which all
    steps that increase energy are rejected.

    .. versionadded:: 0.12.0

    References
    ----------
    .. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
        Cambridge, UK.
    .. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
        the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
        110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
    .. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
        multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
        1987, 84, 6611.
    .. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
        crystals, and biomolecules, Science, 1999, 285, 1368.
    .. [5] Olson, B., Hashmi, I., Molloy, K., and Shehu1, A., Basin Hopping as
        a General and Versatile Optimization Framework for the Characterization
        of Biological Macromolecules, Advances in Artificial Intelligence,
        Volume 2012 (2012), Article ID 674832, :doi:`10.1155/2012/674832`

    Examples
    --------
    The following example is a 1-D minimization problem, with many
    local minima superimposed on a parabola.

    >>> import numpy as np
    >>> from scipy.optimize import basinhopping
    >>> func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x
    >>> x0 = [1.]

    Basinhopping, internally, uses a local minimization algorithm. We will use
    the parameter `minimizer_kwargs` to tell basinhopping which algorithm to
    use and how to set up that minimizer. This parameter will be passed to
    `scipy.optimize.minimize`.

    >>> minimizer_kwargs = {"method": "BFGS"}
    >>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200)
    >>> # the global minimum is:
    >>> ret.x, ret.fun
    -0.1951, -1.0009

    Next consider a 2-D minimization problem. Also, this time, we
    will use gradient information to significantly speed up the search.

    >>> def func2d(x):
    ...     f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
    ...                                                            0.2) * x[0]
    ...     df = np.zeros(2)
    ...     df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
    ...     df[1] = 2. * x[1] + 0.2
    ...     return f, df

    We'll also use a different local minimization algorithm. Also, we must tell
    the minimizer that our function returns both energy and gradient (Jacobian).

    >>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
    >>> x0 = [1.0, 1.0]
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200)
    >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
    ...                                                          ret.x[1],
    ...                                                          ret.fun))
    global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109

    Here is an example using a custom step-taking routine. Imagine you want
    the first coordinate to take larger steps than the rest of the coordinates.
    This can be implemented like so:

    >>> class MyTakeStep:
    ...    def __init__(self, stepsize=0.5):
    ...        self.stepsize = stepsize
    ...        self.rng = np.random.default_rng()
    ...    def __call__(self, x):
    ...        s = self.stepsize
    ...        x[0] += self.rng.uniform(-2.*s, 2.*s)
    ...        x[1:] += self.rng.uniform(-s, s, x[1:].shape)
    ...        return x

    Since ``MyTakeStep.stepsize`` exists basinhopping will adjust the magnitude
    of `stepsize` to optimize the search. We'll use the same 2-D function as
    before

    >>> mytakestep = MyTakeStep()
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200, take_step=mytakestep)
    >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
    ...                                                          ret.x[1],
    ...                                                          ret.fun))
    global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109

    Now, let's do an example using a custom callback function which prints the
    value of every minimum found

    >>> def print_fun(x, f, accepted):
    ...         print("at minimum %.4f accepted %d" % (f, int(accepted)))

    We'll run it for only 10 basinhopping steps this time.

    >>> rng = np.random.default_rng()
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=10, callback=print_fun, seed=rng)
    at minimum 0.4159 accepted 1
    at minimum -0.4317 accepted 1
    at minimum -1.0109 accepted 1
    at minimum -0.9073 accepted 1
    at minimum -0.4317 accepted 0
    at minimum -0.1021 accepted 1
    at minimum -0.7425 accepted 1
    at minimum -0.9073 accepted 1
    at minimum -0.4317 accepted 0
    at minimum -0.7425 accepted 1
    at minimum -0.9073 accepted 1

    The minimum at -1.0109 is actually the global minimum, found already on the
    8th iteration.

    """  # numpy/numpydoc#87  # noqa: E501
    if target_accept_rate <= 0. or target_accept_rate >= 1.:
        raise ValueError('target_accept_rate has to be in range (0, 1)')
    if stepwise_factor <= 0. or stepwise_factor >= 1.:
        raise ValueError('stepwise_factor has to be in range (0, 1)')

    x0 = np.array(x0)

    # set up the np.random generator
    rng = check_random_state(seed)

    # set up minimizer
    if minimizer_kwargs is None:
        minimizer_kwargs = dict()
    wrapped_minimizer = MinimizerWrapper(scipy.optimize.minimize, func,
                                         **minimizer_kwargs)

    # set up step-taking algorithm
    if take_step is not None:
        if not callable(take_step):
            raise TypeError("take_step must be callable")
        # if take_step.stepsize exists then use AdaptiveStepsize to control
        # take_step.stepsize
        if hasattr(take_step, "stepsize"):
            take_step_wrapped = AdaptiveStepsize(
                take_step, interval=interval,
                accept_rate=target_accept_rate,
                factor=stepwise_factor,
                verbose=disp)
        else:
            take_step_wrapped = take_step
    else:
        # use default
        displace = RandomDisplacement(stepsize=stepsize, random_gen=rng)
        take_step_wrapped = AdaptiveStepsize(displace, interval=interval,
                                             accept_rate=target_accept_rate,
                                             factor=stepwise_factor,
                                             verbose=disp)

    # set up accept tests
    accept_tests = []
    if accept_test is not None:
        if not callable(accept_test):
            raise TypeError("accept_test must be callable")
        accept_tests = [accept_test]

    # use default: the Metropolis criterion always runs last
    metropolis = Metropolis(T, random_gen=rng)
    accept_tests.append(metropolis)

    if niter_success is None:
        # effectively disable the niter_success stopping condition
        niter_success = niter + 2

    bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
                            accept_tests, disp=disp)

    # The wrapped minimizer is called once during construction of
    # BasinHoppingRunner, so run the callback
    if callable(callback):
        callback(bh.storage.minres.x, bh.storage.minres.fun, True)

    # start main iteration loop
    count, i = 0, 0
    message = ["requested number of basinhopping iterations completed"
               " successfully"]
    for i in range(niter):
        new_global_min = bh.one_cycle()

        if callable(callback):
            # should we pass a copy of x?
            val = callback(bh.xtrial, bh.energy_trial, bh.accept)
            if val is not None:
                if val:
                    # BUG FIX: the two adjacent literals previously
                    # concatenated to "...stop early byreturning True";
                    # a separating space was missing.
                    message = ["callback function requested stop early by"
                               " returning True"]
                    break

        count += 1
        if new_global_min:
            count = 0
        elif count > niter_success:
            message = ["success condition satisfied"]
            break

    # prepare return object
    res = bh.res
    res.lowest_optimization_result = bh.storage.get_lowest()
    res.x = np.copy(res.lowest_optimization_result.x)
    res.fun = res.lowest_optimization_result.fun
    res.message = message
    res.nit = i + 1
    res.success = res.lowest_optimization_result.success
    return res
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import scipy._lib._elementwise_iterative_method as eim
|
| 3 |
+
from scipy._lib._util import _RichResult
|
| 4 |
+
|
| 5 |
+
_ELIMITS = -1 # used in _bracket_root
|
| 6 |
+
_ESTOPONESIDE = 2 # used in _bracket_root
|
| 7 |
+
|
| 8 |
+
def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter):
|
| 9 |
+
|
| 10 |
+
if not callable(func):
|
| 11 |
+
raise ValueError('`func` must be callable.')
|
| 12 |
+
|
| 13 |
+
if not np.iterable(args):
|
| 14 |
+
args = (args,)
|
| 15 |
+
|
| 16 |
+
xl0 = np.asarray(xl0)[()]
|
| 17 |
+
if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
|
| 18 |
+
raise ValueError('`xl0` must be numeric and real.')
|
| 19 |
+
|
| 20 |
+
xr0 = xl0 + 1 if xr0 is None else xr0
|
| 21 |
+
xmin = -np.inf if xmin is None else xmin
|
| 22 |
+
xmax = np.inf if xmax is None else xmax
|
| 23 |
+
factor = 2. if factor is None else factor
|
| 24 |
+
xl0, xr0, xmin, xmax, factor = np.broadcast_arrays(xl0, xr0, xmin, xmax, factor)
|
| 25 |
+
|
| 26 |
+
if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
|
| 27 |
+
raise ValueError('`xr0` must be numeric and real.')
|
| 28 |
+
|
| 29 |
+
if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
|
| 30 |
+
raise ValueError('`xmin` must be numeric and real.')
|
| 31 |
+
|
| 32 |
+
if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
|
| 33 |
+
raise ValueError('`xmax` must be numeric and real.')
|
| 34 |
+
|
| 35 |
+
if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
|
| 36 |
+
raise ValueError('`factor` must be numeric and real.')
|
| 37 |
+
if not np.all(factor > 1):
|
| 38 |
+
raise ValueError('All elements of `factor` must be greater than 1.')
|
| 39 |
+
|
| 40 |
+
maxiter = np.asarray(maxiter)
|
| 41 |
+
message = '`maxiter` must be a non-negative integer.'
|
| 42 |
+
if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
|
| 43 |
+
or np.iscomplex(maxiter)):
|
| 44 |
+
raise ValueError(message)
|
| 45 |
+
maxiter_int = int(maxiter[()])
|
| 46 |
+
if not maxiter == maxiter_int or maxiter < 0:
|
| 47 |
+
raise ValueError(message)
|
| 48 |
+
|
| 49 |
+
return func, xl0, xr0, xmin, xmax, factor, args, maxiter
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _bracket_root(func, xl0, xr0=None, *, xmin=None, xmax=None, factor=None,
|
| 53 |
+
args=(), maxiter=1000):
|
| 54 |
+
"""Bracket the root of a monotonic scalar function of one variable
|
| 55 |
+
|
| 56 |
+
This function works elementwise when `xl0`, `xr0`, `xmin`, `xmax`, `factor`, and
|
| 57 |
+
the elements of `args` are broadcastable arrays.
|
| 58 |
+
|
| 59 |
+
Parameters
|
| 60 |
+
----------
|
| 61 |
+
func : callable
|
| 62 |
+
The function for which the root is to be bracketed.
|
| 63 |
+
The signature must be::
|
| 64 |
+
|
| 65 |
+
func(x: ndarray, *args) -> ndarray
|
| 66 |
+
|
| 67 |
+
where each element of ``x`` is a finite real and ``args`` is a tuple,
|
| 68 |
+
which may contain an arbitrary number of arrays that are broadcastable
|
| 69 |
+
with `x`. ``func`` must be an elementwise function: each element
|
| 70 |
+
``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
|
| 71 |
+
xl0, xr0: float array_like
|
| 72 |
+
Starting guess of bracket, which need not contain a root. If `xr0` is
|
| 73 |
+
not provided, ``xr0 = xl0 + 1``. Must be broadcastable with one another.
|
| 74 |
+
xmin, xmax : float array_like, optional
|
| 75 |
+
Minimum and maximum allowable endpoints of the bracket, inclusive. Must
|
| 76 |
+
be broadcastable with `xl0` and `xr0`.
|
| 77 |
+
factor : float array_like, default: 2
|
| 78 |
+
The factor used to grow the bracket. See notes for details.
|
| 79 |
+
args : tuple, optional
|
| 80 |
+
Additional positional arguments to be passed to `func`. Must be arrays
|
| 81 |
+
broadcastable with `xl0`, `xr0`, `xmin`, and `xmax`. If the callable to be
|
| 82 |
+
bracketed requires arguments that are not broadcastable with these
|
| 83 |
+
arrays, wrap that callable with `func` such that `func` accepts
|
| 84 |
+
only `x` and broadcastable arrays.
|
| 85 |
+
maxiter : int, optional
|
| 86 |
+
The maximum number of iterations of the algorithm to perform.
|
| 87 |
+
|
| 88 |
+
Returns
|
| 89 |
+
-------
|
| 90 |
+
res : _RichResult
|
| 91 |
+
An instance of `scipy._lib._util._RichResult` with the following
|
| 92 |
+
attributes. The descriptions are written as though the values will be
|
| 93 |
+
scalars; however, if `func` returns an array, the outputs will be
|
| 94 |
+
arrays of the same shape.
|
| 95 |
+
|
| 96 |
+
xl, xr : float
|
| 97 |
+
The lower and upper ends of the bracket, if the algorithm
|
| 98 |
+
terminated successfully.
|
| 99 |
+
fl, fr : float
|
| 100 |
+
The function value at the lower and upper ends of the bracket.
|
| 101 |
+
nfev : int
|
| 102 |
+
The number of function evaluations required to find the bracket.
|
| 103 |
+
This is distinct from the number of times `func` is *called*
|
| 104 |
+
because the function may evaluated at multiple points in a single
|
| 105 |
+
call.
|
| 106 |
+
nit : int
|
| 107 |
+
The number of iterations of the algorithm that were performed.
|
| 108 |
+
status : int
|
| 109 |
+
An integer representing the exit status of the algorithm.
|
| 110 |
+
|
| 111 |
+
- ``0`` : The algorithm produced a valid bracket.
|
| 112 |
+
- ``-1`` : The bracket expanded to the allowable limits without finding a bracket.
|
| 113 |
+
- ``-2`` : The maximum number of iterations was reached.
|
| 114 |
+
- ``-3`` : A non-finite value was encountered.
|
| 115 |
+
- ``-4`` : Iteration was terminated by `callback`.
|
| 116 |
+
- ``-5``: The initial bracket does not satisfy `xmin <= xl0 < xr0 < xmax`.
|
| 117 |
+
- ``1`` : The algorithm is proceeding normally (in `callback` only).
|
| 118 |
+
- ``2`` : A bracket was found in the opposite search direction (in `callback` only).
|
| 119 |
+
|
| 120 |
+
success : bool
|
| 121 |
+
``True`` when the algorithm terminated successfully (status ``0``).
|
| 122 |
+
|
| 123 |
+
Notes
|
| 124 |
+
-----
|
| 125 |
+
This function generalizes an algorithm found in pieces throughout
|
| 126 |
+
`scipy.stats`. The strategy is to iteratively grow the bracket `(l, r)`
|
| 127 |
+
until ``func(l) < 0 < func(r)``. The bracket grows to the left as follows.
|
| 128 |
+
|
| 129 |
+
- If `xmin` is not provided, the distance between `xl0` and `l` is iteratively
|
| 130 |
+
increased by `factor`.
|
| 131 |
+
- If `xmin` is provided, the distance between `xmin` and `l` is iteratively
|
| 132 |
+
decreased by `factor`. Note that this also *increases* the bracket size.
|
| 133 |
+
|
| 134 |
+
Growth of the bracket to the right is analogous.
|
| 135 |
+
|
| 136 |
+
Growth of the bracket in one direction stops when the endpoint is no longer
|
| 137 |
+
finite, the function value at the endpoint is no longer finite, or the
|
| 138 |
+
endpoint reaches its limiting value (`xmin` or `xmax`). Iteration terminates
|
| 139 |
+
when the bracket stops growing in both directions, the bracket surrounds
|
| 140 |
+
the root, or a root is found (accidentally).
|
| 141 |
+
|
| 142 |
+
If two brackets are found - that is, a bracket is found on both sides in
|
| 143 |
+
the same iteration, the smaller of the two is returned.
|
| 144 |
+
If roots of the function are found, both `l` and `r` are set to the
|
| 145 |
+
leftmost root.
|
| 146 |
+
|
| 147 |
+
""" # noqa: E501
|
| 148 |
+
# Todo:
|
| 149 |
+
# - find bracket with sign change in specified direction
|
| 150 |
+
# - Add tolerance
|
| 151 |
+
# - allow factor < 1?
|
| 152 |
+
|
| 153 |
+
callback = None # works; I just don't want to test it
|
| 154 |
+
temp = _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter)
|
| 155 |
+
func, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
|
| 156 |
+
|
| 157 |
+
xs = (xl0, xr0)
|
| 158 |
+
temp = eim._initialize(func, xs, args)
|
| 159 |
+
func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
|
| 160 |
+
xl0, xr0 = xs
|
| 161 |
+
xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
|
| 162 |
+
xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
|
| 163 |
+
invalid_bracket = ~((xmin <= xl0) & (xl0 < xr0) & (xr0 <= xmax))
|
| 164 |
+
|
| 165 |
+
# The approach is to treat the left and right searches as though they were
|
| 166 |
+
# (almost) totally independent one-sided bracket searches. (The interaction
|
| 167 |
+
# is considered when checking for termination and preparing the result
|
| 168 |
+
# object.)
|
| 169 |
+
# `x` is the "moving" end of the bracket
|
| 170 |
+
x = np.concatenate(xs)
|
| 171 |
+
f = np.concatenate(fs)
|
| 172 |
+
invalid_bracket = np.concatenate((invalid_bracket, invalid_bracket))
|
| 173 |
+
n = len(x) // 2
|
| 174 |
+
|
| 175 |
+
# `x_last` is the previous location of the moving end of the bracket. If
|
| 176 |
+
# the signs of `f` and `f_last` are different, `x` and `x_last` form a
|
| 177 |
+
# bracket.
|
| 178 |
+
x_last = np.concatenate((x[n:], x[:n]))
|
| 179 |
+
f_last = np.concatenate((f[n:], f[:n]))
|
| 180 |
+
# `x0` is the "fixed" end of the bracket.
|
| 181 |
+
x0 = x_last
|
| 182 |
+
# We don't need to retain the corresponding function value, since the
|
| 183 |
+
# fixed end of the bracket is only needed to compute the new value of the
|
| 184 |
+
# moving end; it is never returned.
|
| 185 |
+
limit = np.concatenate((xmin, xmax))
|
| 186 |
+
|
| 187 |
+
factor = np.broadcast_to(factor, shape).astype(dtype, copy=False).ravel()
|
| 188 |
+
factor = np.concatenate((factor, factor))
|
| 189 |
+
|
| 190 |
+
active = np.arange(2*n)
|
| 191 |
+
args = [np.concatenate((arg, arg)) for arg in args]
|
| 192 |
+
|
| 193 |
+
# This is needed due to inner workings of `eim._loop`.
|
| 194 |
+
# We're abusing it a tiny bit.
|
| 195 |
+
shape = shape + (2,)
|
| 196 |
+
|
| 197 |
+
# `d` is for "distance".
|
| 198 |
+
# For searches without a limit, the distance between the fixed end of the
|
| 199 |
+
# bracket `x0` and the moving end `x` will grow by `factor` each iteration.
|
| 200 |
+
# For searches with a limit, the distance between the `limit` and moving
|
| 201 |
+
# end of the bracket `x` will shrink by `factor` each iteration.
|
| 202 |
+
i = np.isinf(limit)
|
| 203 |
+
ni = ~i
|
| 204 |
+
d = np.zeros_like(x)
|
| 205 |
+
d[i] = x[i] - x0[i]
|
| 206 |
+
d[ni] = limit[ni] - x[ni]
|
| 207 |
+
|
| 208 |
+
status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
|
| 209 |
+
status[invalid_bracket] = eim._EINPUTERR
|
| 210 |
+
nit, nfev = 0, 1 # one function evaluation per side performed above
|
| 211 |
+
|
| 212 |
+
work = _RichResult(x=x, x0=x0, f=f, limit=limit, factor=factor,
|
| 213 |
+
active=active, d=d, x_last=x_last, f_last=f_last,
|
| 214 |
+
nit=nit, nfev=nfev, status=status, args=args,
|
| 215 |
+
xl=None, xr=None, fl=None, fr=None, n=n)
|
| 216 |
+
res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xr', 'xr'),
|
| 217 |
+
('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'),
|
| 218 |
+
('fr', 'fr'), ('x', 'x'), ('f', 'f'),
|
| 219 |
+
('x_last', 'x_last'), ('f_last', 'f_last')]
|
| 220 |
+
|
| 221 |
+
def pre_func_eval(work):
|
| 222 |
+
# Initialize moving end of bracket
|
| 223 |
+
x = np.zeros_like(work.x)
|
| 224 |
+
|
| 225 |
+
# Unlimited brackets grow by `factor` by increasing distance from fixed
|
| 226 |
+
# end to moving end.
|
| 227 |
+
i = np.isinf(work.limit) # indices of unlimited brackets
|
| 228 |
+
work.d[i] *= work.factor[i]
|
| 229 |
+
x[i] = work.x0[i] + work.d[i]
|
| 230 |
+
|
| 231 |
+
# Limited brackets grow by decreasing the distance from the limit to
|
| 232 |
+
# the moving end.
|
| 233 |
+
ni = ~i # indices of limited brackets
|
| 234 |
+
work.d[ni] /= work.factor[ni]
|
| 235 |
+
x[ni] = work.limit[ni] - work.d[ni]
|
| 236 |
+
|
| 237 |
+
return x
|
| 238 |
+
|
| 239 |
+
def post_func_eval(x, f, work):
|
| 240 |
+
# Keep track of the previous location of the moving end so that we can
|
| 241 |
+
# return a narrower bracket. (The alternative is to remember the
|
| 242 |
+
# original fixed end, but then the bracket would be wider than needed.)
|
| 243 |
+
work.x_last = work.x
|
| 244 |
+
work.f_last = work.f
|
| 245 |
+
work.x = x
|
| 246 |
+
work.f = f
|
| 247 |
+
|
| 248 |
+
def check_termination(work):
|
| 249 |
+
# Condition 0: initial bracket is invalid
|
| 250 |
+
stop = (work.status == eim._EINPUTERR)
|
| 251 |
+
|
| 252 |
+
# Condition 1: a valid bracket (or the root itself) has been found
|
| 253 |
+
sf = np.sign(work.f)
|
| 254 |
+
sf_last = np.sign(work.f_last)
|
| 255 |
+
i = ((sf_last == -sf) | (sf_last == 0) | (sf == 0)) & ~stop
|
| 256 |
+
work.status[i] = eim._ECONVERGED
|
| 257 |
+
stop[i] = True
|
| 258 |
+
|
| 259 |
+
# Condition 2: the other side's search found a valid bracket.
|
| 260 |
+
# (If we just found a bracket with the rightward search, we can stop
|
| 261 |
+
# the leftward search, and vice-versa.)
|
| 262 |
+
# To do this, we need to set the status of the other side's search;
|
| 263 |
+
# this is tricky because `work.status` contains only the *active*
|
| 264 |
+
# elements, so we don't immediately know the index of the element we
|
| 265 |
+
# need to set - or even if it's still there. (That search may have
|
| 266 |
+
# terminated already, e.g. by reaching its `limit`.)
|
| 267 |
+
# To facilitate this, `work.active` contains a unit integer index of
|
| 268 |
+
# each search. Index `k` (`k < n)` and `k + n` correspond with a
|
| 269 |
+
# leftward and rightward search, respectively. Elements are removed
|
| 270 |
+
# from `work.active` just as they are removed from `work.status`, so
|
| 271 |
+
# we use `work.active` to help find the right location in
|
| 272 |
+
# `work.status`.
|
| 273 |
+
# Get the integer indices of the elements that can also stop
|
| 274 |
+
also_stop = (work.active[i] + work.n) % (2*work.n)
|
| 275 |
+
# Check whether they are still active.
|
| 276 |
+
# To start, we need to find out where in `work.active` they would
|
| 277 |
+
# appear if they are indeed there.
|
| 278 |
+
j = np.searchsorted(work.active, also_stop)
|
| 279 |
+
# If the location exceeds the length of the `work.active`, they are
|
| 280 |
+
# not there.
|
| 281 |
+
j = j[j < len(work.active)]
|
| 282 |
+
# Check whether they are still there.
|
| 283 |
+
j = j[also_stop == work.active[j]]
|
| 284 |
+
# Now convert these to boolean indices to use with `work.status`.
|
| 285 |
+
i = np.zeros_like(stop)
|
| 286 |
+
i[j] = True # boolean indices of elements that can also stop
|
| 287 |
+
i = i & ~stop
|
| 288 |
+
work.status[i] = _ESTOPONESIDE
|
| 289 |
+
stop[i] = True
|
| 290 |
+
|
| 291 |
+
# Condition 3: moving end of bracket reaches limit
|
| 292 |
+
i = (work.x == work.limit) & ~stop
|
| 293 |
+
work.status[i] = _ELIMITS
|
| 294 |
+
stop[i] = True
|
| 295 |
+
|
| 296 |
+
# Condition 4: non-finite value encountered
|
| 297 |
+
i = ~(np.isfinite(work.x) & np.isfinite(work.f)) & ~stop
|
| 298 |
+
work.status[i] = eim._EVALUEERR
|
| 299 |
+
stop[i] = True
|
| 300 |
+
|
| 301 |
+
return stop
|
| 302 |
+
|
| 303 |
+
def post_termination_check(work):
|
| 304 |
+
pass
|
| 305 |
+
|
| 306 |
+
def customize_result(res, shape):
|
| 307 |
+
n = len(res['x']) // 2
|
| 308 |
+
|
| 309 |
+
# To avoid ambiguity, below we refer to `xl0`, the initial left endpoint
|
| 310 |
+
# as `a` and `xr0`, the initial right endpoint, as `b`.
|
| 311 |
+
# Because we treat the two one-sided searches as though they were
|
| 312 |
+
# independent, what we keep track of in `work` and what we want to
|
| 313 |
+
# return in `res` look quite different. Combine the results from the
|
| 314 |
+
# two one-sided searches before reporting the results to the user.
|
| 315 |
+
# - "a" refers to the leftward search (the moving end started at `a`)
|
| 316 |
+
# - "b" refers to the rightward search (the moving end started at `b`)
|
| 317 |
+
# - "l" refers to the left end of the bracket (closer to -oo)
|
| 318 |
+
# - "r" refers to the right end of the bracket (closer to +oo)
|
| 319 |
+
xal = res['x'][:n]
|
| 320 |
+
xar = res['x_last'][:n]
|
| 321 |
+
xbl = res['x_last'][n:]
|
| 322 |
+
xbr = res['x'][n:]
|
| 323 |
+
|
| 324 |
+
fal = res['f'][:n]
|
| 325 |
+
far = res['f_last'][:n]
|
| 326 |
+
fbl = res['f_last'][n:]
|
| 327 |
+
fbr = res['f'][n:]
|
| 328 |
+
|
| 329 |
+
# Initialize the brackets and corresponding function values to return
|
| 330 |
+
# to the user. Brackets may not be valid (e.g. there is no root,
|
| 331 |
+
# there weren't enough iterations, NaN encountered), but we still need
|
| 332 |
+
# to return something. One option would be all NaNs, but what I've
|
| 333 |
+
# chosen here is the left- and right-most points at which the function
|
| 334 |
+
# has been evaluated. This gives the user some information about what
|
| 335 |
+
# interval of the real line has been searched and shows that there is
|
| 336 |
+
# no sign change between the two ends.
|
| 337 |
+
xl = xal.copy()
|
| 338 |
+
fl = fal.copy()
|
| 339 |
+
xr = xbr.copy()
|
| 340 |
+
fr = fbr.copy()
|
| 341 |
+
|
| 342 |
+
# `status` indicates whether the bracket is valid or not. If so,
|
| 343 |
+
# we want to adjust the bracket we return to be the narrowest possible
|
| 344 |
+
# given the points at which we evaluated the function.
|
| 345 |
+
# For example if bracket "a" is valid and smaller than bracket "b" OR
|
| 346 |
+
# if bracket "a" is valid and bracket "b" is not valid, we want to
|
| 347 |
+
# return bracket "a" (and vice versa).
|
| 348 |
+
sa = res['status'][:n]
|
| 349 |
+
sb = res['status'][n:]
|
| 350 |
+
|
| 351 |
+
da = xar - xal
|
| 352 |
+
db = xbr - xbl
|
| 353 |
+
|
| 354 |
+
i1 = ((da <= db) & (sa == 0)) | ((sa == 0) & (sb != 0))
|
| 355 |
+
i2 = ((db <= da) & (sb == 0)) | ((sb == 0) & (sa != 0))
|
| 356 |
+
|
| 357 |
+
xr[i1] = xar[i1]
|
| 358 |
+
fr[i1] = far[i1]
|
| 359 |
+
xl[i2] = xbl[i2]
|
| 360 |
+
fl[i2] = fbl[i2]
|
| 361 |
+
|
| 362 |
+
# Finish assembling the result object
|
| 363 |
+
res['xl'] = xl
|
| 364 |
+
res['xr'] = xr
|
| 365 |
+
res['fl'] = fl
|
| 366 |
+
res['fr'] = fr
|
| 367 |
+
|
| 368 |
+
res['nit'] = np.maximum(res['nit'][:n], res['nit'][n:])
|
| 369 |
+
res['nfev'] = res['nfev'][:n] + res['nfev'][n:]
|
| 370 |
+
# If the status on one side is zero, the status is zero. In any case,
|
| 371 |
+
# report the status from one side only.
|
| 372 |
+
res['status'] = np.choose(sa == 0, (sb, sa))
|
| 373 |
+
res['success'] = (res['status'] == 0)
|
| 374 |
+
|
| 375 |
+
del res['x']
|
| 376 |
+
del res['f']
|
| 377 |
+
del res['x_last']
|
| 378 |
+
del res['f_last']
|
| 379 |
+
|
| 380 |
+
return shape[:-1]
|
| 381 |
+
|
| 382 |
+
return eim._loop(work, callback, shape, maxiter, func, args, dtype,
|
| 383 |
+
pre_func_eval, post_func_eval, check_termination,
|
| 384 |
+
post_termination_check, customize_result, res_work_pairs,
|
| 385 |
+
xp)
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter):
|
| 389 |
+
|
| 390 |
+
if not callable(func):
|
| 391 |
+
raise ValueError('`func` must be callable.')
|
| 392 |
+
|
| 393 |
+
if not np.iterable(args):
|
| 394 |
+
args = (args,)
|
| 395 |
+
|
| 396 |
+
xm0 = np.asarray(xm0)[()]
|
| 397 |
+
if not np.issubdtype(xm0.dtype, np.number) or np.iscomplex(xm0).any():
|
| 398 |
+
raise ValueError('`xm0` must be numeric and real.')
|
| 399 |
+
|
| 400 |
+
xmin = -np.inf if xmin is None else xmin
|
| 401 |
+
xmax = np.inf if xmax is None else xmax
|
| 402 |
+
|
| 403 |
+
# If xl0 (xr0) is not supplied, fill with a dummy value for the sake
|
| 404 |
+
# of broadcasting. We need to wait until xmin (xmax) has been validated
|
| 405 |
+
# to compute the default values.
|
| 406 |
+
xl0_not_supplied = False
|
| 407 |
+
if xl0 is None:
|
| 408 |
+
xl0 = np.nan
|
| 409 |
+
xl0_not_supplied = True
|
| 410 |
+
|
| 411 |
+
xr0_not_supplied = False
|
| 412 |
+
if xr0 is None:
|
| 413 |
+
xr0 = np.nan
|
| 414 |
+
xr0_not_supplied = True
|
| 415 |
+
|
| 416 |
+
factor = 2.0 if factor is None else factor
|
| 417 |
+
xl0, xm0, xr0, xmin, xmax, factor = np.broadcast_arrays(
|
| 418 |
+
xl0, xm0, xr0, xmin, xmax, factor
|
| 419 |
+
)
|
| 420 |
+
|
| 421 |
+
if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
|
| 422 |
+
raise ValueError('`xl0` must be numeric and real.')
|
| 423 |
+
|
| 424 |
+
if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
|
| 425 |
+
raise ValueError('`xr0` must be numeric and real.')
|
| 426 |
+
|
| 427 |
+
if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
|
| 428 |
+
raise ValueError('`xmin` must be numeric and real.')
|
| 429 |
+
|
| 430 |
+
if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
|
| 431 |
+
raise ValueError('`xmax` must be numeric and real.')
|
| 432 |
+
|
| 433 |
+
if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
|
| 434 |
+
raise ValueError('`factor` must be numeric and real.')
|
| 435 |
+
if not np.all(factor > 1):
|
| 436 |
+
raise ValueError('All elements of `factor` must be greater than 1.')
|
| 437 |
+
|
| 438 |
+
# Calculate default values of xl0 and/or xr0 if they have not been supplied
|
| 439 |
+
# by the user. We need to be careful to ensure xl0 and xr0 are not outside
|
| 440 |
+
# of (xmin, xmax).
|
| 441 |
+
if xl0_not_supplied:
|
| 442 |
+
xl0 = xm0 - np.minimum((xm0 - xmin)/16, 0.5)
|
| 443 |
+
if xr0_not_supplied:
|
| 444 |
+
xr0 = xm0 + np.minimum((xmax - xm0)/16, 0.5)
|
| 445 |
+
|
| 446 |
+
maxiter = np.asarray(maxiter)
|
| 447 |
+
message = '`maxiter` must be a non-negative integer.'
|
| 448 |
+
if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
|
| 449 |
+
or np.iscomplex(maxiter)):
|
| 450 |
+
raise ValueError(message)
|
| 451 |
+
maxiter_int = int(maxiter[()])
|
| 452 |
+
if not maxiter == maxiter_int or maxiter < 0:
|
| 453 |
+
raise ValueError(message)
|
| 454 |
+
|
| 455 |
+
return func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def _bracket_minimum(func, xm0, *, xl0=None, xr0=None, xmin=None, xmax=None,
|
| 459 |
+
factor=None, args=(), maxiter=1000):
|
| 460 |
+
"""Bracket the minimum of a unimodal scalar function of one variable
|
| 461 |
+
|
| 462 |
+
This function works elementwise when `xm0`, `xl0`, `xr0`, `xmin`, `xmax`,
|
| 463 |
+
and the elements of `args` are broadcastable arrays.
|
| 464 |
+
|
| 465 |
+
Parameters
|
| 466 |
+
----------
|
| 467 |
+
func : callable
|
| 468 |
+
The function for which the minimum is to be bracketed.
|
| 469 |
+
The signature must be::
|
| 470 |
+
|
| 471 |
+
func(x: ndarray, *args) -> ndarray
|
| 472 |
+
|
| 473 |
+
where each element of ``x`` is a finite real and ``args`` is a tuple,
|
| 474 |
+
which may contain an arbitrary number of arrays that are broadcastable
|
| 475 |
+
with ``x``. `func` must be an elementwise function: each element
|
| 476 |
+
``func(x)[i]`` must equal ``func(x[i])`` for all indices `i`.
|
| 477 |
+
xm0: float array_like
|
| 478 |
+
Starting guess for middle point of bracket.
|
| 479 |
+
xl0, xr0: float array_like, optional
|
| 480 |
+
Starting guesses for left and right endpoints of the bracket. Must be
|
| 481 |
+
broadcastable with one another and with `xm0`.
|
| 482 |
+
xmin, xmax : float array_like, optional
|
| 483 |
+
Minimum and maximum allowable endpoints of the bracket, inclusive. Must
|
| 484 |
+
be broadcastable with `xl0`, `xm0`, and `xr0`.
|
| 485 |
+
factor : float array_like, optional
|
| 486 |
+
Controls expansion of bracket endpoint in downhill direction. Works
|
| 487 |
+
differently in the cases where a limit is set in the downhill direction
|
| 488 |
+
with `xmax` or `xmin`. See Notes.
|
| 489 |
+
args : tuple, optional
|
| 490 |
+
Additional positional arguments to be passed to `func`. Must be arrays
|
| 491 |
+
broadcastable with `xl0`, `xm0`, `xr0`, `xmin`, and `xmax`. If the
|
| 492 |
+
callable to be bracketed requires arguments that are not broadcastable
|
| 493 |
+
with these arrays, wrap that callable with `func` such that `func`
|
| 494 |
+
accepts only ``x`` and broadcastable arrays.
|
| 495 |
+
maxiter : int, optional
|
| 496 |
+
The maximum number of iterations of the algorithm to perform. The number
|
| 497 |
+
of function evaluations is three greater than the number of iterations.
|
| 498 |
+
|
| 499 |
+
Returns
|
| 500 |
+
-------
|
| 501 |
+
res : _RichResult
|
| 502 |
+
An instance of `scipy._lib._util._RichResult` with the following
|
| 503 |
+
attributes. The descriptions are written as though the values will be
|
| 504 |
+
scalars; however, if `func` returns an array, the outputs will be
|
| 505 |
+
arrays of the same shape.
|
| 506 |
+
|
| 507 |
+
xl, xm, xr : float
|
| 508 |
+
The left, middle, and right points of the bracket, if the algorithm
|
| 509 |
+
terminated successfully.
|
| 510 |
+
fl, fm, fr : float
|
| 511 |
+
The function value at the left, middle, and right points of the bracket.
|
| 512 |
+
nfev : int
|
| 513 |
+
The number of function evaluations required to find the bracket.
|
| 514 |
+
nit : int
|
| 515 |
+
The number of iterations of the algorithm that were performed.
|
| 516 |
+
status : int
|
| 517 |
+
An integer representing the exit status of the algorithm.
|
| 518 |
+
|
| 519 |
+
- ``0`` : The algorithm produced a valid bracket.
|
| 520 |
+
- ``-1`` : The bracket expanded to the allowable limits. Assuming
|
| 521 |
+
unimodality, this implies the endpoint at the limit is a
|
| 522 |
+
minimizer.
|
| 523 |
+
- ``-2`` : The maximum number of iterations was reached.
|
| 524 |
+
- ``-3`` : A non-finite value was encountered.
|
| 525 |
+
- ``-4`` : ``None`` shall pass.
|
| 526 |
+
- ``-5`` : The initial bracket does not satisfy
|
| 527 |
+
`xmin <= xl0 < xm0 < xr0 <= xmax`.
|
| 528 |
+
|
| 529 |
+
success : bool
|
| 530 |
+
``True`` when the algorithm terminated successfully (status ``0``).
|
| 531 |
+
|
| 532 |
+
Notes
|
| 533 |
+
-----
|
| 534 |
+
Similar to `scipy.optimize.bracket`, this function seeks to find real
|
| 535 |
+
points ``xl < xm < xr`` such that ``f(xl) >= f(xm)`` and ``f(xr) >= f(xm)``,
|
| 536 |
+
where at least one of the inequalities is strict. Unlike `scipy.optimize.bracket`,
|
| 537 |
+
this function can operate in a vectorized manner on array input, so long as
|
| 538 |
+
the input arrays are broadcastable with each other. Also unlike
|
| 539 |
+
`scipy.optimize.bracket`, users may specify minimum and maximum endpoints
|
| 540 |
+
for the desired bracket.
|
| 541 |
+
|
| 542 |
+
Given an initial trio of points ``xl = xl0``, ``xm = xm0``, ``xr = xr0``,
|
| 543 |
+
the algorithm checks if these points already give a valid bracket. If not,
|
| 544 |
+
a new endpoint, ``w`` is chosen in the "downhill" direction, ``xm`` becomes the new
|
| 545 |
+
opposite endpoint, and either `xl` or `xr` becomes the new middle point,
|
| 546 |
+
depending on which direction is downhill. The algorithm repeats from here.
|
| 547 |
+
|
| 548 |
+
The new endpoint `w` is chosen differently depending on whether or not a
|
| 549 |
+
boundary `xmin` or `xmax` has been set in the downhill direction. Without
|
| 550 |
+
loss of generality, suppose the downhill direction is to the right, so that
|
| 551 |
+
``f(xl) > f(xm) > f(xr)``. If there is no boundary to the right, then `w`
|
| 552 |
+
is chosen to be ``xr + factor * (xr - xm)`` where `factor` is controlled by
|
| 553 |
+
the user (defaults to 2.0) so that step sizes increase in geometric proportion.
|
| 554 |
+
If there is a boundary, `xmax` in this case, then `w` is chosen to be
|
| 555 |
+
``xmax - (xmax - xr)/factor``, with steps slowing to a stop at
|
| 556 |
+
`xmax`. This cautious approach ensures that a minimum near but distinct from
|
| 557 |
+
the boundary isn't missed while also detecting whether or not the `xmax` is
|
| 558 |
+
a minimizer when `xmax` is reached after a finite number of steps.
|
| 559 |
+
""" # noqa: E501
|
| 560 |
+
callback = None # works; I just don't want to test it
|
| 561 |
+
|
| 562 |
+
temp = _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter)
|
| 563 |
+
func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
|
| 564 |
+
|
| 565 |
+
xs = (xl0, xm0, xr0)
|
| 566 |
+
temp = eim._initialize(func, xs, args)
|
| 567 |
+
func, xs, fs, args, shape, dtype, xp = temp
|
| 568 |
+
|
| 569 |
+
xl0, xm0, xr0 = xs
|
| 570 |
+
fl0, fm0, fr0 = fs
|
| 571 |
+
xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
|
| 572 |
+
xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
|
| 573 |
+
invalid_bracket = ~((xmin <= xl0) & (xl0 < xm0) & (xm0 < xr0) & (xr0 <= xmax))
|
| 574 |
+
# We will modify factor later on so make a copy. np.broadcast_to returns
|
| 575 |
+
# a read-only view.
|
| 576 |
+
factor = np.broadcast_to(factor, shape).astype(dtype, copy=True).ravel()
|
| 577 |
+
|
| 578 |
+
# To simplify the logic, swap xl and xr if f(xl) < f(xr). We should always be
|
| 579 |
+
# marching downhill in the direction from xl to xr.
|
| 580 |
+
comp = fl0 < fr0
|
| 581 |
+
xl0[comp], xr0[comp] = xr0[comp], xl0[comp]
|
| 582 |
+
fl0[comp], fr0[comp] = fr0[comp], fl0[comp]
|
| 583 |
+
# We only need the boundary in the direction we're traveling.
|
| 584 |
+
limit = np.where(comp, xmin, xmax)
|
| 585 |
+
|
| 586 |
+
unlimited = np.isinf(limit)
|
| 587 |
+
limited = ~unlimited
|
| 588 |
+
step = np.empty_like(xl0)
|
| 589 |
+
|
| 590 |
+
step[unlimited] = (xr0[unlimited] - xm0[unlimited])
|
| 591 |
+
step[limited] = (limit[limited] - xr0[limited])
|
| 592 |
+
|
| 593 |
+
# Step size is divided by factor for case where there is a limit.
|
| 594 |
+
factor[limited] = 1 / factor[limited]
|
| 595 |
+
|
| 596 |
+
status = np.full_like(xl0, eim._EINPROGRESS, dtype=int)
|
| 597 |
+
status[invalid_bracket] = eim._EINPUTERR
|
| 598 |
+
nit, nfev = 0, 3
|
| 599 |
+
|
| 600 |
+
work = _RichResult(xl=xl0, xm=xm0, xr=xr0, xr0=xr0, fl=fl0, fm=fm0, fr=fr0,
|
| 601 |
+
step=step, limit=limit, limited=limited, factor=factor, nit=nit,
|
| 602 |
+
nfev=nfev, status=status, args=args)
|
| 603 |
+
|
| 604 |
+
res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xm', 'xm'), ('xr', 'xr'),
|
| 605 |
+
('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'), ('fm', 'fm'),
|
| 606 |
+
('fr', 'fr')]
|
| 607 |
+
|
| 608 |
+
def pre_func_eval(work):
|
| 609 |
+
work.step *= work.factor
|
| 610 |
+
x = np.empty_like(work.xr)
|
| 611 |
+
x[~work.limited] = work.xr0[~work.limited] + work.step[~work.limited]
|
| 612 |
+
x[work.limited] = work.limit[work.limited] - work.step[work.limited]
|
| 613 |
+
# Since the new bracket endpoint is calculated from an offset with the
|
| 614 |
+
# limit, it may be the case that the new endpoint equals the old endpoint,
|
| 615 |
+
# when the old endpoint is sufficiently close to the limit. We use the
|
| 616 |
+
# limit itself as the new endpoint in these cases.
|
| 617 |
+
x[work.limited] = np.where(
|
| 618 |
+
x[work.limited] == work.xr[work.limited],
|
| 619 |
+
work.limit[work.limited],
|
| 620 |
+
x[work.limited],
|
| 621 |
+
)
|
| 622 |
+
return x
|
| 623 |
+
|
| 624 |
+
def post_func_eval(x, f, work):
|
| 625 |
+
work.xl, work.xm, work.xr = work.xm, work.xr, x
|
| 626 |
+
work.fl, work.fm, work.fr = work.fm, work.fr, f
|
| 627 |
+
|
| 628 |
+
def check_termination(work):
|
| 629 |
+
# Condition 0: Initial bracket is invalid.
|
| 630 |
+
stop = (work.status == eim._EINPUTERR)
|
| 631 |
+
|
| 632 |
+
# Condition 1: A valid bracket has been found.
|
| 633 |
+
i = (
|
| 634 |
+
(work.fl >= work.fm) & (work.fr > work.fm)
|
| 635 |
+
| (work.fl > work.fm) & (work.fr >= work.fm)
|
| 636 |
+
) & ~stop
|
| 637 |
+
work.status[i] = eim._ECONVERGED
|
| 638 |
+
stop[i] = True
|
| 639 |
+
|
| 640 |
+
# Condition 2: Moving end of bracket reaches limit.
|
| 641 |
+
i = (work.xr == work.limit) & ~stop
|
| 642 |
+
work.status[i] = _ELIMITS
|
| 643 |
+
stop[i] = True
|
| 644 |
+
|
| 645 |
+
# Condition 3: non-finite value encountered
|
| 646 |
+
i = ~(np.isfinite(work.xr) & np.isfinite(work.fr)) & ~stop
|
| 647 |
+
work.status[i] = eim._EVALUEERR
|
| 648 |
+
stop[i] = True
|
| 649 |
+
|
| 650 |
+
return stop
|
| 651 |
+
|
| 652 |
+
def post_termination_check(work):
|
| 653 |
+
pass
|
| 654 |
+
|
| 655 |
+
def customize_result(res, shape):
|
| 656 |
+
# Reorder entries of xl and xr if they were swapped due to f(xl0) < f(xr0).
|
| 657 |
+
comp = res['xl'] > res['xr']
|
| 658 |
+
res['xl'][comp], res['xr'][comp] = res['xr'][comp], res['xl'][comp]
|
| 659 |
+
res['fl'][comp], res['fr'][comp] = res['fr'][comp], res['fl'][comp]
|
| 660 |
+
return shape
|
| 661 |
+
|
| 662 |
+
return eim._loop(work, callback, shape,
|
| 663 |
+
maxiter, func, args, dtype,
|
| 664 |
+
pre_func_eval, post_func_eval,
|
| 665 |
+
check_termination, post_termination_check,
|
| 666 |
+
customize_result, res_work_pairs, xp)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py
ADDED
|
@@ -0,0 +1,549 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import numpy as np
|
| 3 |
+
import scipy._lib._elementwise_iterative_method as eim
|
| 4 |
+
from scipy._lib._util import _RichResult
|
| 5 |
+
from scipy._lib._array_api import xp_clip, xp_minimum, xp_sign
|
| 6 |
+
|
| 7 |
+
# TODO:
|
| 8 |
+
# - (maybe?) don't use fancy indexing assignment
|
| 9 |
+
# - figure out how to replace the new `try`/`except`s
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _chandrupatla(func, a, b, *, args=(), xatol=None, xrtol=None,
                  fatol=None, frtol=0, maxiter=None, callback=None):
    """Find the root of an elementwise function using Chandrupatla's algorithm.

    For each element of the output of `func`, `_chandrupatla` seeks the scalar
    root that makes the element 0. This function allows for `a`, `b`, and the
    output of `func` to be of any broadcastable shapes.

    Parameters
    ----------
    func : callable
        The function whose root is desired. The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of components of any type(s).
        ``func`` must be an elementwise function: each element ``func(x)[i]``
        must equal ``func(x[i])`` for all indices ``i``. `_chandrupatla`
        seeks an array ``x`` such that ``func(x)`` is an array of zeros.
    a, b : array_like
        The lower and upper bounds of the root of the function. Must be
        broadcastable with one another.
    args : tuple, optional
        Additional positional arguments to be passed to `func`.
    xatol, xrtol, fatol, frtol : float, optional
        Absolute and relative tolerances on the root and function value.
        See Notes for details.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.
        The default is the maximum possible number of bisections within
        the (normal) floating point numbers of the relevant dtype.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `_chandrupatla` (but containing the current
        iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `_chandrupatla` will return a result.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.

        x : float
            The root of the function, if the algorithm terminated successfully.
        nfev : int
            The number of times the function was called to find the root.
        nit : int
            The number of iterations of Chandrupatla's algorithm performed.
        status : int
            An integer representing the exit status of the algorithm.
            ``0`` : The algorithm converged to the specified tolerances.
            ``-1`` : The algorithm encountered an invalid bracket.
            ``-2`` : The maximum number of iterations was reached.
            ``-3`` : A non-finite value was encountered.
            ``-4`` : Iteration was terminated by `callback`.
            ``1`` : The algorithm is proceeding normally (in `callback` only).
        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).
        fun : float
            The value of `func` evaluated at `x`.
        xl, xr : float
            The lower and upper ends of the bracket.
        fl, fr : float
            The function value at the lower and upper ends of the bracket.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    If ``xl`` and ``xr`` are the left and right ends of the bracket,
    ``xmin = xl if abs(func(xl)) <= abs(func(xr)) else xr``,
    and ``fmin0 = min(func(a), func(b))``, then the algorithm is considered to
    have converged when ``abs(xr - xl) < xatol + abs(xmin) * xrtol`` or
    ``fun(xmin) <= fatol + abs(fmin0) * frtol``. This is equivalent to the
    termination condition described in [1]_ with ``xrtol = 4e-10``,
    ``xatol = 1e-5``, and ``fatol = frtol = 0``. The default values are
    ``xatol = 4*tiny``, ``xrtol = 4*eps``, ``frtol = 0``, and ``fatol = tiny``,
    where ``eps`` and ``tiny`` are the precision and smallest normal number
    of the result ``dtype`` of function inputs and outputs.

    References
    ----------

    .. [1] Chandrupatla, Tirupathi R.
        "A new hybrid quadratic/bisection algorithm for finding the zero of a
        nonlinear function without using derivatives".
        Advances in Engineering Software, 28(3), 145-149.
        https://doi.org/10.1016/s0965-9978(96)00051-8

    See Also
    --------
    brentq, brenth, ridder, bisect, newton

    Examples
    --------
    >>> from scipy import optimize
    >>> def f(x, c):
    ...     return x**3 - 2*x - c
    >>> c = 5
    >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
    >>> res.x
    2.0945514818937463

    >>> c = [3, 4, 5]
    >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
    >>> res.x
    array([1.8932892 , 2.        , 2.09455148])

    """
    res = _chandrupatla_iv(func, args, xatol, xrtol,
                           fatol, frtol, maxiter, callback)
    func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res

    # Initialization
    temp = eim._initialize(func, (a, b), args)
    func, xs, fs, args, shape, dtype, xp = temp
    x1, x2 = xs
    f1, f2 = fs
    status = xp.full_like(x1, eim._EINPROGRESS, dtype=xp.int32)  # in progress
    nit, nfev = 0, 2  # two function evaluations performed above
    finfo = xp.finfo(dtype)
    xatol = 4*finfo.smallest_normal if xatol is None else xatol
    xrtol = 4*finfo.eps if xrtol is None else xrtol
    fatol = finfo.smallest_normal if fatol is None else fatol
    # Redefine `frtol` once up front so that the per-iteration check is a
    # simple comparison against `fatol + frtol`.
    frtol = frtol * xp_minimum(xp.abs(f1), xp.abs(f2))
    # Default `maxiter`: number of bisections needed to traverse the normal
    # floating point range of this dtype.
    maxiter = (math.log2(finfo.max) - math.log2(finfo.smallest_normal)
               if maxiter is None else maxiter)
    work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=None, f3=None, t=0.5,
                       xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
                       nit=nit, nfev=nfev, status=status)
    res_work_pairs = [('status', 'status'), ('x', 'xmin'), ('fun', 'fmin'),
                      ('nit', 'nit'), ('nfev', 'nfev'), ('xl', 'x1'),
                      ('fl', 'f1'), ('xr', 'x2'), ('fr', 'f2')]

    def pre_func_eval(work):
        # [1] Figure 1 (first box): new abscissa is a fraction `t` of the way
        # from x1 to x2.
        x = work.x1 + work.t * (work.x2 - work.x1)
        return x

    def post_func_eval(x, f, work):
        # [1] Figure 1 (first diamond and boxes)
        # Note: y/n are reversed in figure; compare to BASIC in appendix
        work.x3, work.f3 = (xp.asarray(work.x2, copy=True),
                            xp.asarray(work.f2, copy=True))
        j = xp.sign(f) == xp.sign(work.f1)
        nj = ~j
        work.x3[j], work.f3[j] = work.x1[j], work.f1[j]
        work.x2[nj], work.f2[nj] = work.x1[nj], work.f1[nj]
        work.x1, work.f1 = x, f

    def check_termination(work):
        # [1] Figure 1 (second diamond)
        # Check for all terminal conditions and record statuses.

        # See [1] Section 4 (first two sentences)
        i = xp.abs(work.f1) < xp.abs(work.f2)
        work.xmin = xp.where(i, work.x1, work.x2)
        work.fmin = xp.where(i, work.f1, work.f2)
        stop = xp.zeros_like(work.x1, dtype=xp.bool)  # termination condition met

        # If function value tolerance is met, report successful convergence,
        # regardless of other conditions. Note that `frtol` has been redefined
        # as `frtol = frtol * minimum(f1, f2)`, where `f1` and `f2` are the
        # function evaluated at the original ends of the bracket.
        i = xp.abs(work.fmin) <= work.fatol + work.frtol
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        # If the bracket is no longer valid, report failure (unless a function
        # tolerance is met, as detected above).
        i = (xp_sign(work.f1) == xp_sign(work.f2)) & ~stop
        NaN = xp.asarray(xp.nan, dtype=work.xmin.dtype)
        work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._ESIGNERR
        stop[i] = True

        # If the abscissae are non-finite or either function value is NaN,
        # report failure. A *single* NaN function value must be caught here:
        # NaN compares unequal to everything, so the sign-based bracket check
        # above does not flag it, and the interval-width check below could
        # otherwise report spurious convergence.
        x_nonfinite = ~(xp.isfinite(work.x1) & xp.isfinite(work.x2))
        f_nan = xp.isnan(work.f1) | xp.isnan(work.f2)
        i = (x_nonfinite | f_nan) & ~stop
        work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._EVALUEERR
        stop[i] = True

        # This is the convergence criterion used in bisect. Chandrupatla's
        # criterion is equivalent to this except with a factor of 4 on `xrtol`.
        work.dx = xp.abs(work.x2 - work.x1)
        work.tol = xp.abs(work.xmin) * work.xrtol + work.xatol
        i = work.dx < work.tol
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        return stop

    def post_termination_check(work):
        # [1] Figure 1 (third diamond and boxes / Equation 1)
        xi1 = (work.x1 - work.x2) / (work.x3 - work.x2)
        phi1 = (work.f1 - work.f2) / (work.f3 - work.f2)
        alpha = (work.x3 - work.x1) / (work.x2 - work.x1)
        j = ((1 - xp.sqrt(1 - xi1)) < phi1) & (phi1 < xp.sqrt(xi1))

        # Inverse quadratic interpolation where the test above passes;
        # bisection (t = 0.5) everywhere else.
        f1j, f2j, f3j, alphaj = work.f1[j], work.f2[j], work.f3[j], alpha[j]
        t = xp.full_like(alpha, 0.5)
        t[j] = (f1j / (f1j - f2j) * f3j / (f3j - f2j)
                - alphaj * f1j / (f3j - f1j) * f2j / (f2j - f3j))

        # [1] Figure 1 (last box; see also BASIC in appendix with comment
        # "Adjust T Away from the Interval Boundary")
        tl = 0.5 * work.tol / work.dx
        work.t = xp_clip(t, tl, 1 - tl)

    def customize_result(res, shape):
        # Present the bracket with xl <= xr regardless of internal ordering.
        xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
        i = res['xl'] < res['xr']
        res['xl'] = xp.where(i, xl, xr)
        res['xr'] = xp.where(i, xr, xl)
        res['fl'] = xp.where(i, fl, fr)
        res['fr'] = xp.where(i, fr, fl)
        return shape

    return eim._loop(work, callback, shape, maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval, check_termination,
                     post_termination_check, customize_result, res_work_pairs,
                     xp=xp)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _chandrupatla_iv(func, args, xatol, xrtol,
|
| 245 |
+
fatol, frtol, maxiter, callback):
|
| 246 |
+
# Input validation for `_chandrupatla`
|
| 247 |
+
|
| 248 |
+
if not callable(func):
|
| 249 |
+
raise ValueError('`func` must be callable.')
|
| 250 |
+
|
| 251 |
+
if not np.iterable(args):
|
| 252 |
+
args = (args,)
|
| 253 |
+
|
| 254 |
+
# tolerances are floats, not arrays; OK to use NumPy
|
| 255 |
+
tols = np.asarray([xatol if xatol is not None else 1,
|
| 256 |
+
xrtol if xrtol is not None else 1,
|
| 257 |
+
fatol if fatol is not None else 1,
|
| 258 |
+
frtol if frtol is not None else 1])
|
| 259 |
+
if (not np.issubdtype(tols.dtype, np.number) or np.any(tols < 0)
|
| 260 |
+
or np.any(np.isnan(tols)) or tols.shape != (4,)):
|
| 261 |
+
raise ValueError('Tolerances must be non-negative scalars.')
|
| 262 |
+
|
| 263 |
+
if maxiter is not None:
|
| 264 |
+
maxiter_int = int(maxiter)
|
| 265 |
+
if maxiter != maxiter_int or maxiter < 0:
|
| 266 |
+
raise ValueError('`maxiter` must be a non-negative integer.')
|
| 267 |
+
|
| 268 |
+
if callback is not None and not callable(callback):
|
| 269 |
+
raise ValueError('`callback` must be callable.')
|
| 270 |
+
|
| 271 |
+
return func, args, xatol, xrtol, fatol, frtol, maxiter, callback
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def _chandrupatla_minimize(func, x1, x2, x3, *, args=(), xatol=None,
                           xrtol=None, fatol=None, frtol=None, maxiter=100,
                           callback=None):
    """Find the minimizer of an elementwise function.

    For each element of the output of `func`, `_chandrupatla_minimize` seeks
    the scalar minimizer that minimizes the element. This function allows for
    `x1`, `x2`, `x3`, and the elements of `args` to be arrays of any
    broadcastable shapes.

    Parameters
    ----------
    func : callable
        The function whose minimizer is desired. The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with `x`. ``func`` must be an elementwise function: each element
        ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
        `_chandrupatla` seeks an array ``x`` such that ``func(x)`` is an array
        of minima.
    x1, x2, x3 : array_like
        The abscissae of a standard scalar minimization bracket. A bracket is
        valid if ``x1 < x2 < x3`` and ``func(x1) > func(x2) <= func(x3)``.
        Must be broadcastable with one another and `args`.
    args : tuple, optional
        Additional positional arguments to be passed to `func`. Must be arrays
        broadcastable with `x1`, `x2`, and `x3`. If the callable to be
        differentiated requires arguments that are not broadcastable with `x`,
        wrap that callable with `func` such that `func` accepts only `x` and
        broadcastable arrays.
    xatol, xrtol, fatol, frtol : float, optional
        Absolute and relative tolerances on the minimizer and function value.
        See Notes for details.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `_chandrupatla_minimize` (but containing
        the current iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `_chandrupatla_minimize` will return a result.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. (The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.)

        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).
        status : int
            An integer representing the exit status of the algorithm.
            ``0`` : The algorithm converged to the specified tolerances.
            ``-1`` : The algorithm encountered an invalid bracket.
            ``-2`` : The maximum number of iterations was reached.
            ``-3`` : A non-finite value was encountered.
            ``-4`` : Iteration was terminated by `callback`.
            ``1`` : The algorithm is proceeding normally (in `callback` only).
        x : float
            The minimizer of the function, if the algorithm terminated
            successfully.
        fun : float
            The value of `func` evaluated at `x`.
        nfev : int
            The number of points at which `func` was evaluated.
        nit : int
            The number of iterations of the algorithm that were performed.
        xl, xm, xr : float
            The final three-point bracket.
        fl, fm, fr : float
            The function value at the bracket points.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    If ``x1 < x2 < x3`` are the points of the bracket and ``f1 > f2 <= f3``
    are the values of ``func`` at those points, then the algorithm is
    considered to have converged when ``x3 - x1 <= abs(x2)*xrtol + xatol``
    or ``(f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol``. Note that first of
    these differs from the termination conditions described in [1]_. The
    default values of `xrtol` is the square root of the precision of the
    appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal
    number of the appropriate dtype.

    References
    ----------
    .. [1] Chandrupatla, Tirupathi R. (1998).
        "An efficient quadratic fit-sectioning algorithm for minimization
        without derivatives".
        Computer Methods in Applied Mechanics and Engineering, 152 (1-2),
        211-217. https://doi.org/10.1016/S0045-7825(97)00190-4

    See Also
    --------
    golden, brent, bounded

    Examples
    --------
    >>> from scipy.optimize._chandrupatla import _chandrupatla_minimize
    >>> def f(x, args=1):
    ...     return (x - args)**2
    >>> res = _chandrupatla_minimize(f, -5, 0, 5)
    >>> res.x
    1.0
    >>> c = [1, 1.5, 2]
    >>> res = _chandrupatla_minimize(f, -5, 0, 5, args=(c,))
    >>> res.x
    array([1. , 1.5, 2. ])
    """
    res = _chandrupatla_iv(func, args, xatol, xrtol,
                           fatol, frtol, maxiter, callback)
    func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res

    # Initialization: broadcast/ravel the bracket points and evaluate `func`
    # at each of them once.
    xs = (x1, x2, x3)
    temp = eim._initialize(func, xs, args)
    func, xs, fs, args, shape, dtype, xp = temp  # line split for PEP8
    x1, x2, x3 = xs
    f1, f2, f3 = fs
    phi = dtype.type(0.5 + 0.5*5**0.5)  # golden ratio
    status = np.full_like(x1, eim._EINPROGRESS, dtype=int)  # in progress
    nit, nfev = 0, 3  # three function evaluations performed above
    # Default tolerances: smallest normal number of the working dtype, except
    # `xrtol`, which defaults to the square root of the dtype's precision.
    fatol = np.finfo(dtype).tiny if fatol is None else fatol
    frtol = np.finfo(dtype).tiny if frtol is None else frtol
    xatol = np.finfo(dtype).tiny if xatol is None else xatol
    xrtol = np.sqrt(np.finfo(dtype).eps) if xrtol is None else xrtol

    # Ensure that x1 < x2 < x3 initially by sorting each elementwise bracket.
    xs, fs = np.vstack((x1, x2, x3)), np.vstack((f1, f2, f3))
    i = np.argsort(xs, axis=0)
    x1, x2, x3 = np.take_along_axis(xs, i, axis=0)
    f1, f2, f3 = np.take_along_axis(fs, i, axis=0)
    q0 = x3.copy()  # "At the start, q0 is set at x3..." ([1] after (7))

    work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=x3, f3=f3, phi=phi,
                       xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
                       nit=nit, nfev=nfev, status=status, q0=q0, args=args)
    # Mapping from result attribute names to `work` attribute names.
    res_work_pairs = [('status', 'status'),
                      ('x', 'x2'), ('fun', 'f2'),
                      ('nit', 'nit'), ('nfev', 'nfev'),
                      ('xl', 'x1'), ('xm', 'x2'), ('xr', 'x3'),
                      ('fl', 'f1'), ('fm', 'f2'), ('fr', 'f3')]

    def pre_func_eval(work):
        # Propose the next abscissa to evaluate.
        # `_check_termination` is called first -> `x3 - x2 > x2 - x1`
        # But let's calculate a few terms that we'll reuse
        x21 = work.x2 - work.x1
        x32 = work.x3 - work.x2

        # [1] Section 3. "The quadratic minimum point Q1 is calculated using
        # the relations developed in the previous section." [1] Section 2 (5/6)
        A = x21 * (work.f3 - work.f2)
        B = x32 * (work.f1 - work.f2)
        C = A / (A + B)
        # q1 = C * (work.x1 + work.x2) / 2 + (1 - C) * (work.x2 + work.x3) / 2
        q1 = 0.5 * (C*(work.x1 - work.x3) + work.x2 + work.x3)  # much faster
        # this is an array, so multiplying by 0.5 does not change dtype

        # "If Q1 and Q0 are sufficiently close... Q1 is accepted if it is
        # sufficiently away from the inside point x2"
        i = abs(q1 - work.q0) < 0.5 * abs(x21)  # [1] (7)
        xi = q1[i]
        # Later, after (9), "If the point Q1 is in a +/- xtol neighborhood of
        # x2, the new point is chosen in the larger interval at a distance
        # tol away from x2."
        # See also QBASIC code after "Accept Ql adjust if close to X2".
        j = abs(q1[i] - work.x2[i]) <= work.xtol[i]
        xi[j] = work.x2[i][j] + np.sign(x32[i][j]) * work.xtol[i][j]

        # "If condition (7) is not satisfied, golden sectioning of the larger
        # interval is carried out to introduce the new point."
        # (For simplicity, we go ahead and calculate it for all points, but we
        # change the elements for which the condition was satisfied.)
        x = work.x2 + (2 - work.phi) * x32
        x[i] = xi

        # "We define Q0 as the value of Q1 at the previous iteration."
        work.q0 = q1
        return x

    def post_func_eval(x, f, work):
        # Standard logic for updating a three-point bracket based on a new
        # point. In QBASIC code, see "IF SGN(X-X2) = SGN(X3-X2) THEN...".
        # There is an awful lot of data copying going on here; this would
        # probably benefit from code optimization or implementation in Pythran.
        # NOTE: statement order matters throughout — each masked assignment
        # below mutates arrays read by later statements.
        i = np.sign(x - work.x2) == np.sign(work.x3 - work.x2)
        # Case 1: the new point fell in the (x2, x3) interval.
        xi, x1i, x2i, x3i = x[i], work.x1[i], work.x2[i], work.x3[i],
        fi, f1i, f2i, f3i = f[i], work.f1[i], work.f2[i], work.f3[i]
        j = fi > f2i
        x3i[j], f3i[j] = xi[j], fi[j]
        j = ~j
        x1i[j], f1i[j], x2i[j], f2i[j] = x2i[j], f2i[j], xi[j], fi[j]

        # Case 2: the new point fell in the (x1, x2) interval (mirror image).
        ni = ~i
        xni, x1ni, x2ni, x3ni = x[ni], work.x1[ni], work.x2[ni], work.x3[ni],
        fni, f1ni, f2ni, f3ni = f[ni], work.f1[ni], work.f2[ni], work.f3[ni]
        j = fni > f2ni
        x1ni[j], f1ni[j] = xni[j], fni[j]
        j = ~j
        x3ni[j], f3ni[j], x2ni[j], f2ni[j] = x2ni[j], f2ni[j], xni[j], fni[j]

        # Write the updated brackets back into the shared work arrays.
        work.x1[i], work.x2[i], work.x3[i] = x1i, x2i, x3i
        work.f1[i], work.f2[i], work.f3[i] = f1i, f2i, f3i
        work.x1[ni], work.x2[ni], work.x3[ni] = x1ni, x2ni, x3ni,
        work.f1[ni], work.f2[ni], work.f3[ni] = f1ni, f2ni, f3ni

    def check_termination(work):
        # Check for all terminal conditions and record statuses.
        stop = np.zeros_like(work.x1, dtype=bool)  # termination condition met

        # Bracket is invalid; stop and don't return minimizer/minimum.
        # (Strict inequalities: a plateau with f2 == f1 is tolerated.)
        i = ((work.f2 > work.f1) | (work.f2 > work.f3))
        work.x2[i], work.f2[i] = np.nan, np.nan
        stop[i], work.status[i] = True, eim._ESIGNERR

        # Non-finite values; stop and don't return minimizer/minimum.
        # (Summing the six arrays is a cheap way to test them all at once:
        # the sum is finite only if every term is finite.)
        finite = np.isfinite(work.x1+work.x2+work.x3+work.f1+work.f2+work.f3)
        i = ~(finite | stop)
        work.x2[i], work.f2[i] = np.nan, np.nan
        stop[i], work.status[i] = True, eim._EVALUEERR

        # [1] Section 3 "Points 1 and 3 are interchanged if necessary to make
        # the (x2, x3) the larger interval."
        # Note: I had used np.choose; this is much faster. This would be a good
        # place to save e.g. `work.x3 - work.x2` for reuse, but I tried and
        # didn't notice a speed boost, so let's keep it simple.
        i = abs(work.x3 - work.x2) < abs(work.x2 - work.x1)
        temp = work.x1[i]
        work.x1[i] = work.x3[i]
        work.x3[i] = temp
        temp = work.f1[i]
        work.f1[i] = work.f3[i]
        work.f3[i] = temp

        # [1] Section 3 (bottom of page 212)
        # "We set a tolerance value xtol..."
        work.xtol = abs(work.x2) * work.xrtol + work.xatol  # [1] (8)
        # "The convergence based on interval is achieved when..."
        # Note: Equality allowed in case of `xtol=0`
        i = abs(work.x3 - work.x2) <= 2 * work.xtol  # [1] (9)

        # "We define ftol using..."
        ftol = abs(work.f2) * work.frtol + work.fatol  # [1] (10)
        # "The convergence based on function values is achieved when..."
        # Note 1: modify in place to incorporate tolerance on function value.
        # Note 2: factor of 2 is not in the text; see QBASIC start of DO loop
        i |= (work.f1 - 2 * work.f2 + work.f3) <= 2*ftol  # [1] (11)
        i &= ~stop
        stop[i], work.status[i] = True, eim._ECONVERGED

        return stop

    def post_termination_check(work):
        # Nothing to do between termination check and next proposal.
        pass

    def customize_result(res, shape):
        # Present the bracket with xl <= xr regardless of internal ordering
        # (check_termination may have swapped points 1 and 3).
        xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
        i = res['xl'] < res['xr']
        res['xl'] = np.choose(i, (xr, xl))
        res['xr'] = np.choose(i, (xl, xr))
        res['fl'] = np.choose(i, (fr, fl))
        res['fr'] = np.choose(i, (fl, fr))
        return shape

    return eim._loop(work, callback, shape, maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval, check_termination,
                     post_termination_check, customize_result, res_work_pairs,
                     xp=xp)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Interface to Constrained Optimization By Linear Approximation
|
| 3 |
+
|
| 4 |
+
Functions
|
| 5 |
+
---------
|
| 6 |
+
.. autosummary::
|
| 7 |
+
:toctree: generated/
|
| 8 |
+
|
| 9 |
+
fmin_cobyla
|
| 10 |
+
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import functools
|
| 14 |
+
from threading import RLock
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
from scipy.optimize import _cobyla as cobyla
|
| 18 |
+
from ._optimize import (OptimizeResult, _check_unknown_options,
|
| 19 |
+
_prepare_scalar_function)
|
| 20 |
+
try:
|
| 21 |
+
from itertools import izip
|
| 22 |
+
except ImportError:
|
| 23 |
+
izip = zip
|
| 24 |
+
|
| 25 |
+
__all__ = ['fmin_cobyla']
|
| 26 |
+
|
| 27 |
+
# _cobyla.minimize is not thread-safe (an unknown f2py bug can segfault
# under concurrent calls; see gh-9658), so every entry point into the
# Fortran routine is serialized through this module-level lock.
_module_lock = RLock()


def synchronized(func):
    """Decorator serializing calls to *func* through the module lock."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Hold the (reentrant) lock for the whole wrapped call; reentrancy
        # keeps nested synchronized calls from deadlocking.
        with _module_lock:
            return func(*args, **kwargs)
    return wrapper
|
| 37 |
+
|
| 38 |
+
@synchronized
def fmin_cobyla(func, x0, cons, args=(), consargs=None, rhobeg=1.0,
                rhoend=1e-4, maxfun=1000, disp=None, catol=2e-4,
                *, callback=None):
    """
    Minimize a function using the Constrained Optimization By Linear
    Approximation (COBYLA) method. This method wraps a FORTRAN
    implementation of the algorithm.

    Parameters
    ----------
    func : callable
        Function to minimize. In the form func(x, \\*args).
    x0 : ndarray
        Initial guess.
    cons : sequence
        Constraint functions; must all be ``>=0`` (a single function
        if only 1 constraint). Each function takes the parameters `x`
        as its first argument, and it can return either a single number or
        an array or list of numbers.
    args : tuple, optional
        Extra arguments to pass to function.
    consargs : tuple, optional
        Extra arguments to pass to constraint functions (default of None means
        use same extra arguments as those passed to func).
        Use ``()`` for no extra arguments.
    rhobeg : float, optional
        Reasonable initial changes to the variables.
    rhoend : float, optional
        Final accuracy in the optimization (not precisely guaranteed). This
        is a lower bound on the size of the trust region.
    disp : {0, 1, 2, 3}, optional
        Controls the frequency of output; 0 implies no output.
    maxfun : int, optional
        Maximum number of function evaluations.
    catol : float, optional
        Absolute tolerance for constraint violations.
    callback : callable, optional
        Called after each iteration, as ``callback(x)``, where ``x`` is the
        current parameter vector.

    Returns
    -------
    x : ndarray
        The argument that minimises `f`.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'COBYLA' `method` in particular.

    Notes
    -----
    This algorithm is based on linear approximations to the objective
    function and each constraint. We briefly describe the algorithm.

    Suppose the function is being minimized over k variables. At the
    jth iteration the algorithm has k+1 points v_1, ..., v_(k+1),
    an approximate solution x_j, and a radius RHO_j.
    (i.e., linear plus a constant) approximations to the objective
    function and constraint functions such that their function values
    agree with the linear approximation on the k+1 points v_1,.., v_(k+1).
    This gives a linear program to solve (where the linear approximations
    of the constraint functions are constrained to be non-negative).

    However, the linear approximations are likely only good
    approximations near the current simplex, so the linear program is
    given the further requirement that the solution, which
    will become x_(j+1), must be within RHO_j from x_j. RHO_j only
    decreases, never increases. The initial RHO_j is rhobeg and the
    final RHO_j is rhoend. In this way COBYLA's iterations behave
    like a trust region algorithm.

    Additionally, the linear program may be inconsistent, or the
    approximation may give poor improvement. For details about
    how these issues are resolved, as well as how the points v_i are
    updated, refer to the source code or the references below.


    References
    ----------
    Powell M.J.D. (1994), "A direct search optimization method that models
    the objective and constraint functions by linear interpolation.", in
    Advances in Optimization and Numerical Analysis, eds. S. Gomez and
    J-P Hennart, Kluwer Academic (Dordrecht), pp. 51-67

    Powell M.J.D. (1998), "Direct search algorithms for optimization
    calculations", Acta Numerica 7, 287-336

    Powell M.J.D. (2007), "A view of algorithms for optimization without
    derivatives", Cambridge University Technical Report DAMTP 2007/NA03


    Examples
    --------
    Minimize the objective function f(x,y) = x*y subject
    to the constraints x**2 + y**2 < 1 and y > 0::

        >>> def objective(x):
        ...     return x[0]*x[1]
        ...
        >>> def constr1(x):
        ...     return 1 - (x[0]**2 + x[1]**2)
        ...
        >>> def constr2(x):
        ...     return x[1]
        ...
        >>> from scipy.optimize import fmin_cobyla
        >>> fmin_cobyla(objective, [0.0, 0.1], [constr1, constr2], rhoend=1e-7)
        array([-0.70710685,  0.70710671])

    The exact solution is (-sqrt(2)/2, sqrt(2)/2).



    """
    err = "cons must be a sequence of callable functions or a single"\
          " callable function."
    # EAFP: probe sequence-ness with len(); a bare callable is wrapped in a
    # one-element list so the rest of the code only deals with sequences.
    try:
        len(cons)
    except TypeError as e:
        if callable(cons):
            cons = [cons]
        else:
            raise TypeError(err) from e
    else:
        for thisfunc in cons:
            if not callable(thisfunc):
                raise TypeError(err)

    if consargs is None:
        # Default: constraint functions get the same extra args as `func`.
        consargs = args

    # build constraints in the dict format expected by `_minimize_cobyla`
    con = tuple({'type': 'ineq', 'fun': c, 'args': consargs} for c in cons)

    # options
    opts = {'rhobeg': rhobeg,
            'tol': rhoend,
            'disp': disp,
            'maxiter': maxfun,
            'catol': catol,
            'callback': callback}

    sol = _minimize_cobyla(func, x0, args, constraints=con,
                           **opts)
    if disp and not sol['success']:
        print(f"COBYLA failed to find a solution: {sol.message}")
    # Only the minimizer is returned (legacy interface); use `minimize`
    # for the full OptimizeResult.
    return sol['x']
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@synchronized
def _minimize_cobyla(fun, x0, args=(), constraints=(),
                     rhobeg=1.0, tol=1e-4, maxiter=1000,
                     disp=False, catol=2e-4, callback=None, bounds=None,
                     **unknown_options):
    """
    Minimize a scalar function of one or more variables using the
    Constrained Optimization BY Linear Approximation (COBYLA) algorithm.

    Options
    -------
    rhobeg : float
        Reasonable initial changes to the variables.
    tol : float
        Final accuracy in the optimization (not precisely guaranteed).
        This is a lower bound on the size of the trust region.
    disp : bool
        Set to True to print convergence messages. If False,
        `verbosity` is ignored as set to 0.
    maxiter : int
        Maximum number of function evaluations.
    catol : float
        Tolerance (absolute) for constraint violations

    """
    _check_unknown_options(unknown_options)
    maxfun = maxiter
    rhoend = tol
    iprint = int(bool(disp))

    # Normalize a single constraint dict to a sequence of one.
    if isinstance(constraints, dict):
        constraints = (constraints, )

    if bounds:
        # BUG FIX: `constraints` may be a tuple here (from the dict
        # normalization above, or because `fmin_cobyla` passes a tuple),
        # in which case `.append` below would raise AttributeError.
        # Convert to a mutable list before adding bound-derived constraints.
        constraints = list(constraints)

        # Translate finite bounds into ``>= 0`` inequality constraints,
        # which is the only constraint form the Fortran routine accepts.
        i_lb = np.isfinite(bounds.lb)
        if np.any(i_lb):
            def lb_constraint(x, *args, **kwargs):
                return x[i_lb] - bounds.lb[i_lb]

            constraints.append({'type': 'ineq', 'fun': lb_constraint})

        i_ub = np.isfinite(bounds.ub)
        if np.any(i_ub):
            # Same calling convention as `lb_constraint` for consistency.
            def ub_constraint(x, *args, **kwargs):
                return bounds.ub[i_ub] - x[i_ub]

            constraints.append({'type': 'ineq', 'fun': ub_constraint})

    for ic, con in enumerate(constraints):
        # check type
        try:
            ctype = con['type'].lower()
        except KeyError as e:
            raise KeyError('Constraint %d has no type defined.' % ic) from e
        except TypeError as e:
            raise TypeError('Constraints must be defined using a '
                            'dictionary.') from e
        except AttributeError as e:
            raise TypeError("Constraint's type must be a string.") from e
        else:
            # COBYLA only supports inequality ('ineq') constraints.
            if ctype != 'ineq':
                raise ValueError("Constraints of type '%s' not handled by "
                                 "COBYLA." % con['type'])

        # check function
        if 'fun' not in con:
            raise KeyError('Constraint %d has no function defined.' % ic)

        # check extra arguments
        if 'args' not in con:
            con['args'] = ()

    # m is the total number of constraint values;
    # it takes into account that some constraints may be vector-valued.
    # Each constraint is evaluated once at x0 only to learn its length.
    cons_lengths = []
    for c in constraints:
        f = c['fun'](x0, *c['args'])
        try:
            cons_length = len(f)
        except TypeError:
            # Scalar-valued constraint.
            cons_length = 1
        cons_lengths.append(cons_length)
    m = sum(cons_lengths)

    # create the ScalarFunction; cobyla doesn't require a derivative
    # function, so a dummy jac avoids finite-difference bookkeeping.
    def _jac(x, *args):
        return None

    sf = _prepare_scalar_function(fun, x0, args=args, jac=_jac)

    def calcfc(x, con):
        # Objective value plus constraint vector packed into `con`,
        # in the layout the Fortran routine expects.
        f = sf.fun(x)
        i = 0
        for size, c in izip(cons_lengths, constraints):
            con[i: i + size] = c['fun'](x, *c['args'])
            i += size
        return f

    def wrapped_callback(x):
        # Copy so user callbacks cannot mutate the solver's workspace.
        if callback is not None:
            callback(np.copy(x))

    # info: [status, nfev, fun, maxcv] filled in by the Fortran routine.
    info = np.zeros(4, np.float64)
    xopt, info = cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
                                 rhoend=rhoend, iprint=iprint, maxfun=maxfun,
                                 dinfo=info, callback=wrapped_callback)

    if info[3] > catol:
        # Constraint violation exceeds tolerance: report failure status 4
        # even if the solver itself claimed convergence.
        info[0] = 4

    return OptimizeResult(x=xopt,
                          status=int(info[0]),
                          success=info[0] == 1,
                          message={1: 'Optimization terminated successfully.',
                                   2: 'Maximum number of function evaluations '
                                      'has been exceeded.',
                                   3: 'Rounding errors are becoming damaging '
                                      'in COBYLA subroutine.',
                                   4: 'Did not converge to a solution '
                                      'satisfying the constraints. See '
                                      '`maxcv` for magnitude of violation.',
                                   5: 'NaN result encountered.'
                                   }.get(info[0], 'Unknown exit status.'),
                          nfev=int(info[1]),
                          fun=info[2],
                          maxcv=info[3])
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from ._optimize import _check_unknown_options
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _minimize_cobyqa(fun, x0, args=(), bounds=None, constraints=(),
                     callback=None, disp=False, maxfev=None, maxiter=None,
                     f_target=-np.inf, feasibility_tol=1e-8,
                     initial_tr_radius=1.0, final_tr_radius=1e-6, scale=False,
                     **unknown_options):
    """
    Minimize a scalar function of one or more variables using the
    Constrained Optimization BY Quadratic Approximations (COBYQA) algorithm [1]_.

    .. versionadded:: 1.14.0

    Options
    -------
    disp : bool
        Set to True to print information about the optimization procedure.
    maxfev : int
        Maximum number of function evaluations.
    maxiter : int
        Maximum number of iterations.
    f_target : float
        Target value for the objective function. The optimization procedure is
        terminated when the objective function value of a feasible point (see
        `feasibility_tol` below) is less than or equal to this target.
    feasibility_tol : float
        Absolute tolerance for the constraint violation.
    initial_tr_radius : float
        Initial trust-region radius. Typically, this value should be in the
        order of one tenth of the greatest expected change to the variables.
    final_tr_radius : float
        Final trust-region radius. It should indicate the accuracy required in
        the final values of the variables. If provided, this option overrides
        the value of `tol` in the `minimize` function.
    scale : bool
        Set to True to scale the variables according to the bounds. If True and
        if all the lower and upper bounds are finite, the variables are scaled
        to be within the range :math:`[-1, 1]`. If any of the lower or upper
        bounds is infinite, the variables are not scaled.

    References
    ----------
    .. [1] COBYQA
           https://www.cobyqa.com/stable/
    """
    from .._lib.cobyqa import minimize  # import here to avoid circular imports

    _check_unknown_options(unknown_options)

    # Budgets default to dimension-dependent limits when not given.
    n_vars = len(x0)
    max_eval = 500 * n_vars if maxfev is None else int(maxfev)
    max_iter = 1000 * n_vars if maxiter is None else int(maxiter)

    options = dict(
        disp=bool(disp),
        maxfev=max_eval,
        maxiter=max_iter,
        target=float(f_target),
        feasibility_tol=float(feasibility_tol),
        radius_init=float(initial_tr_radius),
        radius_final=float(final_tr_radius),
        scale=bool(scale),
    )
    return minimize(fun, x0, args, bounds, constraints, callback, options)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constraints definition for minimize."""
|
| 2 |
+
import numpy as np
|
| 3 |
+
from ._hessian_update_strategy import BFGS
|
| 4 |
+
from ._differentiable_functions import (
|
| 5 |
+
VectorFunction, LinearVectorFunction, IdentityVectorFunction)
|
| 6 |
+
from ._optimize import OptimizeWarning
|
| 7 |
+
from warnings import warn, catch_warnings, simplefilter, filterwarnings
|
| 8 |
+
from scipy.sparse import issparse
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _arr_to_scalar(x):
|
| 12 |
+
# If x is a numpy array, return x.item(). This will
|
| 13 |
+
# fail if the array has more than one element.
|
| 14 |
+
return x.item() if isinstance(x, np.ndarray) else x
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class NonlinearConstraint:
    """Nonlinear constraint on the variables.

    The constraint has the general inequality form::

        lb <= fun(x) <= ub

    Here the vector of independent variables x is passed as ndarray of shape
    (n,) and ``fun`` returns a vector with m components.

    It is possible to use equal bounds to represent an equality constraint or
    infinite bounds to represent a one-sided constraint.

    Parameters
    ----------
    fun : callable
        The function defining the constraint.
        The signature is ``fun(x) -> array_like, shape (m,)``.
    lb, ub : array_like
        Lower and upper bounds on the constraint. Each array must have the
        shape (m,) or be a scalar, in the latter case a bound will be the same
        for all components of the constraint. Use ``np.inf`` with an
        appropriate sign to specify a one-sided constraint.
        Set components of `lb` and `ub` equal to represent an equality
        constraint. Note that you can mix constraints of different types:
        interval, one-sided or equality, by setting different components of
        `lb` and `ub` as necessary.
    jac : {callable,  '2-point', '3-point', 'cs'}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix,
        where element (i, j) is the partial derivative of f[i] with
        respect to x[j]).  The keywords {'2-point', '3-point',
        'cs'} select a finite difference scheme for the numerical estimation.
        A callable must have the following signature:
        ``jac(x) -> {ndarray, sparse matrix}, shape (m, n)``.
        Default is '2-point'.
    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy, None}, optional
        Method for computing the Hessian matrix. The keywords
        {'2-point', '3-point', 'cs'} select a finite difference scheme for
        numerical  estimation.  Alternatively, objects implementing
        `HessianUpdateStrategy` interface can be used to approximate the
        Hessian. Currently available implementations are:

        - `BFGS` (default option)
        - `SR1`

        A callable must return the Hessian matrix of ``dot(fun, v)`` and
        must have the following signature:
        ``hess(x, v) -> {LinearOperator, sparse matrix, array_like}, shape (n, n)``.
        Here ``v`` is ndarray with shape (m,) containing Lagrange multipliers.
    keep_feasible : array_like of bool, optional
        Whether to keep the constraint components feasible throughout
        iterations. A single value set this property for all components.
        Default is False. Has no effect for equality constraints.
    finite_diff_rel_step: None or array_like, optional
        Relative step size for the finite difference approximation. Default is
        None, which will select a reasonable value automatically depending
        on a finite difference scheme.
    finite_diff_jac_sparsity: {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations. A zero entry means
        that a corresponding element in the Jacobian is identically zero.
        If provided, forces the use of 'lsmr' trust-region solver.
        If None (default) then dense differencing will be used.

    Notes
    -----
    Finite difference schemes {'2-point', '3-point', 'cs'} may be used for
    approximating either the Jacobian or the Hessian. We, however, do not allow
    its use for approximating both simultaneously. Hence whenever the Jacobian
    is estimated via finite-differences, we require the Hessian to be estimated
    using one of the quasi-Newton strategies.

    The scheme 'cs' is potentially the most accurate, but requires the function
    to correctly handles complex inputs and be analytically continuable to the
    complex plane. The scheme '3-point' is more accurate than '2-point' but
    requires twice as many operations.

    Examples
    --------
    Constrain ``x[0] < sin(x[1]) + 1.9``

    >>> from scipy.optimize import NonlinearConstraint
    >>> import numpy as np
    >>> con = lambda x: x[0] - np.sin(x[1])
    >>> nlc = NonlinearConstraint(con, -np.inf, 1.9)

    """
    def __init__(self, fun, lb, ub, jac='2-point', hess=BFGS(),
                 keep_feasible=False, finite_diff_rel_step=None,
                 finite_diff_jac_sparsity=None):
        # This class is a plain container: it records the user's settings
        # verbatim and performs no validation; consumers (the solvers) are
        # responsible for interpreting and checking them.
        # NOTE(review): `hess=BFGS()` is a mutable default evaluated once at
        # definition time, so all instances created without an explicit
        # `hess` share the same BFGS object — presumably the solvers
        # (re)initialize it before use; confirm before relying on isolation.
        self.fun = fun
        self.lb = lb
        self.ub = ub
        self.finite_diff_rel_step = finite_diff_rel_step
        self.finite_diff_jac_sparsity = finite_diff_jac_sparsity
        self.jac = jac
        self.hess = hess
        self.keep_feasible = keep_feasible
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class LinearConstraint:
|
| 120 |
+
"""Linear constraint on the variables.
|
| 121 |
+
|
| 122 |
+
The constraint has the general inequality form::
|
| 123 |
+
|
| 124 |
+
lb <= A.dot(x) <= ub
|
| 125 |
+
|
| 126 |
+
Here the vector of independent variables x is passed as ndarray of shape
|
| 127 |
+
(n,) and the matrix A has shape (m, n).
|
| 128 |
+
|
| 129 |
+
It is possible to use equal bounds to represent an equality constraint or
|
| 130 |
+
infinite bounds to represent a one-sided constraint.
|
| 131 |
+
|
| 132 |
+
Parameters
|
| 133 |
+
----------
|
| 134 |
+
A : {array_like, sparse matrix}, shape (m, n)
|
| 135 |
+
Matrix defining the constraint.
|
| 136 |
+
lb, ub : dense array_like, optional
|
| 137 |
+
Lower and upper limits on the constraint. Each array must have the
|
| 138 |
+
shape (m,) or be a scalar, in the latter case a bound will be the same
|
| 139 |
+
for all components of the constraint. Use ``np.inf`` with an
|
| 140 |
+
appropriate sign to specify a one-sided constraint.
|
| 141 |
+
Set components of `lb` and `ub` equal to represent an equality
|
| 142 |
+
constraint. Note that you can mix constraints of different types:
|
| 143 |
+
interval, one-sided or equality, by setting different components of
|
| 144 |
+
`lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
|
| 145 |
+
and ``ub = np.inf`` (no limits).
|
| 146 |
+
keep_feasible : dense array_like of bool, optional
|
| 147 |
+
Whether to keep the constraint components feasible throughout
|
| 148 |
+
iterations. A single value set this property for all components.
|
| 149 |
+
Default is False. Has no effect for equality constraints.
|
| 150 |
+
"""
|
| 151 |
+
def _input_validation(self):
|
| 152 |
+
if self.A.ndim != 2:
|
| 153 |
+
message = "`A` must have exactly two dimensions."
|
| 154 |
+
raise ValueError(message)
|
| 155 |
+
|
| 156 |
+
try:
|
| 157 |
+
shape = self.A.shape[0:1]
|
| 158 |
+
self.lb = np.broadcast_to(self.lb, shape)
|
| 159 |
+
self.ub = np.broadcast_to(self.ub, shape)
|
| 160 |
+
self.keep_feasible = np.broadcast_to(self.keep_feasible, shape)
|
| 161 |
+
except ValueError:
|
| 162 |
+
message = ("`lb`, `ub`, and `keep_feasible` must be broadcastable "
|
| 163 |
+
"to shape `A.shape[0:1]`")
|
| 164 |
+
raise ValueError(message)
|
| 165 |
+
|
| 166 |
+
def __init__(self, A, lb=-np.inf, ub=np.inf, keep_feasible=False):
|
| 167 |
+
if not issparse(A):
|
| 168 |
+
# In some cases, if the constraint is not valid, this emits a
|
| 169 |
+
# VisibleDeprecationWarning about ragged nested sequences
|
| 170 |
+
# before eventually causing an error. `scipy.optimize.milp` would
|
| 171 |
+
# prefer that this just error out immediately so it can handle it
|
| 172 |
+
# rather than concerning the user.
|
| 173 |
+
with catch_warnings():
|
| 174 |
+
simplefilter("error")
|
| 175 |
+
self.A = np.atleast_2d(A).astype(np.float64)
|
| 176 |
+
else:
|
| 177 |
+
self.A = A
|
| 178 |
+
if issparse(lb) or issparse(ub):
|
| 179 |
+
raise ValueError("Constraint limits must be dense arrays.")
|
| 180 |
+
self.lb = np.atleast_1d(lb).astype(np.float64)
|
| 181 |
+
self.ub = np.atleast_1d(ub).astype(np.float64)
|
| 182 |
+
|
| 183 |
+
if issparse(keep_feasible):
|
| 184 |
+
raise ValueError("`keep_feasible` must be a dense array.")
|
| 185 |
+
self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
|
| 186 |
+
self._input_validation()
|
| 187 |
+
|
| 188 |
+
def residual(self, x):
    """
    Calculate the residual between the constraint function and the limits

    For a linear constraint of the form::

        lb <= A@x <= ub

    the lower and upper residuals between ``A@x`` and the limits are values
    ``sl`` and ``sb`` such that::

        lb + sl == A@x == ub - sb

    When all elements of ``sl`` and ``sb`` are positive, all elements of
    the constraint are satisfied; a negative element in ``sl`` or ``sb``
    indicates that the corresponding element of the constraint is not
    satisfied.

    Parameters
    ----------
    x: array_like
        Vector of independent variables

    Returns
    -------
    sl, sb : array-like
        The lower and upper residuals
    """
    # Evaluate A@x once and measure its slack against each limit.
    value = self.A @ x
    return value - self.lb, self.ub - value
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
class Bounds:
    """Bounds constraint on the variables.

    Represents the general inequality::

        lb <= x <= ub

    Equality constraints are expressed by making the corresponding
    components of `lb` and `ub` equal; one-sided constraints use an
    infinite bound on the open side, and constraint types may be mixed
    component-wise.

    Parameters
    ----------
    lb, ub : dense array_like, optional
        Lower and upper bounds on the independent variables; must be the
        same shape as each other and `keep_feasible`, or broadcastable.
        Defaults leave all variables unbounded (``lb = -np.inf`` and
        ``ub = np.inf``).
    keep_feasible : dense array_like of bool, optional
        Whether iterations must keep each component feasible.
        Broadcastable with `lb` and `ub`. Default is False. Has no effect
        for equality constraints.
    """
    def _input_validation(self):
        """Broadcast `lb`, `ub`, and `keep_feasible` against one another."""
        try:
            broadcast = np.broadcast_arrays(self.lb, self.ub,
                                            self.keep_feasible)
            self.lb, self.ub, self.keep_feasible = broadcast
        except ValueError:
            raise ValueError(
                "`lb`, `ub`, and `keep_feasible` must be broadcastable.")

    def __init__(self, lb=-np.inf, ub=np.inf, keep_feasible=False):
        if issparse(lb) or issparse(ub):
            raise ValueError("Lower and upper bounds must be dense arrays.")
        self.lb = np.atleast_1d(lb)
        self.ub = np.atleast_1d(ub)

        if issparse(keep_feasible):
            raise ValueError("`keep_feasible` must be a dense array.")
        self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
        self._input_validation()

    def __repr__(self):
        # Only show keep_feasible when at least one component sets it.
        pieces = [f"{type(self).__name__}({self.lb!r}, {self.ub!r}"]
        if np.any(self.keep_feasible):
            pieces.append(f", keep_feasible={self.keep_feasible!r}")
        pieces.append(")")
        return "".join(pieces)

    def residual(self, x):
        """Calculate the residual (slack) between the input and the bounds

        For a bound constraint of the form::

            lb <= x <= ub

        the lower and upper residuals between `x` and the bounds are values
        ``sl`` and ``sb`` such that::

            lb + sl == x == ub - sb

        When all elements of ``sl`` and ``sb`` are positive, all elements of
        ``x`` lie within the bounds; a negative element in ``sl`` or ``sb``
        indicates that the corresponding element of ``x`` is out of bounds.

        Parameters
        ----------
        x: array_like
            Vector of independent variables

        Returns
        -------
        sl, sb : array-like
            The lower and upper residuals
        """
        below = x - self.lb
        above = self.ub - x
        return below, above
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
class PreparedConstraint:
    """Constraint prepared from a user defined constraint.

    On creation it will check whether a constraint definition is valid and
    the initial point is feasible. If created successfully, it will contain
    the attributes listed below.

    Parameters
    ----------
    constraint : {NonlinearConstraint, LinearConstraint, Bounds}
        Constraint to check and prepare.
    x0 : array_like
        Initial vector of independent variables.
    sparse_jacobian : bool or None, optional
        If bool, then the Jacobian of the constraint will be converted
        to the corresponded format if necessary. If None (default), such
        conversion is not made.
    finite_diff_bounds : 2-tuple, optional
        Lower and upper bounds on the independent variables for the finite
        difference approximation, if applicable. Defaults to no bounds.

    Attributes
    ----------
    fun : {VectorFunction, LinearVectorFunction, IdentityVectorFunction}
        Function defining the constraint wrapped by one of the convenience
        classes.
    bounds : 2-tuple
        Contains lower and upper bounds for the constraints --- lb and ub.
        These are converted to ndarray and have a size equal to the number of
        the constraints.
    keep_feasible : ndarray
        Array indicating which components must be kept feasible with a size
        equal to the number of the constraints.
    """
    def __init__(self, constraint, x0, sparse_jacobian=None,
                 finite_diff_bounds=(-np.inf, np.inf)):
        # Wrap the constraint in the matching vector-function adapter.
        if isinstance(constraint, NonlinearConstraint):
            fun = VectorFunction(constraint.fun, x0,
                                 constraint.jac, constraint.hess,
                                 constraint.finite_diff_rel_step,
                                 constraint.finite_diff_jac_sparsity,
                                 finite_diff_bounds, sparse_jacobian)
        elif isinstance(constraint, LinearConstraint):
            fun = LinearVectorFunction(constraint.A, x0, sparse_jacobian)
        elif isinstance(constraint, Bounds):
            fun = IdentityVectorFunction(x0, sparse_jacobian)
        else:
            raise ValueError("`constraint` of an unknown type is passed.")

        # Number of scalar constraints, as reported by the wrapper.
        m = fun.m

        lb = np.asarray(constraint.lb, dtype=float)
        ub = np.asarray(constraint.ub, dtype=float)
        keep_feasible = np.asarray(constraint.keep_feasible, dtype=bool)

        # One entry per constraint; raises if shapes are incompatible.
        lb = np.broadcast_to(lb, m)
        ub = np.broadcast_to(ub, m)
        keep_feasible = np.broadcast_to(keep_feasible, m)

        if keep_feasible.shape != (m,):
            raise ValueError("`keep_feasible` has a wrong shape.")

        # Equality constraints (lb == ub) are exempt from the initial
        # feasibility check; only inequality components are tested.
        mask = keep_feasible & (lb != ub)
        f0 = fun.f
        if np.any(f0[mask] < lb[mask]) or np.any(f0[mask] > ub[mask]):
            raise ValueError("`x0` is infeasible with respect to some "
                             "inequality constraint with `keep_feasible` "
                             "set to True.")

        self.fun = fun
        self.bounds = (lb, ub)
        self.keep_feasible = keep_feasible

    def violation(self, x):
        """How much the constraint is exceeded by.

        Parameters
        ----------
        x : array-like
            Vector of independent variables

        Returns
        -------
        excess : array-like
            How much the constraint is exceeded by, for each of the
            constraints specified by `PreparedConstraint.fun`.
        """
        with catch_warnings():
            # Ignore the following warning, it's not important when
            # figuring out total violation
            # UserWarning: delta_grad == 0.0. Check if the approximated
            # function is linear
            filterwarnings("ignore", "delta_grad", UserWarning)
            ev = self.fun.fun(np.asarray(x))

        # Zero where satisfied; positive where a bound is exceeded.
        excess_lb = np.maximum(self.bounds[0] - ev, 0)
        excess_ub = np.maximum(ev - self.bounds[1], 0)

        return excess_lb + excess_ub
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def new_bounds_to_old(lb, ub, n):
    """Convert the new bounds representation to the old one.

    The new representation is a tuple (lb, ub) and the old one is a list
    containing n tuples, ith containing lower and upper bound on a ith
    variable.
    If any of the entries in lb/ub are -np.inf/np.inf they are replaced by
    None.
    """
    lower = np.broadcast_to(lb, n)
    upper = np.broadcast_to(ub, n)

    old_bounds = []
    for lo, hi in zip(lower, upper):
        # Infinite (or NaN) limits map to None in the old representation.
        lo = float(lo) if lo > -np.inf else None
        hi = float(hi) if hi < np.inf else None
        old_bounds.append((lo, hi))
    return old_bounds
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def old_bound_to_new(bounds):
    """Convert the old bounds representation to the new one.

    The new representation is a tuple (lb, ub) and the old one is a list
    containing n tuples, ith containing lower and upper bound on a ith
    variable.
    If any of the entries in lb/ub are None they are replaced by
    -np.inf/np.inf.
    """
    lower, upper = zip(*bounds)

    # None means "unbounded" in the old form; any array entry is reduced
    # to its scalar value before conversion to float.
    lb = np.array([-np.inf if b is None else float(_arr_to_scalar(b))
                   for b in lower])
    ub = np.array([np.inf if b is None else float(_arr_to_scalar(b))
                   for b in upper])

    return lb, ub
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
def strict_bounds(lb, ub, keep_feasible, n_vars):
    """Remove bounds which are not asked to be kept feasible.

    Components whose `keep_feasible` flag is False are widened to
    (-inf, inf); the rest are returned unchanged as float arrays of
    length `n_vars`.
    """
    relax = ~np.resize(keep_feasible, n_vars)
    strict_lb = np.resize(lb, n_vars).astype(float)
    strict_ub = np.resize(ub, n_vars).astype(float)
    strict_lb[relax] = -np.inf
    strict_ub[relax] = np.inf
    return strict_lb, strict_ub
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
def new_constraint_to_old(con, x0):
    """
    Converts new-style constraint objects to old-style constraint dictionaries.

    Parameters
    ----------
    con : {NonlinearConstraint, LinearConstraint}
        Constraint to convert.
    x0 : array_like
        Initial vector of independent variables, used to prepare the
        constraint and to size inequality Jacobians.

    Returns
    -------
    list of dict
        Old-style constraint dictionaries with keys "type", "fun" and,
        when a Jacobian is available, "jac".
    """
    if isinstance(con, NonlinearConstraint):
        if (con.finite_diff_jac_sparsity is not None or
                con.finite_diff_rel_step is not None or
                not isinstance(con.hess, BFGS) or  # misses user specified BFGS
                con.keep_feasible):
            # BUG FIX: the original concatenation lacked a space between
            # "`hess`" and "are", producing "`hess`are ignored...".
            warn("Constraint options `finite_diff_jac_sparsity`, "
                 "`finite_diff_rel_step`, `keep_feasible`, and `hess` "
                 "are ignored by this method.",
                 OptimizeWarning, stacklevel=3)

        fun = con.fun
        if callable(con.jac):
            jac = con.jac
        else:
            jac = None

    else:  # LinearConstraint
        if np.any(con.keep_feasible):
            warn("Constraint option `keep_feasible` is ignored by this method.",
                 OptimizeWarning, stacklevel=3)

        A = con.A
        if issparse(A):
            A = A.toarray()
        def fun(x):
            return np.dot(A, x)
        def jac(x):
            return A

    # FIXME: when bugs in VectorFunction/LinearVectorFunction are worked out,
    # use pcon.fun.fun and pcon.fun.jac. Until then, get fun/jac above.
    pcon = PreparedConstraint(con, x0)
    lb, ub = pcon.bounds

    # Classify each scalar constraint: equality, bounded on one side only,
    # or completely unbounded (the last are dropped with a warning).
    i_eq = lb == ub
    i_bound_below = np.logical_xor(lb != -np.inf, i_eq)
    i_bound_above = np.logical_xor(ub != np.inf, i_eq)
    i_unbounded = np.logical_and(lb == -np.inf, ub == np.inf)

    if np.any(i_unbounded):
        warn("At least one constraint is unbounded above and below. Such "
             "constraints are ignored.",
             OptimizeWarning, stacklevel=3)

    ceq = []
    if np.any(i_eq):
        def f_eq(x):
            y = np.array(fun(x)).flatten()
            return y[i_eq] - lb[i_eq]
        ceq = [{"type": "eq", "fun": f_eq}]

        if jac is not None:
            def j_eq(x):
                dy = jac(x)
                if issparse(dy):
                    dy = dy.toarray()
                dy = np.atleast_2d(dy)
                return dy[i_eq, :]
            ceq[0]["jac"] = j_eq

    # Old-style inequalities are one-sided (g(x) >= 0), so two-sided bounds
    # expand to a lower-bound part and a negated upper-bound part.
    cineq = []
    n_bound_below = np.sum(i_bound_below)
    n_bound_above = np.sum(i_bound_above)
    if n_bound_below + n_bound_above:
        def f_ineq(x):
            y = np.zeros(n_bound_below + n_bound_above)
            y_all = np.array(fun(x)).flatten()
            y[:n_bound_below] = y_all[i_bound_below] - lb[i_bound_below]
            y[n_bound_below:] = -(y_all[i_bound_above] - ub[i_bound_above])
            return y
        cineq = [{"type": "ineq", "fun": f_ineq}]

        if jac is not None:
            def j_ineq(x):
                dy = np.zeros((n_bound_below + n_bound_above, len(x0)))
                dy_all = jac(x)
                if issparse(dy_all):
                    dy_all = dy_all.toarray()
                dy_all = np.atleast_2d(dy_all)
                dy[:n_bound_below, :] = dy_all[i_bound_below]
                dy[n_bound_below:, :] = -dy_all[i_bound_above]
                return dy
            cineq[0]["jac"] = j_ineq

    old_constraints = ceq + cineq

    if len(old_constraints) > 1:
        warn("Equality and inequality constraints are specified in the same "
             "element of the constraint list. For efficient use with this "
             "method, equality and inequality constraints should be specified "
             "in separate elements of the constraint list. ",
             OptimizeWarning, stacklevel=3)
    return old_constraints
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
def old_constraint_to_new(ic, con):
    """
    Converts old-style constraint dictionaries to new-style constraint objects.
    """
    # Validate the 'type' entry, translating low-level failures into the
    # documented exception types.
    try:
        ctype = con['type'].lower()
    except KeyError as e:
        raise KeyError('Constraint %d has no type defined.' % ic) from e
    except TypeError as e:
        raise TypeError(
            'Constraints must be a sequence of dictionaries.'
        ) from e
    except AttributeError as e:
        raise TypeError("Constraint's type must be a string.") from e

    if ctype not in ['eq', 'ineq']:
        raise ValueError("Unknown constraint type '%s'." % con['type'])
    if 'fun' not in con:
        raise ValueError('Constraint %d has no function defined.' % ic)

    # Old-style constraints mean g(x) == 0 ('eq') or g(x) >= 0 ('ineq').
    lb = 0
    ub = 0 if ctype == 'eq' else np.inf

    jac = '2-point'
    if 'args' in con:
        # Bind the extra arguments into wrappers.
        args = con['args']

        def fun(x):
            return con["fun"](x, *args)

        if 'jac' in con:
            def jac(x):
                return con["jac"](x, *args)
    else:
        fun = con['fun']
        if 'jac' in con:
            jac = con['jac']

    return NonlinearConstraint(fun, lb, ub, jac)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py
ADDED
|
@@ -0,0 +1,728 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
# 2023 - ported from minpack2.dcsrch, dcstep (Fortran) to Python
|
| 5 |
+
c MINPACK-1 Project. June 1983.
|
| 6 |
+
c Argonne National Laboratory.
|
| 7 |
+
c Jorge J. More' and David J. Thuente.
|
| 8 |
+
c
|
| 9 |
+
c MINPACK-2 Project. November 1993.
|
| 10 |
+
c Argonne National Laboratory and University of Minnesota.
|
| 11 |
+
c Brett M. Averick, Richard G. Carter, and Jorge J. More'.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
# NOTE this file was linted by black on first commit, and can be kept that way.
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class DCSRCH:
|
| 18 |
+
"""
|
| 19 |
+
Parameters
|
| 20 |
+
----------
|
| 21 |
+
phi : callable phi(alpha)
|
| 22 |
+
Function at point `alpha`
|
| 23 |
+
derphi : callable phi'(alpha)
|
| 24 |
+
Objective function derivative. Returns a scalar.
|
| 25 |
+
ftol : float
|
| 26 |
+
A nonnegative tolerance for the sufficient decrease condition.
|
| 27 |
+
gtol : float
|
| 28 |
+
A nonnegative tolerance for the curvature condition.
|
| 29 |
+
xtol : float
|
| 30 |
+
A nonnegative relative tolerance for an acceptable step. The
|
| 31 |
+
subroutine exits with a warning if the relative difference between
|
| 32 |
+
sty and stx is less than xtol.
|
| 33 |
+
stpmin : float
|
| 34 |
+
A nonnegative lower bound for the step.
|
| 35 |
+
stpmax :
|
| 36 |
+
A nonnegative upper bound for the step.
|
| 37 |
+
|
| 38 |
+
Notes
|
| 39 |
+
-----
|
| 40 |
+
|
| 41 |
+
This subroutine finds a step that satisfies a sufficient
|
| 42 |
+
decrease condition and a curvature condition.
|
| 43 |
+
|
| 44 |
+
Each call of the subroutine updates an interval with
|
| 45 |
+
endpoints stx and sty. The interval is initially chosen
|
| 46 |
+
so that it contains a minimizer of the modified function
|
| 47 |
+
|
| 48 |
+
psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
|
| 49 |
+
|
| 50 |
+
If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
|
| 51 |
+
interval is chosen so that it contains a minimizer of f.
|
| 52 |
+
|
| 53 |
+
The algorithm is designed to find a step that satisfies
|
| 54 |
+
the sufficient decrease condition
|
| 55 |
+
|
| 56 |
+
f(stp) <= f(0) + ftol*stp*f'(0),
|
| 57 |
+
|
| 58 |
+
and the curvature condition
|
| 59 |
+
|
| 60 |
+
abs(f'(stp)) <= gtol*abs(f'(0)).
|
| 61 |
+
|
| 62 |
+
If ftol is less than gtol and if, for example, the function
|
| 63 |
+
is bounded below, then there is always a step which satisfies
|
| 64 |
+
both conditions.
|
| 65 |
+
|
| 66 |
+
If no step can be found that satisfies both conditions, then
|
| 67 |
+
the algorithm stops with a warning. In this case stp only
|
| 68 |
+
satisfies the sufficient decrease condition.
|
| 69 |
+
|
| 70 |
+
A typical invocation of dcsrch has the following outline:
|
| 71 |
+
|
| 72 |
+
Evaluate the function at stp = 0.0d0; store in f.
|
| 73 |
+
Evaluate the gradient at stp = 0.0d0; store in g.
|
| 74 |
+
Choose a starting step stp.
|
| 75 |
+
|
| 76 |
+
task = 'START'
|
| 77 |
+
10 continue
|
| 78 |
+
call dcsrch(stp,f,g,ftol,gtol,xtol,task,stpmin,stpmax,
|
| 79 |
+
isave,dsave)
|
| 80 |
+
if (task .eq. 'FG') then
|
| 81 |
+
Evaluate the function and the gradient at stp
|
| 82 |
+
go to 10
|
| 83 |
+
end if
|
| 84 |
+
|
| 85 |
+
NOTE: The user must not alter work arrays between calls.
|
| 86 |
+
|
| 87 |
+
The subroutine statement is
|
| 88 |
+
|
| 89 |
+
subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
|
| 90 |
+
task,isave,dsave)
|
| 91 |
+
where
|
| 92 |
+
|
| 93 |
+
stp is a double precision variable.
|
| 94 |
+
On entry stp is the current estimate of a satisfactory
|
| 95 |
+
step. On initial entry, a positive initial estimate
|
| 96 |
+
must be provided.
|
| 97 |
+
On exit stp is the current estimate of a satisfactory step
|
| 98 |
+
if task = 'FG'. If task = 'CONV' then stp satisfies
|
| 99 |
+
the sufficient decrease and curvature condition.
|
| 100 |
+
|
| 101 |
+
f is a double precision variable.
|
| 102 |
+
On initial entry f is the value of the function at 0.
|
| 103 |
+
On subsequent entries f is the value of the
|
| 104 |
+
function at stp.
|
| 105 |
+
On exit f is the value of the function at stp.
|
| 106 |
+
|
| 107 |
+
g is a double precision variable.
|
| 108 |
+
On initial entry g is the derivative of the function at 0.
|
| 109 |
+
On subsequent entries g is the derivative of the
|
| 110 |
+
function at stp.
|
| 111 |
+
On exit g is the derivative of the function at stp.
|
| 112 |
+
|
| 113 |
+
ftol is a double precision variable.
|
| 114 |
+
On entry ftol specifies a nonnegative tolerance for the
|
| 115 |
+
sufficient decrease condition.
|
| 116 |
+
On exit ftol is unchanged.
|
| 117 |
+
|
| 118 |
+
gtol is a double precision variable.
|
| 119 |
+
On entry gtol specifies a nonnegative tolerance for the
|
| 120 |
+
curvature condition.
|
| 121 |
+
On exit gtol is unchanged.
|
| 122 |
+
|
| 123 |
+
xtol is a double precision variable.
|
| 124 |
+
On entry xtol specifies a nonnegative relative tolerance
|
| 125 |
+
for an acceptable step. The subroutine exits with a
|
| 126 |
+
warning if the relative difference between sty and stx
|
| 127 |
+
is less than xtol.
|
| 128 |
+
|
| 129 |
+
On exit xtol is unchanged.
|
| 130 |
+
|
| 131 |
+
task is a character variable of length at least 60.
|
| 132 |
+
On initial entry task must be set to 'START'.
|
| 133 |
+
On exit task indicates the required action:
|
| 134 |
+
|
| 135 |
+
If task(1:2) = 'FG' then evaluate the function and
|
| 136 |
+
derivative at stp and call dcsrch again.
|
| 137 |
+
|
| 138 |
+
If task(1:4) = 'CONV' then the search is successful.
|
| 139 |
+
|
| 140 |
+
If task(1:4) = 'WARN' then the subroutine is not able
|
| 141 |
+
to satisfy the convergence conditions. The exit value of
|
| 142 |
+
stp contains the best point found during the search.
|
| 143 |
+
|
| 144 |
+
If task(1:5) = 'ERROR' then there is an error in the
|
| 145 |
+
input arguments.
|
| 146 |
+
|
| 147 |
+
On exit with convergence, a warning or an error, the
|
| 148 |
+
variable task contains additional information.
|
| 149 |
+
|
| 150 |
+
stpmin is a double precision variable.
|
| 151 |
+
On entry stpmin is a nonnegative lower bound for the step.
|
| 152 |
+
On exit stpmin is unchanged.
|
| 153 |
+
|
| 154 |
+
stpmax is a double precision variable.
|
| 155 |
+
On entry stpmax is a nonnegative upper bound for the step.
|
| 156 |
+
On exit stpmax is unchanged.
|
| 157 |
+
|
| 158 |
+
isave is an integer work array of dimension 2.
|
| 159 |
+
|
| 160 |
+
dsave is a double precision work array of dimension 13.
|
| 161 |
+
|
| 162 |
+
Subprograms called
|
| 163 |
+
|
| 164 |
+
MINPACK-2 ... dcstep
|
| 165 |
+
MINPACK-1 Project. June 1983.
|
| 166 |
+
Argonne National Laboratory.
|
| 167 |
+
Jorge J. More' and David J. Thuente.
|
| 168 |
+
|
| 169 |
+
MINPACK-2 Project. November 1993.
|
| 170 |
+
Argonne National Laboratory and University of Minnesota.
|
| 171 |
+
Brett M. Averick, Richard G. Carter, and Jorge J. More'.
|
| 172 |
+
"""
|
| 173 |
+
|
| 174 |
+
def __init__(self, phi, derphi, ftol, gtol, xtol, stpmin, stpmax):
    """Record the objective callables and tolerances for the line search.

    All search state is initialized to None; it is populated on the
    first iteration rather than here.
    """
    # Interval endpoints and bookkeeping used across _iterate calls.
    for attr in ("stage", "ginit", "gtest", "gx", "gy", "finit",
                 "fx", "fy", "stx", "sty", "stmin", "stmax",
                 "width", "width1"):
        setattr(self, attr, None)

    # leave all assessment of tolerances/limits to the first call of
    # this object
    self.ftol = ftol
    self.gtol = gtol
    self.xtol = xtol
    self.stpmin = stpmin
    self.stpmax = stpmax

    self.phi = phi
    self.derphi = derphi
|
| 200 |
+
|
| 201 |
+
def __call__(self, alpha1, phi0=None, derphi0=None, maxiter=100):
    """
    Parameters
    ----------
    alpha1 : float
        alpha1 is the current estimate of a satisfactory
        step. A positive initial estimate must be provided.
    phi0 : float
        the value of `phi` at 0 (if known).
    derphi0 : float
        the derivative of `derphi` at 0 (if known).
    maxiter : int
        Maximum number of line-search iterations to attempt.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at `alpha=0`.
    task : bytes
        On exit task indicates status information.

        If task[:4] == b'CONV' then the search is successful.

        If task[:4] == b'WARN' then the subroutine is not able
        to satisfy the convergence conditions. The exit value of
        stp contains the best point found during the search.

        If task[:5] == b'ERROR' then there is an error in the
        input arguments.
    """
    if phi0 is None:
        phi0 = self.phi(0.0)
    if derphi0 is None:
        derphi0 = self.derphi(0.0)

    phi1 = phi0
    derphi1 = derphi0

    # Reverse-communication loop: _iterate returns a task telling us
    # whether to evaluate phi/derphi at a new step ('FG') or to stop.
    task = b"START"
    for i in range(maxiter):
        stp, phi1, derphi1, task = self._iterate(
            alpha1, phi1, derphi1, task
        )

        # A non-finite step means the search has broken down.
        if not np.isfinite(stp):
            task = b"WARN"
            stp = None
            break

        if task[:2] == b"FG":
            alpha1 = stp
            phi1 = self.phi(stp)
            derphi1 = self.derphi(stp)
        else:
            # CONV / WARN / ERROR: nothing more to evaluate.
            break
    else:
        # maxiter reached, the line search did not converge
        stp = None
        task = b"WARNING: dcsrch did not converge within max iterations"

    if task[:5] == b"ERROR" or task[:4] == b"WARN":
        stp = None  # failed

    return stp, phi1, phi0, task
|
| 268 |
+
|
| 269 |
+
    def _iterate(self, stp, f, g, task):
        """
        Perform one step of the safeguarded step-length search
        (a Python translation of MINPACK's ``dcsrch`` main loop body).

        Parameters
        ----------
        stp : float
            The current estimate of a satisfactory step. On initial entry, a
            positive initial estimate must be provided.
        f : float
            On first call f is the value of the function at 0. On subsequent
            entries f should be the value of the function at stp.
        g : float
            On initial entry g is the derivative of the function at 0. On
            subsequent entries g is the derivative of the function at stp.
        task : bytes
            On initial entry task must be set to 'START'.

            On exit with convergence, a warning or an error, the
            variable task contains additional information.


        Returns
        -------
        stp, f, g, task: tuple

            stp : float
                the current estimate of a satisfactory step if task = 'FG'. If
                task = 'CONV' then stp satisfies the sufficient decrease and
                curvature condition.
            f : float
                the value of the function at stp.
            g : float
                the derivative of the function at stp.
            task : bytes
                On exit task indicates the required action:

                If task(1:2) == b'FG' then evaluate the function and
                derivative at stp and call dcsrch again.

                If task(1:4) == b'CONV' then the search is successful.

                If task(1:4) == b'WARN' then the subroutine is not able
                to satisfy the convergence conditions. The exit value of
                stp contains the best point found during the search.

                If task(1:5) == b'ERROR' then there is an error in the
                input arguments.
        """
        # Constants from the original Fortran: bisection weight, bracketing
        # width tolerance factor, and extrapolation multipliers.
        p5 = 0.5
        p66 = 0.66
        xtrapl = 1.1
        xtrapu = 4.0

        if task[:5] == b"START":
            # Validate the inputs before starting the search; any violation
            # short-circuits with an ERROR task.
            if stp < self.stpmin:
                task = b"ERROR: STP .LT. STPMIN"
            if stp > self.stpmax:
                task = b"ERROR: STP .GT. STPMAX"
            if g >= 0:
                task = b"ERROR: INITIAL G .GE. ZERO"
            if self.ftol < 0:
                task = b"ERROR: FTOL .LT. ZERO"
            if self.gtol < 0:
                task = b"ERROR: GTOL .LT. ZERO"
            if self.xtol < 0:
                task = b"ERROR: XTOL .LT. ZERO"
            if self.stpmin < 0:
                task = b"ERROR: STPMIN .LT. ZERO"
            if self.stpmax < self.stpmin:
                task = b"ERROR: STPMAX .LT. STPMIN"

            if task[:5] == b"ERROR":
                return stp, f, g, task

            # Initialize local variables.

            self.brackt = False
            self.stage = 1
            self.finit = f
            self.ginit = g
            self.gtest = self.ftol * self.ginit
            self.width = self.stpmax - self.stpmin
            self.width1 = self.width / p5

            # The variables stx, fx, gx contain the values of the step,
            # function, and derivative at the best step.
            # The variables sty, fy, gy contain the value of the step,
            # function, and derivative at sty.
            # The variables stp, f, g contain the values of the step,
            # function, and derivative at stp.

            self.stx = 0.0
            self.fx = self.finit
            self.gx = self.ginit
            self.sty = 0.0
            self.fy = self.finit
            self.gy = self.ginit
            self.stmin = 0
            self.stmax = stp + xtrapu * stp
            task = b"FG"
            return stp, f, g, task

        # in the original Fortran this was a location to restore variables
        # we don't need to do that because they're attributes.

        # If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
        # algorithm enters the second stage.
        ftest = self.finit + stp * self.gtest

        if self.stage == 1 and f <= ftest and g >= 0:
            self.stage = 2

        # test for warnings
        if self.brackt and (stp <= self.stmin or stp >= self.stmax):
            task = b"WARNING: ROUNDING ERRORS PREVENT PROGRESS"
        if self.brackt and self.stmax - self.stmin <= self.xtol * self.stmax:
            task = b"WARNING: XTOL TEST SATISFIED"
        if stp == self.stpmax and f <= ftest and g <= self.gtest:
            task = b"WARNING: STP = STPMAX"
        if stp == self.stpmin and (f > ftest or g >= self.gtest):
            task = b"WARNING: STP = STPMIN"

        # test for convergence: sufficient decrease plus the strong
        # curvature condition |phi'(stp)| <= gtol * |phi'(0)|.
        if f <= ftest and abs(g) <= self.gtol * -self.ginit:
            task = b"CONVERGENCE"

        # test for termination
        if task[:4] == b"WARN" or task[:4] == b"CONV":
            return stp, f, g, task

        # A modified function is used to predict the step during the
        # first stage if a lower function value has been obtained but
        # the decrease is not sufficient.
        if self.stage == 1 and f <= self.fx and f > ftest:
            # Define the modified function and derivative values.
            fm = f - stp * self.gtest
            fxm = self.fx - self.stx * self.gtest
            fym = self.fy - self.sty * self.gtest
            gm = g - self.gtest
            gxm = self.gx - self.gtest
            gym = self.gy - self.gtest

            # Call dcstep to update stx, sty, and to compute the new step.
            # dcstep can have several operations which can produce NaN
            # e.g. inf/inf. Filter these out.
            with np.errstate(invalid="ignore", over="ignore"):
                tup = dcstep(
                    self.stx,
                    fxm,
                    gxm,
                    self.sty,
                    fym,
                    gym,
                    stp,
                    fm,
                    gm,
                    self.brackt,
                    self.stmin,
                    self.stmax,
                )
            self.stx, fxm, gxm, self.sty, fym, gym, stp, self.brackt = tup

            # Reset the function and derivative values for f
            self.fx = fxm + self.stx * self.gtest
            self.fy = fym + self.sty * self.gtest
            self.gx = gxm + self.gtest
            self.gy = gym + self.gtest

        else:
            # Call dcstep to update stx, sty, and to compute the new step.
            # dcstep can have several operations which can produce NaN
            # e.g. inf/inf. Filter these out.

            with np.errstate(invalid="ignore", over="ignore"):
                tup = dcstep(
                    self.stx,
                    self.fx,
                    self.gx,
                    self.sty,
                    self.fy,
                    self.gy,
                    stp,
                    f,
                    g,
                    self.brackt,
                    self.stmin,
                    self.stmax,
                )
            (
                self.stx,
                self.fx,
                self.gx,
                self.sty,
                self.fy,
                self.gy,
                stp,
                self.brackt,
            ) = tup

        # Decide if a bisection step is needed
        if self.brackt:
            if abs(self.sty - self.stx) >= p66 * self.width1:
                stp = self.stx + p5 * (self.sty - self.stx)
            self.width1 = self.width
            self.width = abs(self.sty - self.stx)

        # Set the minimum and maximum steps allowed for stp.
        if self.brackt:
            self.stmin = min(self.stx, self.sty)
            self.stmax = max(self.stx, self.sty)
        else:
            self.stmin = stp + xtrapl * (stp - self.stx)
            self.stmax = stp + xtrapu * (stp - self.stx)

        # Force the step to be within the bounds stpmax and stpmin.
        stp = np.clip(stp, self.stpmin, self.stpmax)

        # If further progress is not possible, let stp be the best
        # point obtained during the search.
        if (
            self.brackt
            and (stp <= self.stmin or stp >= self.stmax)
            or (
                self.brackt
                and self.stmax - self.stmin <= self.xtol * self.stmax
            )
        ):
            stp = self.stx

        # Obtain another function and derivative
        task = b"FG"
        return stp, f, g, task
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def dcstep(stx, fx, dx, sty, fy, dy, stp, fp, dp, brackt, stpmin, stpmax):
    """
    Subroutine dcstep

    This subroutine computes a safeguarded step for a search
    procedure and updates an interval that contains a step that
    satisfies a sufficient decrease and a curvature condition.

    The parameter stx contains the step with the least function
    value. If brackt is set to .true. then a minimizer has
    been bracketed in an interval with endpoints stx and sty.
    The parameter stp contains the current step.
    The subroutine assumes that if brackt is set to .true. then

        min(stx,sty) < stp < max(stx,sty),

    and that the derivative at stx is negative in the direction
    of the step.

    The subroutine statement is

        subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
                          stpmin,stpmax)

    where

        stx is a double precision variable.
            On entry stx is the best step obtained so far and is an
            endpoint of the interval that contains the minimizer.
            On exit stx is the updated best step.

        fx is a double precision variable.
            On entry fx is the function at stx.
            On exit fx is the function at stx.

        dx is a double precision variable.
            On entry dx is the derivative of the function at
            stx. The derivative must be negative in the direction of
            the step, that is, dx and stp - stx must have opposite
            signs.
            On exit dx is the derivative of the function at stx.

        sty is a double precision variable.
            On entry sty is the second endpoint of the interval that
            contains the minimizer.
            On exit sty is the updated endpoint of the interval that
            contains the minimizer.

        fy is a double precision variable.
            On entry fy is the function at sty.
            On exit fy is the function at sty.

        dy is a double precision variable.
            On entry dy is the derivative of the function at sty.
            On exit dy is the derivative of the function at the exit sty.

        stp is a double precision variable.
            On entry stp is the current step. If brackt is set to .true.
            then on input stp must be between stx and sty.
            On exit stp is a new trial step.

        fp is a double precision variable.
            On entry fp is the function at stp
            On exit fp is unchanged.

        dp is a double precision variable.
            On entry dp is the derivative of the function at stp.
            On exit dp is unchanged.

        brackt is an logical variable.
            On entry brackt specifies if a minimizer has been bracketed.
            Initially brackt must be set to .false.
            On exit brackt specifies if a minimizer has been bracketed.
            When a minimizer is bracketed brackt is set to .true.

        stpmin is a double precision variable.
            On entry stpmin is a lower bound for the step.
            On exit stpmin is unchanged.

        stpmax is a double precision variable.
            On entry stpmax is an upper bound for the step.
            On exit stpmax is unchanged.

    Returns the updated ``(stx, fx, dx, sty, fy, dy, stp, brackt)`` tuple.

    MINPACK-1 Project. June 1983
    Argonne National Laboratory.
    Jorge J. More' and David J. Thuente.

    MINPACK-2 Project. November 1993.
    Argonne National Laboratory and University of Minnesota.
    Brett M. Averick and Jorge J. More'.

    """
    sgn_dp = np.sign(dp)
    sgn_dx = np.sign(dx)

    # sgnd = dp * (dx / abs(dx))
    sgnd = sgn_dp * sgn_dx

    # First case: A higher function value. The minimum is bracketed.
    # If the cubic step is closer to stx than the quadratic step, the
    # cubic step is taken, otherwise the average of the cubic and
    # quadratic steps is taken.
    if fp > fx:
        theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp
        # s normalizes theta, dx, dp to avoid overflow in the square root.
        s = max(abs(theta), abs(dx), abs(dp))
        gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
        if stp < stx:
            gamma *= -1
        p = (gamma - dx) + theta
        q = ((gamma - dx) + gamma) + dp
        r = p / q
        stpc = stx + r * (stp - stx)  # cubic-interpolation step
        # quadratic-interpolation step
        stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.0) * (stp - stx)
        if abs(stpc - stx) <= abs(stpq - stx):
            stpf = stpc
        else:
            stpf = stpc + (stpq - stpc) / 2.0
        brackt = True
    elif sgnd < 0.0:
        # Second case: A lower function value and derivatives of opposite
        # sign. The minimum is bracketed. If the cubic step is farther from
        # stp than the secant step, the cubic step is taken, otherwise the
        # secant step is taken.
        theta = 3 * (fx - fp) / (stp - stx) + dx + dp
        s = max(abs(theta), abs(dx), abs(dp))
        gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
        if stp > stx:
            gamma *= -1
        p = (gamma - dp) + theta
        q = ((gamma - dp) + gamma) + dx
        r = p / q
        stpc = stp + r * (stx - stp)
        stpq = stp + (dp / (dp - dx)) * (stx - stp)  # secant step
        if abs(stpc - stp) > abs(stpq - stp):
            stpf = stpc
        else:
            stpf = stpq
        brackt = True
    elif abs(dp) < abs(dx):
        # Third case: A lower function value, derivatives of the same sign,
        # and the magnitude of the derivative decreases.

        # The cubic step is computed only if the cubic tends to infinity
        # in the direction of the step or if the minimum of the cubic
        # is beyond stp. Otherwise the cubic step is defined to be the
        # secant step.
        theta = 3 * (fx - fp) / (stp - stx) + dx + dp
        s = max(abs(theta), abs(dx), abs(dp))

        # The case gamma = 0 only arises if the cubic does not tend
        # to infinity in the direction of the step.
        gamma = s * np.sqrt(max(0, (theta / s) ** 2 - (dx / s) * (dp / s)))
        if stp > stx:
            gamma = -gamma
        p = (gamma - dp) + theta
        q = (gamma + (dx - dp)) + gamma
        r = p / q
        if r < 0 and gamma != 0:
            stpc = stp + r * (stx - stp)
        elif stp > stx:
            stpc = stpmax
        else:
            stpc = stpmin
        stpq = stp + (dp / (dp - dx)) * (stx - stp)

        if brackt:
            # A minimizer has been bracketed. If the cubic step is
            # closer to stp than the secant step, the cubic step is
            # taken, otherwise the secant step is taken.
            if abs(stpc - stp) < abs(stpq - stp):
                stpf = stpc
            else:
                stpf = stpq

            if stp > stx:
                stpf = min(stp + 0.66 * (sty - stp), stpf)
            else:
                stpf = max(stp + 0.66 * (sty - stp), stpf)
        else:
            # A minimizer has not been bracketed. If the cubic step is
            # farther from stp than the secant step, the cubic step is
            # taken, otherwise the secant step is taken.
            if abs(stpc - stp) > abs(stpq - stp):
                stpf = stpc
            else:
                stpf = stpq
            stpf = np.clip(stpf, stpmin, stpmax)

    else:
        # Fourth case: A lower function value, derivatives of the same sign,
        # and the magnitude of the derivative does not decrease. If the
        # minimum is not bracketed, the step is either stpmin or stpmax,
        # otherwise the cubic step is taken.
        if brackt:
            theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp
            s = max(abs(theta), abs(dy), abs(dp))
            gamma = s * np.sqrt((theta / s) ** 2 - (dy / s) * (dp / s))
            if stp > sty:
                gamma = -gamma
            p = (gamma - dp) + theta
            q = ((gamma - dp) + gamma) + dy
            r = p / q
            stpc = stp + r * (sty - stp)
            stpf = stpc
        elif stp > stx:
            stpf = stpmax
        else:
            stpf = stpmin

    # Update the interval which contains a minimizer.
    if fp > fx:
        sty = stp
        fy = fp
        dy = dp
    else:
        if sgnd < 0:
            sty = stx
            fy = fx
            dy = dx
        stx = stp
        fx = fp
        dx = dp

    # Compute the new step.
    stp = stpf

    return stx, fx, dx, sty, fy, dy, stp, brackt
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py
ADDED
|
@@ -0,0 +1,693 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import scipy.sparse as sps
|
| 3 |
+
from ._numdiff import approx_derivative, group_columns
|
| 4 |
+
from ._hessian_update_strategy import HessianUpdateStrategy
|
| 5 |
+
from scipy.sparse.linalg import LinearOperator
|
| 6 |
+
from scipy._lib._array_api import atleast_nd, array_namespace
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
FD_METHODS = ('2-point', '3-point', 'cs')
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _wrapper_fun(fun, args=()):
|
| 13 |
+
ncalls = [0]
|
| 14 |
+
|
| 15 |
+
def wrapped(x):
|
| 16 |
+
ncalls[0] += 1
|
| 17 |
+
# Send a copy because the user may overwrite it.
|
| 18 |
+
# Overwriting results in undefined behaviour because
|
| 19 |
+
# fun(self.x) will change self.x, with the two no longer linked.
|
| 20 |
+
fx = fun(np.copy(x), *args)
|
| 21 |
+
# Make sure the function returns a true scalar
|
| 22 |
+
if not np.isscalar(fx):
|
| 23 |
+
try:
|
| 24 |
+
fx = np.asarray(fx).item()
|
| 25 |
+
except (TypeError, ValueError) as e:
|
| 26 |
+
raise ValueError(
|
| 27 |
+
"The user-provided objective function "
|
| 28 |
+
"must return a scalar value."
|
| 29 |
+
) from e
|
| 30 |
+
return fx
|
| 31 |
+
return wrapped, ncalls
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None):
|
| 35 |
+
ncalls = [0]
|
| 36 |
+
|
| 37 |
+
if callable(grad):
|
| 38 |
+
def wrapped(x, **kwds):
|
| 39 |
+
# kwds present to give function same signature as numdiff variant
|
| 40 |
+
ncalls[0] += 1
|
| 41 |
+
return np.atleast_1d(grad(np.copy(x), *args))
|
| 42 |
+
return wrapped, ncalls
|
| 43 |
+
|
| 44 |
+
elif grad in FD_METHODS:
|
| 45 |
+
def wrapped1(x, f0=None):
|
| 46 |
+
ncalls[0] += 1
|
| 47 |
+
return approx_derivative(
|
| 48 |
+
fun, x, f0=f0, **finite_diff_options
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
return wrapped1, ncalls
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None):
|
| 55 |
+
if callable(hess):
|
| 56 |
+
H = hess(np.copy(x0), *args)
|
| 57 |
+
ncalls = [1]
|
| 58 |
+
|
| 59 |
+
if sps.issparse(H):
|
| 60 |
+
def wrapped(x, **kwds):
|
| 61 |
+
ncalls[0] += 1
|
| 62 |
+
return sps.csr_matrix(hess(np.copy(x), *args))
|
| 63 |
+
|
| 64 |
+
H = sps.csr_matrix(H)
|
| 65 |
+
|
| 66 |
+
elif isinstance(H, LinearOperator):
|
| 67 |
+
def wrapped(x, **kwds):
|
| 68 |
+
ncalls[0] += 1
|
| 69 |
+
return hess(np.copy(x), *args)
|
| 70 |
+
|
| 71 |
+
else: # dense
|
| 72 |
+
def wrapped(x, **kwds):
|
| 73 |
+
ncalls[0] += 1
|
| 74 |
+
return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
|
| 75 |
+
|
| 76 |
+
H = np.atleast_2d(np.asarray(H))
|
| 77 |
+
|
| 78 |
+
return wrapped, ncalls, H
|
| 79 |
+
elif hess in FD_METHODS:
|
| 80 |
+
ncalls = [0]
|
| 81 |
+
|
| 82 |
+
def wrapped1(x, f0=None):
|
| 83 |
+
return approx_derivative(
|
| 84 |
+
grad, x, f0=f0, **finite_diff_options
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
return wrapped1, ncalls, None
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class ScalarFunction:
|
| 91 |
+
"""Scalar function and its derivatives.
|
| 92 |
+
|
| 93 |
+
This class defines a scalar function F: R^n->R and methods for
|
| 94 |
+
computing or approximating its first and second derivatives.
|
| 95 |
+
|
| 96 |
+
Parameters
|
| 97 |
+
----------
|
| 98 |
+
fun : callable
|
| 99 |
+
evaluates the scalar function. Must be of the form ``fun(x, *args)``,
|
| 100 |
+
where ``x`` is the argument in the form of a 1-D array and ``args`` is
|
| 101 |
+
a tuple of any additional fixed parameters needed to completely specify
|
| 102 |
+
the function. Should return a scalar.
|
| 103 |
+
x0 : array-like
|
| 104 |
+
Provides an initial set of variables for evaluating fun. Array of real
|
| 105 |
+
elements of size (n,), where 'n' is the number of independent
|
| 106 |
+
variables.
|
| 107 |
+
args : tuple, optional
|
| 108 |
+
Any additional fixed parameters needed to completely specify the scalar
|
| 109 |
+
function.
|
| 110 |
+
grad : {callable, '2-point', '3-point', 'cs'}
|
| 111 |
+
Method for computing the gradient vector.
|
| 112 |
+
If it is a callable, it should be a function that returns the gradient
|
| 113 |
+
vector:
|
| 114 |
+
|
| 115 |
+
``grad(x, *args) -> array_like, shape (n,)``
|
| 116 |
+
|
| 117 |
+
where ``x`` is an array with shape (n,) and ``args`` is a tuple with
|
| 118 |
+
the fixed parameters.
|
| 119 |
+
Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
|
| 120 |
+
to select a finite difference scheme for numerical estimation of the
|
| 121 |
+
gradient with a relative step size. These finite difference schemes
|
| 122 |
+
obey any specified `bounds`.
|
| 123 |
+
hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
|
| 124 |
+
Method for computing the Hessian matrix. If it is callable, it should
|
| 125 |
+
return the Hessian matrix:
|
| 126 |
+
|
| 127 |
+
``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
|
| 128 |
+
|
| 129 |
+
where x is a (n,) ndarray and `args` is a tuple with the fixed
|
| 130 |
+
parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
|
| 131 |
+
select a finite difference scheme for numerical estimation. Or, objects
|
| 132 |
+
implementing `HessianUpdateStrategy` interface can be used to
|
| 133 |
+
approximate the Hessian.
|
| 134 |
+
Whenever the gradient is estimated via finite-differences, the Hessian
|
| 135 |
+
cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
|
| 136 |
+
to be estimated using one of the quasi-Newton strategies.
|
| 137 |
+
finite_diff_rel_step : None or array_like
|
| 138 |
+
Relative step size to use. The absolute step size is computed as
|
| 139 |
+
``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
|
| 140 |
+
adjusted to fit into the bounds. For ``method='3-point'`` the sign
|
| 141 |
+
of `h` is ignored. If None then finite_diff_rel_step is selected
|
| 142 |
+
automatically,
|
| 143 |
+
finite_diff_bounds : tuple of array_like
|
| 144 |
+
Lower and upper bounds on independent variables. Defaults to no bounds,
|
| 145 |
+
(-np.inf, np.inf). Each bound must match the size of `x0` or be a
|
| 146 |
+
scalar, in the latter case the bound will be the same for all
|
| 147 |
+
variables. Use it to limit the range of function evaluation.
|
| 148 |
+
epsilon : None or array_like, optional
|
| 149 |
+
Absolute step size to use, possibly adjusted to fit into the bounds.
|
| 150 |
+
For ``method='3-point'`` the sign of `epsilon` is ignored. By default
|
| 151 |
+
relative steps are used, only if ``epsilon is not None`` are absolute
|
| 152 |
+
steps used.
|
| 153 |
+
|
| 154 |
+
Notes
|
| 155 |
+
-----
|
| 156 |
+
This class implements a memoization logic. There are methods `fun`,
|
| 157 |
+
`grad`, hess` and corresponding attributes `f`, `g` and `H`. The following
|
| 158 |
+
things should be considered:
|
| 159 |
+
|
| 160 |
+
1. Use only public methods `fun`, `grad` and `hess`.
|
| 161 |
+
2. After one of the methods is called, the corresponding attribute
|
| 162 |
+
will be set. However, a subsequent call with a different argument
|
| 163 |
+
of *any* of the methods may overwrite the attribute.
|
| 164 |
+
"""
|
| 165 |
+
    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
                 finite_diff_bounds, epsilon=None):
        """Validate the inputs, wrap the user callables, and perform the
        initial function/gradient/Hessian evaluations at ``x0``.
        """
        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError(
                f"`grad` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError(
                f"`hess` must be either callable, HessianUpdateStrategy"
                f" or one of {FD_METHODS}."
            )

        # FD gradient + FD Hessian would difference a differenced quantity;
        # that combination is rejected (use a quasi-Newton strategy instead).
        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")

        self.xp = xp = array_namespace(x0)
        _x = atleast_nd(x0, ndim=1, xp=xp)
        # Default to float64, but keep x0's dtype when it is already a
        # real floating type.
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # original arguments
        self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args)
        self._orig_fun = fun
        self._orig_grad = grad
        self._orig_hess = hess
        self._args = args

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype
        self.n = self.x.size
        # Memoization flags: True means the cached f/g/H matches self.x.
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False

        # Track the best (lowest) point seen across all evaluations.
        self._lowest_x = None
        self._lowest_f = np.inf

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["bounds"] = finite_diff_bounds
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["as_linear_operator"] = True

        # Initial function evaluation
        self._update_fun()

        # Initial gradient evaluation
        self._wrapped_grad, self._ngev = _wrapper_grad(
            grad,
            fun=self._wrapped_fun,
            args=args,
            finite_diff_options=finite_diff_options
        )
        self._update_grad()

        # Hessian evaluation
        if callable(hess):
            self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
                hess, x0=x0, args=args
            )
            self.H_updated = True
        elif hess in FD_METHODS:
            self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
                hess,
                grad=self._wrapped_grad,
                x0=x0,
                finite_diff_options=finite_diff_options
            )
            # FD Hessian differences the gradient, so the gradient must be
            # current before the first Hessian evaluation.
            self._update_grad()
            self.H = self._wrapped_hess(self.x, f0=self.g)
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            # Previous point/gradient are needed for quasi-Newton updates.
            self.x_prev = None
            self.g_prev = None
            # No user Hessian is ever called in this mode.
            self._nhev = [0]
|
| 256 |
+
|
| 257 |
+
@property
def nfev(self):
    """Number of objective-function evaluations performed so far."""
    # The count is kept in a single-element list so that the wrapper
    # closures can mutate it in place.
    counter = self._nfev
    return counter[0]
|
| 260 |
+
|
| 261 |
+
@property
def ngev(self):
    """Number of gradient evaluations performed so far."""
    counter = self._ngev
    return counter[0]
|
| 264 |
+
|
| 265 |
+
@property
def nhev(self):
    """Number of Hessian evaluations performed so far."""
    counter = self._nhev
    return counter[0]
|
| 268 |
+
|
| 269 |
+
def _update_x(self, x):
|
| 270 |
+
if isinstance(self._orig_hess, HessianUpdateStrategy):
|
| 271 |
+
self._update_grad()
|
| 272 |
+
self.x_prev = self.x
|
| 273 |
+
self.g_prev = self.g
|
| 274 |
+
# ensure that self.x is a copy of x. Don't store a reference
|
| 275 |
+
# otherwise the memoization doesn't work properly.
|
| 276 |
+
|
| 277 |
+
_x = atleast_nd(x, ndim=1, xp=self.xp)
|
| 278 |
+
self.x = self.xp.astype(_x, self.x_dtype)
|
| 279 |
+
self.f_updated = False
|
| 280 |
+
self.g_updated = False
|
| 281 |
+
self.H_updated = False
|
| 282 |
+
self._update_hess()
|
| 283 |
+
else:
|
| 284 |
+
# ensure that self.x is a copy of x. Don't store a reference
|
| 285 |
+
# otherwise the memoization doesn't work properly.
|
| 286 |
+
_x = atleast_nd(x, ndim=1, xp=self.xp)
|
| 287 |
+
self.x = self.xp.astype(_x, self.x_dtype)
|
| 288 |
+
self.f_updated = False
|
| 289 |
+
self.g_updated = False
|
| 290 |
+
self.H_updated = False
|
| 291 |
+
|
| 292 |
+
def _update_fun(self):
|
| 293 |
+
if not self.f_updated:
|
| 294 |
+
fx = self._wrapped_fun(self.x)
|
| 295 |
+
if fx < self._lowest_f:
|
| 296 |
+
self._lowest_x = self.x
|
| 297 |
+
self._lowest_f = fx
|
| 298 |
+
|
| 299 |
+
self.f = fx
|
| 300 |
+
self.f_updated = True
|
| 301 |
+
|
| 302 |
+
def _update_grad(self):
|
| 303 |
+
if not self.g_updated:
|
| 304 |
+
if self._orig_grad in FD_METHODS:
|
| 305 |
+
self._update_fun()
|
| 306 |
+
self.g = self._wrapped_grad(self.x, f0=self.f)
|
| 307 |
+
self.g_updated = True
|
| 308 |
+
|
| 309 |
+
def _update_hess(self):
|
| 310 |
+
if not self.H_updated:
|
| 311 |
+
if self._orig_hess in FD_METHODS:
|
| 312 |
+
self._update_grad()
|
| 313 |
+
self.H = self._wrapped_hess(self.x, f0=self.g)
|
| 314 |
+
elif isinstance(self._orig_hess, HessianUpdateStrategy):
|
| 315 |
+
self._update_grad()
|
| 316 |
+
self.H.update(self.x - self.x_prev, self.g - self.g_prev)
|
| 317 |
+
else: # should be callable(hess)
|
| 318 |
+
self.H = self._wrapped_hess(self.x)
|
| 319 |
+
|
| 320 |
+
self.H_updated = True
|
| 321 |
+
|
| 322 |
+
def fun(self, x):
    """Return the objective value at ``x``, re-evaluating only on change."""
    x_is_cached = np.array_equal(x, self.x)
    if not x_is_cached:
        self._update_x(x)
    self._update_fun()
    return self.f
|
| 327 |
+
|
| 328 |
+
def grad(self, x):
    """Return the gradient at ``x``, re-evaluating only on change."""
    x_is_cached = np.array_equal(x, self.x)
    if not x_is_cached:
        self._update_x(x)
    self._update_grad()
    return self.g
|
| 333 |
+
|
| 334 |
+
def hess(self, x):
    """Return the Hessian at ``x``, re-evaluating only on change."""
    x_is_cached = np.array_equal(x, self.x)
    if not x_is_cached:
        self._update_x(x)
    self._update_hess()
    return self.H
|
| 339 |
+
|
| 340 |
+
def fun_and_grad(self, x):
    """Return ``(f(x), g(x))``, re-evaluating only when ``x`` changed."""
    x_is_cached = np.array_equal(x, self.x)
    if not x_is_cached:
        self._update_x(x)
    self._update_fun()
    self._update_grad()
    return self.f, self.g
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
class VectorFunction:
|
| 349 |
+
"""Vector function and its derivatives.
|
| 350 |
+
|
| 351 |
+
This class defines a vector function F: R^n->R^m and methods for
|
| 352 |
+
computing or approximating its first and second derivatives.
|
| 353 |
+
|
| 354 |
+
Notes
|
| 355 |
+
-----
|
| 356 |
+
This class implements a memoization logic. There are methods `fun`,
|
| 357 |
+
`jac`, hess` and corresponding attributes `f`, `J` and `H`. The following
|
| 358 |
+
things should be considered:
|
| 359 |
+
|
| 360 |
+
1. Use only public methods `fun`, `jac` and `hess`.
|
| 361 |
+
2. After one of the methods is called, the corresponding attribute
|
| 362 |
+
will be set. However, a subsequent call with a different argument
|
| 363 |
+
of *any* of the methods may overwrite the attribute.
|
| 364 |
+
"""
|
| 365 |
+
def __init__(self, fun, x0, jac, hess,
|
| 366 |
+
finite_diff_rel_step, finite_diff_jac_sparsity,
|
| 367 |
+
finite_diff_bounds, sparse_jacobian):
|
| 368 |
+
if not callable(jac) and jac not in FD_METHODS:
|
| 369 |
+
raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.")
|
| 370 |
+
|
| 371 |
+
if not (callable(hess) or hess in FD_METHODS
|
| 372 |
+
or isinstance(hess, HessianUpdateStrategy)):
|
| 373 |
+
raise ValueError("`hess` must be either callable,"
|
| 374 |
+
f"HessianUpdateStrategy or one of {FD_METHODS}.")
|
| 375 |
+
|
| 376 |
+
if jac in FD_METHODS and hess in FD_METHODS:
|
| 377 |
+
raise ValueError("Whenever the Jacobian is estimated via "
|
| 378 |
+
"finite-differences, we require the Hessian to "
|
| 379 |
+
"be estimated using one of the quasi-Newton "
|
| 380 |
+
"strategies.")
|
| 381 |
+
|
| 382 |
+
self.xp = xp = array_namespace(x0)
|
| 383 |
+
_x = atleast_nd(x0, ndim=1, xp=xp)
|
| 384 |
+
_dtype = xp.float64
|
| 385 |
+
if xp.isdtype(_x.dtype, "real floating"):
|
| 386 |
+
_dtype = _x.dtype
|
| 387 |
+
|
| 388 |
+
# promotes to floating
|
| 389 |
+
self.x = xp.astype(_x, _dtype)
|
| 390 |
+
self.x_dtype = _dtype
|
| 391 |
+
|
| 392 |
+
self.n = self.x.size
|
| 393 |
+
self.nfev = 0
|
| 394 |
+
self.njev = 0
|
| 395 |
+
self.nhev = 0
|
| 396 |
+
self.f_updated = False
|
| 397 |
+
self.J_updated = False
|
| 398 |
+
self.H_updated = False
|
| 399 |
+
|
| 400 |
+
finite_diff_options = {}
|
| 401 |
+
if jac in FD_METHODS:
|
| 402 |
+
finite_diff_options["method"] = jac
|
| 403 |
+
finite_diff_options["rel_step"] = finite_diff_rel_step
|
| 404 |
+
if finite_diff_jac_sparsity is not None:
|
| 405 |
+
sparsity_groups = group_columns(finite_diff_jac_sparsity)
|
| 406 |
+
finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
|
| 407 |
+
sparsity_groups)
|
| 408 |
+
finite_diff_options["bounds"] = finite_diff_bounds
|
| 409 |
+
self.x_diff = np.copy(self.x)
|
| 410 |
+
if hess in FD_METHODS:
|
| 411 |
+
finite_diff_options["method"] = hess
|
| 412 |
+
finite_diff_options["rel_step"] = finite_diff_rel_step
|
| 413 |
+
finite_diff_options["as_linear_operator"] = True
|
| 414 |
+
self.x_diff = np.copy(self.x)
|
| 415 |
+
if jac in FD_METHODS and hess in FD_METHODS:
|
| 416 |
+
raise ValueError("Whenever the Jacobian is estimated via "
|
| 417 |
+
"finite-differences, we require the Hessian to "
|
| 418 |
+
"be estimated using one of the quasi-Newton "
|
| 419 |
+
"strategies.")
|
| 420 |
+
|
| 421 |
+
# Function evaluation
|
| 422 |
+
def fun_wrapped(x):
|
| 423 |
+
self.nfev += 1
|
| 424 |
+
return np.atleast_1d(fun(x))
|
| 425 |
+
|
| 426 |
+
def update_fun():
|
| 427 |
+
self.f = fun_wrapped(self.x)
|
| 428 |
+
|
| 429 |
+
self._update_fun_impl = update_fun
|
| 430 |
+
update_fun()
|
| 431 |
+
|
| 432 |
+
self.v = np.zeros_like(self.f)
|
| 433 |
+
self.m = self.v.size
|
| 434 |
+
|
| 435 |
+
# Jacobian Evaluation
|
| 436 |
+
if callable(jac):
|
| 437 |
+
self.J = jac(self.x)
|
| 438 |
+
self.J_updated = True
|
| 439 |
+
self.njev += 1
|
| 440 |
+
|
| 441 |
+
if (sparse_jacobian or
|
| 442 |
+
sparse_jacobian is None and sps.issparse(self.J)):
|
| 443 |
+
def jac_wrapped(x):
|
| 444 |
+
self.njev += 1
|
| 445 |
+
return sps.csr_matrix(jac(x))
|
| 446 |
+
self.J = sps.csr_matrix(self.J)
|
| 447 |
+
self.sparse_jacobian = True
|
| 448 |
+
|
| 449 |
+
elif sps.issparse(self.J):
|
| 450 |
+
def jac_wrapped(x):
|
| 451 |
+
self.njev += 1
|
| 452 |
+
return jac(x).toarray()
|
| 453 |
+
self.J = self.J.toarray()
|
| 454 |
+
self.sparse_jacobian = False
|
| 455 |
+
|
| 456 |
+
else:
|
| 457 |
+
def jac_wrapped(x):
|
| 458 |
+
self.njev += 1
|
| 459 |
+
return np.atleast_2d(jac(x))
|
| 460 |
+
self.J = np.atleast_2d(self.J)
|
| 461 |
+
self.sparse_jacobian = False
|
| 462 |
+
|
| 463 |
+
def update_jac():
|
| 464 |
+
self.J = jac_wrapped(self.x)
|
| 465 |
+
|
| 466 |
+
elif jac in FD_METHODS:
|
| 467 |
+
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
| 468 |
+
**finite_diff_options)
|
| 469 |
+
self.J_updated = True
|
| 470 |
+
|
| 471 |
+
if (sparse_jacobian or
|
| 472 |
+
sparse_jacobian is None and sps.issparse(self.J)):
|
| 473 |
+
def update_jac():
|
| 474 |
+
self._update_fun()
|
| 475 |
+
self.J = sps.csr_matrix(
|
| 476 |
+
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
| 477 |
+
**finite_diff_options))
|
| 478 |
+
self.J = sps.csr_matrix(self.J)
|
| 479 |
+
self.sparse_jacobian = True
|
| 480 |
+
|
| 481 |
+
elif sps.issparse(self.J):
|
| 482 |
+
def update_jac():
|
| 483 |
+
self._update_fun()
|
| 484 |
+
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
| 485 |
+
**finite_diff_options).toarray()
|
| 486 |
+
self.J = self.J.toarray()
|
| 487 |
+
self.sparse_jacobian = False
|
| 488 |
+
|
| 489 |
+
else:
|
| 490 |
+
def update_jac():
|
| 491 |
+
self._update_fun()
|
| 492 |
+
self.J = np.atleast_2d(
|
| 493 |
+
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
| 494 |
+
**finite_diff_options))
|
| 495 |
+
self.J = np.atleast_2d(self.J)
|
| 496 |
+
self.sparse_jacobian = False
|
| 497 |
+
|
| 498 |
+
self._update_jac_impl = update_jac
|
| 499 |
+
|
| 500 |
+
# Define Hessian
|
| 501 |
+
if callable(hess):
|
| 502 |
+
self.H = hess(self.x, self.v)
|
| 503 |
+
self.H_updated = True
|
| 504 |
+
self.nhev += 1
|
| 505 |
+
|
| 506 |
+
if sps.issparse(self.H):
|
| 507 |
+
def hess_wrapped(x, v):
|
| 508 |
+
self.nhev += 1
|
| 509 |
+
return sps.csr_matrix(hess(x, v))
|
| 510 |
+
self.H = sps.csr_matrix(self.H)
|
| 511 |
+
|
| 512 |
+
elif isinstance(self.H, LinearOperator):
|
| 513 |
+
def hess_wrapped(x, v):
|
| 514 |
+
self.nhev += 1
|
| 515 |
+
return hess(x, v)
|
| 516 |
+
|
| 517 |
+
else:
|
| 518 |
+
def hess_wrapped(x, v):
|
| 519 |
+
self.nhev += 1
|
| 520 |
+
return np.atleast_2d(np.asarray(hess(x, v)))
|
| 521 |
+
self.H = np.atleast_2d(np.asarray(self.H))
|
| 522 |
+
|
| 523 |
+
def update_hess():
|
| 524 |
+
self.H = hess_wrapped(self.x, self.v)
|
| 525 |
+
elif hess in FD_METHODS:
|
| 526 |
+
def jac_dot_v(x, v):
|
| 527 |
+
return jac_wrapped(x).T.dot(v)
|
| 528 |
+
|
| 529 |
+
def update_hess():
|
| 530 |
+
self._update_jac()
|
| 531 |
+
self.H = approx_derivative(jac_dot_v, self.x,
|
| 532 |
+
f0=self.J.T.dot(self.v),
|
| 533 |
+
args=(self.v,),
|
| 534 |
+
**finite_diff_options)
|
| 535 |
+
update_hess()
|
| 536 |
+
self.H_updated = True
|
| 537 |
+
elif isinstance(hess, HessianUpdateStrategy):
|
| 538 |
+
self.H = hess
|
| 539 |
+
self.H.initialize(self.n, 'hess')
|
| 540 |
+
self.H_updated = True
|
| 541 |
+
self.x_prev = None
|
| 542 |
+
self.J_prev = None
|
| 543 |
+
|
| 544 |
+
def update_hess():
|
| 545 |
+
self._update_jac()
|
| 546 |
+
# When v is updated before x was updated, then x_prev and
|
| 547 |
+
# J_prev are None and we need this check.
|
| 548 |
+
if self.x_prev is not None and self.J_prev is not None:
|
| 549 |
+
delta_x = self.x - self.x_prev
|
| 550 |
+
delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
|
| 551 |
+
self.H.update(delta_x, delta_g)
|
| 552 |
+
|
| 553 |
+
self._update_hess_impl = update_hess
|
| 554 |
+
|
| 555 |
+
if isinstance(hess, HessianUpdateStrategy):
|
| 556 |
+
def update_x(x):
|
| 557 |
+
self._update_jac()
|
| 558 |
+
self.x_prev = self.x
|
| 559 |
+
self.J_prev = self.J
|
| 560 |
+
_x = atleast_nd(x, ndim=1, xp=self.xp)
|
| 561 |
+
self.x = self.xp.astype(_x, self.x_dtype)
|
| 562 |
+
self.f_updated = False
|
| 563 |
+
self.J_updated = False
|
| 564 |
+
self.H_updated = False
|
| 565 |
+
self._update_hess()
|
| 566 |
+
else:
|
| 567 |
+
def update_x(x):
|
| 568 |
+
_x = atleast_nd(x, ndim=1, xp=self.xp)
|
| 569 |
+
self.x = self.xp.astype(_x, self.x_dtype)
|
| 570 |
+
self.f_updated = False
|
| 571 |
+
self.J_updated = False
|
| 572 |
+
self.H_updated = False
|
| 573 |
+
|
| 574 |
+
self._update_x_impl = update_x
|
| 575 |
+
|
| 576 |
+
def _update_v(self, v):
|
| 577 |
+
if not np.array_equal(v, self.v):
|
| 578 |
+
self.v = v
|
| 579 |
+
self.H_updated = False
|
| 580 |
+
|
| 581 |
+
def _update_x(self, x):
|
| 582 |
+
if not np.array_equal(x, self.x):
|
| 583 |
+
self._update_x_impl(x)
|
| 584 |
+
|
| 585 |
+
def _update_fun(self):
|
| 586 |
+
if not self.f_updated:
|
| 587 |
+
self._update_fun_impl()
|
| 588 |
+
self.f_updated = True
|
| 589 |
+
|
| 590 |
+
def _update_jac(self):
|
| 591 |
+
if not self.J_updated:
|
| 592 |
+
self._update_jac_impl()
|
| 593 |
+
self.J_updated = True
|
| 594 |
+
|
| 595 |
+
def _update_hess(self):
|
| 596 |
+
if not self.H_updated:
|
| 597 |
+
self._update_hess_impl()
|
| 598 |
+
self.H_updated = True
|
| 599 |
+
|
| 600 |
+
def fun(self, x):
|
| 601 |
+
self._update_x(x)
|
| 602 |
+
self._update_fun()
|
| 603 |
+
return self.f
|
| 604 |
+
|
| 605 |
+
def jac(self, x):
|
| 606 |
+
self._update_x(x)
|
| 607 |
+
self._update_jac()
|
| 608 |
+
return self.J
|
| 609 |
+
|
| 610 |
+
def hess(self, x, v):
|
| 611 |
+
# v should be updated before x.
|
| 612 |
+
self._update_v(v)
|
| 613 |
+
self._update_x(x)
|
| 614 |
+
self._update_hess()
|
| 615 |
+
return self.H
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
class LinearVectorFunction:
|
| 619 |
+
"""Linear vector function and its derivatives.
|
| 620 |
+
|
| 621 |
+
Defines a linear function F = A x, where x is N-D vector and
|
| 622 |
+
A is m-by-n matrix. The Jacobian is constant and equals to A. The Hessian
|
| 623 |
+
is identically zero and it is returned as a csr matrix.
|
| 624 |
+
"""
|
| 625 |
+
def __init__(self, A, x0, sparse_jacobian):
|
| 626 |
+
if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
|
| 627 |
+
self.J = sps.csr_matrix(A)
|
| 628 |
+
self.sparse_jacobian = True
|
| 629 |
+
elif sps.issparse(A):
|
| 630 |
+
self.J = A.toarray()
|
| 631 |
+
self.sparse_jacobian = False
|
| 632 |
+
else:
|
| 633 |
+
# np.asarray makes sure A is ndarray and not matrix
|
| 634 |
+
self.J = np.atleast_2d(np.asarray(A))
|
| 635 |
+
self.sparse_jacobian = False
|
| 636 |
+
|
| 637 |
+
self.m, self.n = self.J.shape
|
| 638 |
+
|
| 639 |
+
self.xp = xp = array_namespace(x0)
|
| 640 |
+
_x = atleast_nd(x0, ndim=1, xp=xp)
|
| 641 |
+
_dtype = xp.float64
|
| 642 |
+
if xp.isdtype(_x.dtype, "real floating"):
|
| 643 |
+
_dtype = _x.dtype
|
| 644 |
+
|
| 645 |
+
# promotes to floating
|
| 646 |
+
self.x = xp.astype(_x, _dtype)
|
| 647 |
+
self.x_dtype = _dtype
|
| 648 |
+
|
| 649 |
+
self.f = self.J.dot(self.x)
|
| 650 |
+
self.f_updated = True
|
| 651 |
+
|
| 652 |
+
self.v = np.zeros(self.m, dtype=float)
|
| 653 |
+
self.H = sps.csr_matrix((self.n, self.n))
|
| 654 |
+
|
| 655 |
+
def _update_x(self, x):
|
| 656 |
+
if not np.array_equal(x, self.x):
|
| 657 |
+
_x = atleast_nd(x, ndim=1, xp=self.xp)
|
| 658 |
+
self.x = self.xp.astype(_x, self.x_dtype)
|
| 659 |
+
self.f_updated = False
|
| 660 |
+
|
| 661 |
+
def fun(self, x):
|
| 662 |
+
self._update_x(x)
|
| 663 |
+
if not self.f_updated:
|
| 664 |
+
self.f = self.J.dot(x)
|
| 665 |
+
self.f_updated = True
|
| 666 |
+
return self.f
|
| 667 |
+
|
| 668 |
+
def jac(self, x):
|
| 669 |
+
self._update_x(x)
|
| 670 |
+
return self.J
|
| 671 |
+
|
| 672 |
+
def hess(self, x, v):
|
| 673 |
+
self._update_x(x)
|
| 674 |
+
self.v = v
|
| 675 |
+
return self.H
|
| 676 |
+
|
| 677 |
+
|
| 678 |
+
class IdentityVectorFunction(LinearVectorFunction):
    """Identity vector function and its derivatives.

    The Jacobian is the identity matrix, returned as a dense array when
    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
    identically zero and it is returned as a csr matrix.
    """
    def __init__(self, x0, sparse_jacobian):
        n = len(x0)
        # Default (None) and explicit True both select the sparse identity.
        if sparse_jacobian is None or sparse_jacobian:
            A = sps.eye(n, format='csr')
            sparse_jacobian = True
        else:
            A = np.eye(n)
            sparse_jacobian = False
        super().__init__(A, x0, sparse_jacobian)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py
ADDED
|
@@ -0,0 +1,1951 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
differential_evolution: The differential evolution global optimization algorithm
|
| 3 |
+
Added by Andrew Nelson 2014
|
| 4 |
+
"""
|
| 5 |
+
import warnings
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from scipy.optimize import OptimizeResult, minimize
|
| 9 |
+
from scipy.optimize._optimize import _status_message, _wrap_callback
|
| 10 |
+
from scipy._lib._util import (check_random_state, MapWrapper, _FunctionWrapper,
|
| 11 |
+
rng_integers)
|
| 12 |
+
|
| 13 |
+
from scipy.optimize._constraints import (Bounds, new_bounds_to_old,
|
| 14 |
+
NonlinearConstraint, LinearConstraint)
|
| 15 |
+
from scipy.sparse import issparse
|
| 16 |
+
|
| 17 |
+
__all__ = ['differential_evolution']
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
_MACHEPS = np.finfo(np.float64).eps
|
| 21 |
+
|
| 22 |
+
|
def differential_evolution(func, bounds, args=(), strategy='best1bin',
                           maxiter=1000, popsize=15, tol=0.01,
                           mutation=(0.5, 1), recombination=0.7, seed=None,
                           callback=None, disp=False, polish=True,
                           init='latinhypercube', atol=0, updating='immediate',
                           workers=1, constraints=(), x0=None, *,
                           integrality=None, vectorized=False):
    """Finds the global minimum of a multivariate function.

    The differential evolution method [1]_ is stochastic in nature. It does
    not use gradient methods to find the minimum, and can search large areas
    of candidate space, but often requires larger numbers of function
    evaluations than conventional gradient-based techniques.

    The algorithm is due to Storn and Price [2]_.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a tuple of any additional fixed parameters needed to
        completely specify the function. The number of parameters, N, is equal
        to ``len(x)``.
    bounds : sequence or `Bounds`
        Bounds for variables. There are two ways to specify the bounds:

        1. Instance of `Bounds` class.
        2. ``(min, max)`` pairs for each element in ``x``, defining the
           finite lower and upper bounds for the optimizing argument of
           `func`.

        The total number of bounds is used to determine the number of
        parameters, N. If there are parameters whose bounds are equal the total
        number of free parameters is ``N - N_equal``.

    args : tuple, optional
        Any additional fixed parameters needed to
        completely specify the objective function.
    strategy : {str, callable}, optional
        The differential evolution strategy to use. Should be one of:

        - 'best1bin'
        - 'best1exp'
        - 'rand1bin'
        - 'rand1exp'
        - 'rand2bin'
        - 'rand2exp'
        - 'randtobest1bin'
        - 'randtobest1exp'
        - 'currenttobest1bin'
        - 'currenttobest1exp'
        - 'best2exp'
        - 'best2bin'

        The default is 'best1bin'. Strategies that may be implemented are
        outlined in 'Notes'.
        Alternatively the differential evolution strategy can be customized by
        providing a callable that constructs a trial vector. The callable must
        have the form
        ``strategy(candidate: int, population: np.ndarray, rng=None)``,
        where ``candidate`` is an integer specifying which entry of the
        population is being evolved, ``population`` is an array of shape
        ``(S, N)`` containing all the population members (where S is the
        total population size), and ``rng`` is the random number generator
        being used within the solver.
        ``candidate`` will be in the range ``[0, S)``.
        ``strategy`` must return a trial vector with shape `(N,)`. The
        fitness of this trial vector is compared against the fitness of
        ``population[candidate]``.

        .. versionchanged:: 1.12.0
            Customization of evolution strategy via a callable.

    maxiter : int, optional
        The maximum number of generations over which the entire population is
        evolved. The maximum number of function evaluations (with no polishing)
        is: ``(maxiter + 1) * popsize * (N - N_equal)``
    popsize : int, optional
        A multiplier for setting the total population size. The population has
        ``popsize * (N - N_equal)`` individuals. This keyword is overridden if
        an initial population is supplied via the `init` keyword. When using
        ``init='sobol'`` the population size is calculated as the next power
        of 2 after ``popsize * (N - N_equal)``.
    tol : float, optional
        Relative tolerance for convergence, the solving stops when
        ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
        where `atol` and `tol` are the absolute and relative tolerance
        respectively.
    mutation : float or tuple(float, float), optional
        The mutation constant. In the literature this is also known as
        differential weight, being denoted by F.
        If specified as a float it should be in the range [0, 2].
        If specified as a tuple ``(min, max)`` dithering is employed. Dithering
        randomly changes the mutation constant on a generation by generation
        basis. The mutation constant for that generation is taken from
        ``U[min, max)``. Dithering can help speed convergence significantly.
        Increasing the mutation constant increases the search radius, but will
        slow down convergence.
    recombination : float, optional
        The recombination constant, should be in the range [0, 1]. In the
        literature this is also known as the crossover probability, being
        denoted by CR. Increasing this value allows a larger number of mutants
        to progress into the next generation, but at the risk of population
        stability.
    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
        Specify `seed` for repeatable minimizations.
    disp : bool, optional
        Prints the evaluated `func` at every iteration.
    callback : callable, optional
        A callable called after each iteration. Has the signature:

            ``callback(intermediate_result: OptimizeResult)``

        where ``intermediate_result`` is a keyword parameter containing an
        `OptimizeResult` with attributes ``x`` and ``fun``, the best solution
        found so far and the objective function. Note that the name
        of the parameter must be ``intermediate_result`` for the callback
        to be passed an `OptimizeResult`.

        The callback also supports a signature like:

            ``callback(x, convergence: float=val)``

        ``val`` represents the fractional value of the population convergence.
        When ``val`` is greater than ``1.0``, the function halts.

        Introspection is used to determine which of the signatures is invoked.

        Global minimization will halt if the callback raises ``StopIteration``
        or returns ``True``; any polishing is still carried out.

        .. versionchanged:: 1.12.0
            callback accepts the ``intermediate_result`` keyword.

    polish : bool, optional
        If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
        method is used to polish the best population member at the end, which
        can improve the minimization slightly. If a constrained problem is
        being studied then the `trust-constr` method is used instead. For large
        problems with many constraints, polishing can take a long time due to
        the Jacobian computations.
    init : str or array-like, optional
        Specify which type of population initialization is performed. Should be
        one of:

        - 'latinhypercube'
        - 'sobol'
        - 'halton'
        - 'random'
        - array specifying the initial population. The array should have
          shape ``(S, N)``, where S is the total population size and N is
          the number of parameters.
          `init` is clipped to `bounds` before use.

        The default is 'latinhypercube'. Latin Hypercube sampling tries to
        maximize coverage of the available parameter space.

        'sobol' and 'halton' are superior alternatives and maximize even more
        the parameter space. 'sobol' will enforce an initial population
        size which is calculated as the next power of 2 after
        ``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
        less efficient. See `scipy.stats.qmc` for more details.

        'random' initializes the population randomly - this has the drawback
        that clustering can occur, preventing the whole of parameter space
        being covered. Use of an array to specify a population could be used,
        for example, to create a tight bunch of initial guesses in a location
        where the solution is known to exist, thereby reducing time for
        convergence.
    atol : float, optional
        Absolute tolerance for convergence, the solving stops when
        ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
        where `atol` and `tol` are the absolute and relative tolerance
        respectively.
    updating : {'immediate', 'deferred'}, optional
        If ``'immediate'``, the best solution vector is continuously updated
        within a single generation [4]_. This can lead to faster convergence as
        trial vectors can take advantage of continuous improvements in the best
        solution.
        With ``'deferred'``, the best solution vector is updated once per
        generation. Only ``'deferred'`` is compatible with parallelization or
        vectorization, and the `workers` and `vectorized` keywords can
        over-ride this option.

        .. versionadded:: 1.2.0

    workers : int or map-like callable, optional
        If `workers` is an int the population is subdivided into `workers`
        sections and evaluated in parallel
        (uses `multiprocessing.Pool <multiprocessing>`).
        Supply -1 to use all available CPU cores.
        Alternatively supply a map-like callable, such as
        `multiprocessing.Pool.map` for evaluating the population in parallel.
        This evaluation is carried out as ``workers(func, iterable)``.
        This option will override the `updating` keyword to
        ``updating='deferred'`` if ``workers != 1``.
        This option overrides the `vectorized` keyword if ``workers != 1``.
        Requires that `func` be pickleable.

        .. versionadded:: 1.2.0

    constraints : {NonLinearConstraint, LinearConstraint, Bounds}
        Constraints on the solver, over and above those applied by the `bounds`
        kwd. Uses the approach by Lampinen [5]_.

        .. versionadded:: 1.4.0

    x0 : None or array-like, optional
        Provides an initial guess to the minimization. Once the population has
        been initialized this vector replaces the first (best) member. This
        replacement is done even if `init` is given an initial population.
        ``x0.shape == (N,)``.

        .. versionadded:: 1.7.0

    integrality : 1-D array, optional
        For each decision variable, a boolean value indicating whether the
        decision variable is constrained to integer values. The array is
        broadcast to ``(N,)``.
        If any decision variables are constrained to be integral, they will not
        be changed during polishing.
        Only integer values lying between the lower and upper bounds are used.
        If there are no integer values lying between the bounds then a
        `ValueError` is raised.

        .. versionadded:: 1.9.0

    vectorized : bool, optional
        If ``vectorized is True``, `func` is sent an `x` array with
        ``x.shape == (N, S)``, and is expected to return an array of shape
        ``(S,)``, where `S` is the number of solution vectors to be calculated.
        If constraints are applied, each of the functions used to construct
        a `Constraint` object should accept an `x` array with
        ``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
        `M` is the number of constraint components.
        This option is an alternative to the parallelization offered by
        `workers`, and may help in optimization speed by reducing interpreter
        overhead from multiple function calls. This keyword is ignored if
        ``workers != 1``.
        This option will override the `updating` keyword to
        ``updating='deferred'``.
        See the notes section for further discussion on when to use
        ``'vectorized'``, and when to use ``'workers'``.

        .. versionadded:: 1.9.0

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully,
        ``message`` which describes the cause of the termination,
        ``population`` the solution vectors present in the population, and
        ``population_energies`` the value of the objective function for each
        entry in ``population``.
        See `OptimizeResult` for a description of other attributes. If `polish`
        was employed, and a lower minimum was obtained by the polishing, then
        OptimizeResult also contains the ``jac`` attribute.
        If the eventual solution does not satisfy the applied constraints
        ``success`` will be `False`.

    Notes
    -----
    Differential evolution is a stochastic population based method that is
    useful for global optimization problems. At each pass through the
    population the algorithm mutates each candidate solution by mixing with
    other candidate solutions to create a trial candidate. There are several
    strategies [3]_ for creating trial candidates, which suit some problems
    more than others. The 'best1bin' strategy is a good starting point for
    many systems. In this strategy two members of the population are randomly
    chosen. Their difference is used to mutate the best member (the 'best' in
    'best1bin'), :math:`x_0`, so far:

    .. math::

        b' = x_0 + mutation * (x_{r_0} - x_{r_1})

    A trial vector is then constructed. Starting with a randomly chosen ith
    parameter the trial is sequentially filled (in modulo) with parameters
    from ``b'`` or the original candidate. The choice of whether to use ``b'``
    or the original candidate is made with a binomial distribution (the 'bin'
    in 'best1bin') - a random number in [0, 1) is generated. If this number is
    less than the `recombination` constant then the parameter is loaded from
    ``b'``, otherwise it is loaded from the original candidate. The final
    parameter is always loaded from ``b'``. Once the trial candidate is built
    its fitness is assessed. If the trial is better than the original candidate
    then it takes its place. If it is also better than the best overall
    candidate it also replaces that.

    The other strategies available are outlined in Qiang and
    Mitchell (2014) [3]_.

    .. math::
            rand1* : b' = x_{r_0} + mutation*(x_{r_1} - x_{r_2})

            rand2* : b' = x_{r_0} + mutation*(x_{r_1} + x_{r_2}
                                                - x_{r_3} - x_{r_4})

            best1* : b' = x_0 + mutation*(x_{r_0} - x_{r_1})

            best2* : b' = x_0 + mutation*(x_{r_0} + x_{r_1}
                                            - x_{r_2} - x_{r_3})

            currenttobest1* : b' = x_i + mutation*(x_0 - x_i
                                                     + x_{r_0} - x_{r_1})

            randtobest1* : b' = x_{r_0} + mutation*(x_0 - x_{r_0}
                                                      + x_{r_1} - x_{r_2})

    where the integers :math:`r_0, r_1, r_2, r_3, r_4` are chosen randomly
    from the interval [0, NP) with `NP` being the total population size and
    the original candidate having index `i`. The user can fully customize the
    generation of the trial candidates by supplying a callable to ``strategy``.

    To improve your chances of finding a global minimum use higher `popsize`
    values, with higher `mutation` and (dithering), but lower `recombination`
    values. This has the effect of widening the search radius, but slowing
    convergence.

    By default the best solution vector is updated continuously within a single
    iteration (``updating='immediate'``). This is a modification [4]_ of the
    original differential evolution algorithm which can lead to faster
    convergence as trial vectors can immediately benefit from improved
    solutions. To use the original Storn and Price behaviour, updating the best
    solution once per iteration, set ``updating='deferred'``.
    The ``'deferred'`` approach is compatible with both parallelization and
    vectorization (``'workers'`` and ``'vectorized'`` keywords). These may
    improve minimization speed by using computer resources more efficiently.
    The ``'workers'`` distribute calculations over multiple processors. By
    default the Python `multiprocessing` module is used, but other approaches
    are also possible, such as the Message Passing Interface (MPI) used on
    clusters [6]_ [7]_. The overhead from these approaches (creating new
    Processes, etc) may be significant, meaning that computational speed
    doesn't necessarily scale with the number of processors used.
    Parallelization is best suited to computationally expensive objective
    functions. If the objective function is less expensive, then
    ``'vectorized'`` may aid by only calling the objective function once per
    iteration, rather than multiple times for all the population members; the
    interpreter overhead is reduced.

    .. versionadded:: 0.15.0

    References
    ----------
    .. [1] Differential evolution, Wikipedia,
           http://en.wikipedia.org/wiki/Differential_evolution
    .. [2] Storn, R and Price, K, Differential Evolution - a Simple and
           Efficient Heuristic for Global Optimization over Continuous Spaces,
           Journal of Global Optimization, 1997, 11, 341 - 359.
    .. [3] Qiang, J., Mitchell, C., A Unified Differential Evolution Algorithm
           for Global Optimization, 2014,
           https://www.osti.gov/servlets/purl/1163659
    .. [4] Wormington, M., Panaccione, C., Matney, K. M., Bowen, D. K., -
           Characterization of structures from X-ray scattering data using
           genetic algorithms, Phil. Trans. R. Soc. Lond. A, 1999, 357,
           2827-2848
    .. [5] Lampinen, J., A constraint handling approach for the differential
           evolution algorithm. Proceedings of the 2002 Congress on
           Evolutionary Computation. CEC'02 (Cat. No. 02TH8600). Vol. 2. IEEE,
           2002.
    .. [6] https://mpi4py.readthedocs.io/en/stable/
    .. [7] https://schwimmbad.readthedocs.io/en/latest/


    Examples
    --------
    Let us consider the problem of minimizing the Rosenbrock function. This
    function is implemented in `rosen` in `scipy.optimize`.

    >>> import numpy as np
    >>> from scipy.optimize import rosen, differential_evolution
    >>> bounds = [(0,2), (0, 2), (0, 2), (0, 2), (0, 2)]
    >>> result = differential_evolution(rosen, bounds)
    >>> result.x, result.fun
    (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)

    Now repeat, but with parallelization.

    >>> result = differential_evolution(rosen, bounds, updating='deferred',
    ...                                 workers=2)
    >>> result.x, result.fun
    (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)

    Let's do a constrained minimization.

    >>> from scipy.optimize import LinearConstraint, Bounds

    We add the constraint that the sum of ``x[0]`` and ``x[1]`` must be less
    than or equal to 1.9. This is a linear constraint, which may be written
    ``A @ x <= 1.9``, where ``A = array([[1, 1]])``. This can be encoded as
    a `LinearConstraint` instance:

    >>> lc = LinearConstraint([[1, 1]], -np.inf, 1.9)

    Specify limits using a `Bounds` object.

    >>> bounds = Bounds([0., 0.], [2., 2.])
    >>> result = differential_evolution(rosen, bounds, constraints=lc,
    ...                                 seed=1)
    >>> result.x, result.fun
    (array([0.96632622, 0.93367155]), 0.0011352416852625719)

    Next find the minimum of the Ackley function
    (https://en.wikipedia.org/wiki/Test_functions_for_optimization).

    >>> def ackley(x):
    ...     arg1 = -0.2 * np.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2))
    ...     arg2 = 0.5 * (np.cos(2. * np.pi * x[0]) + np.cos(2. * np.pi * x[1]))
    ...     return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e
    >>> bounds = [(-5, 5), (-5, 5)]
    >>> result = differential_evolution(ackley, bounds, seed=1)
    >>> result.x, result.fun
    (array([0., 0.]), 4.440892098500626e-16)

    The Ackley function is written in a vectorized manner, so the
    ``'vectorized'`` keyword can be employed. Note the reduced number of
    function evaluations.

    >>> result = differential_evolution(
    ...     ackley, bounds, vectorized=True, updating='deferred', seed=1
    ... )
    >>> result.x, result.fun
    (array([0., 0.]), 4.440892098500626e-16)

    The following custom strategy function mimics 'best1bin':

    >>> def custom_strategy_fn(candidate, population, rng=None):
    ...     parameter_count = population.shape[-1]
    ...     mutation, recombination = 0.7, 0.9
    ...     trial = np.copy(population[candidate])
    ...     fill_point = rng.choice(parameter_count)
    ...
    ...     pool = np.arange(len(population))
    ...     rng.shuffle(pool)
    ...
    ...     # two unique random numbers that aren't the same, and
    ...     # aren't equal to candidate.
    ...     idxs = []
    ...     while len(idxs) < 2 and len(pool) > 0:
    ...         idx = pool[0]
    ...         pool = pool[1:]
    ...         if idx != candidate:
    ...             idxs.append(idx)
    ...
    ...     r0, r1 = idxs[:2]
    ...
    ...     bprime = (population[0] + mutation *
    ...               (population[r0] - population[r1]))
    ...
    ...     crossovers = rng.uniform(size=parameter_count)
    ...     crossovers = crossovers < recombination
    ...     crossovers[fill_point] = True
    ...     trial = np.where(crossovers, bprime, trial)
    ...     return trial

    """

    # using a context manager means that any created Pool objects are
    # cleared up.
    with DifferentialEvolutionSolver(func, bounds, args=args,
                                     strategy=strategy,
                                     maxiter=maxiter,
                                     popsize=popsize, tol=tol,
                                     mutation=mutation,
                                     recombination=recombination,
                                     seed=seed, polish=polish,
                                     callback=callback,
                                     disp=disp, init=init, atol=atol,
                                     updating=updating,
                                     workers=workers,
                                     constraints=constraints,
                                     x0=x0,
                                     integrality=integrality,
                                     vectorized=vectorized) as solver:
        ret = solver.solve()

    return ret
| 506 |
+
|
| 507 |
+
|
| 508 |
+
class DifferentialEvolutionSolver:
|
| 509 |
+
|
| 510 |
+
"""This class implements the differential evolution solver
|
| 511 |
+
|
| 512 |
+
Parameters
|
| 513 |
+
----------
|
| 514 |
+
func : callable
|
| 515 |
+
The objective function to be minimized. Must be in the form
|
| 516 |
+
``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
|
| 517 |
+
and ``args`` is a tuple of any additional fixed parameters needed to
|
| 518 |
+
completely specify the function. The number of parameters, N, is equal
|
| 519 |
+
to ``len(x)``.
|
| 520 |
+
bounds : sequence or `Bounds`
|
| 521 |
+
Bounds for variables. There are two ways to specify the bounds:
|
| 522 |
+
|
| 523 |
+
1. Instance of `Bounds` class.
|
| 524 |
+
2. ``(min, max)`` pairs for each element in ``x``, defining the
|
| 525 |
+
finite lower and upper bounds for the optimizing argument of
|
| 526 |
+
`func`.
|
| 527 |
+
|
| 528 |
+
The total number of bounds is used to determine the number of
|
| 529 |
+
parameters, N. If there are parameters whose bounds are equal the total
|
| 530 |
+
number of free parameters is ``N - N_equal``.
|
| 531 |
+
args : tuple, optional
|
| 532 |
+
Any additional fixed parameters needed to
|
| 533 |
+
completely specify the objective function.
|
| 534 |
+
strategy : {str, callable}, optional
|
| 535 |
+
The differential evolution strategy to use. Should be one of:
|
| 536 |
+
|
| 537 |
+
- 'best1bin'
|
| 538 |
+
- 'best1exp'
|
| 539 |
+
- 'rand1bin'
|
| 540 |
+
- 'rand1exp'
|
| 541 |
+
- 'rand2bin'
|
| 542 |
+
- 'rand2exp'
|
| 543 |
+
- 'randtobest1bin'
|
| 544 |
+
- 'randtobest1exp'
|
| 545 |
+
- 'currenttobest1bin'
|
| 546 |
+
- 'currenttobest1exp'
|
| 547 |
+
- 'best2exp'
|
| 548 |
+
- 'best2bin'
|
| 549 |
+
|
| 550 |
+
The default is 'best1bin'. Strategies that may be
|
| 551 |
+
implemented are outlined in 'Notes'.
|
| 552 |
+
|
| 553 |
+
Alternatively the differential evolution strategy can be customized
|
| 554 |
+
by providing a callable that constructs a trial vector. The callable
|
| 555 |
+
must have the form
|
| 556 |
+
``strategy(candidate: int, population: np.ndarray, rng=None)``,
|
| 557 |
+
where ``candidate`` is an integer specifying which entry of the
|
| 558 |
+
population is being evolved, ``population`` is an array of shape
|
| 559 |
+
``(S, N)`` containing all the population members (where S is the
|
| 560 |
+
total population size), and ``rng`` is the random number generator
|
| 561 |
+
being used within the solver.
|
| 562 |
+
``candidate`` will be in the range ``[0, S)``.
|
| 563 |
+
``strategy`` must return a trial vector with shape `(N,)`. The
|
| 564 |
+
fitness of this trial vector is compared against the fitness of
|
| 565 |
+
``population[candidate]``.
|
| 566 |
+
maxiter : int, optional
|
| 567 |
+
The maximum number of generations over which the entire population is
|
| 568 |
+
evolved. The maximum number of function evaluations (with no polishing)
|
| 569 |
+
is: ``(maxiter + 1) * popsize * (N - N_equal)``
|
| 570 |
+
popsize : int, optional
|
| 571 |
+
A multiplier for setting the total population size. The population has
|
| 572 |
+
``popsize * (N - N_equal)`` individuals. This keyword is overridden if
|
| 573 |
+
an initial population is supplied via the `init` keyword. When using
|
| 574 |
+
``init='sobol'`` the population size is calculated as the next power
|
| 575 |
+
of 2 after ``popsize * (N - N_equal)``.
|
| 576 |
+
tol : float, optional
|
| 577 |
+
Relative tolerance for convergence, the solving stops when
|
| 578 |
+
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
|
| 579 |
+
where and `atol` and `tol` are the absolute and relative tolerance
|
| 580 |
+
respectively.
|
| 581 |
+
mutation : float or tuple(float, float), optional
|
| 582 |
+
The mutation constant. In the literature this is also known as
|
| 583 |
+
differential weight, being denoted by F.
|
| 584 |
+
If specified as a float it should be in the range [0, 2].
|
| 585 |
+
If specified as a tuple ``(min, max)`` dithering is employed. Dithering
|
| 586 |
+
randomly changes the mutation constant on a generation by generation
|
| 587 |
+
basis. The mutation constant for that generation is taken from
|
| 588 |
+
U[min, max). Dithering can help speed convergence significantly.
|
| 589 |
+
Increasing the mutation constant increases the search radius, but will
|
| 590 |
+
slow down convergence.
|
| 591 |
+
recombination : float, optional
|
| 592 |
+
The recombination constant, should be in the range [0, 1]. In the
|
| 593 |
+
literature this is also known as the crossover probability, being
|
| 594 |
+
denoted by CR. Increasing this value allows a larger number of mutants
|
| 595 |
+
to progress into the next generation, but at the risk of population
|
| 596 |
+
stability.
|
| 597 |
+
seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
|
| 598 |
+
If `seed` is None (or `np.random`), the `numpy.random.RandomState`
|
| 599 |
+
singleton is used.
|
| 600 |
+
If `seed` is an int, a new ``RandomState`` instance is used,
|
| 601 |
+
seeded with `seed`.
|
| 602 |
+
If `seed` is already a ``Generator`` or ``RandomState`` instance then
|
| 603 |
+
that instance is used.
|
| 604 |
+
Specify `seed` for repeatable minimizations.
|
| 605 |
+
disp : bool, optional
|
| 606 |
+
Prints the evaluated `func` at every iteration.
|
| 607 |
+
callback : callable, optional
|
| 608 |
+
A callable called after each iteration. Has the signature:
|
| 609 |
+
|
| 610 |
+
``callback(intermediate_result: OptimizeResult)``
|
| 611 |
+
|
| 612 |
+
where ``intermediate_result`` is a keyword parameter containing an
|
| 613 |
+
`OptimizeResult` with attributes ``x`` and ``fun``, the best solution
|
| 614 |
+
found so far and the objective function. Note that the name
|
| 615 |
+
of the parameter must be ``intermediate_result`` for the callback
|
| 616 |
+
to be passed an `OptimizeResult`.
|
| 617 |
+
|
| 618 |
+
The callback also supports a signature like:
|
| 619 |
+
|
| 620 |
+
``callback(x, convergence: float=val)``
|
| 621 |
+
|
| 622 |
+
``val`` represents the fractional value of the population convergence.
|
| 623 |
+
When ``val`` is greater than ``1.0``, the function halts.
|
| 624 |
+
|
| 625 |
+
Introspection is used to determine which of the signatures is invoked.
|
| 626 |
+
|
| 627 |
+
Global minimization will halt if the callback raises ``StopIteration``
|
| 628 |
+
or returns ``True``; any polishing is still carried out.
|
| 629 |
+
|
| 630 |
+
.. versionchanged:: 1.12.0
|
| 631 |
+
callback accepts the ``intermediate_result`` keyword.
|
| 632 |
+
|
| 633 |
+
polish : bool, optional
|
| 634 |
+
If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
|
| 635 |
+
method is used to polish the best population member at the end, which
|
| 636 |
+
can improve the minimization slightly. If a constrained problem is
|
| 637 |
+
being studied then the `trust-constr` method is used instead. For large
|
| 638 |
+
problems with many constraints, polishing can take a long time due to
|
| 639 |
+
the Jacobian computations.
|
| 640 |
+
maxfun : int, optional
|
| 641 |
+
Set the maximum number of function evaluations. However, it probably
|
| 642 |
+
makes more sense to set `maxiter` instead.
|
| 643 |
+
init : str or array-like, optional
|
| 644 |
+
Specify which type of population initialization is performed. Should be
|
| 645 |
+
one of:
|
| 646 |
+
|
| 647 |
+
- 'latinhypercube'
|
| 648 |
+
- 'sobol'
|
| 649 |
+
- 'halton'
|
| 650 |
+
- 'random'
|
| 651 |
+
- array specifying the initial population. The array should have
|
| 652 |
+
shape ``(S, N)``, where S is the total population size and
|
| 653 |
+
N is the number of parameters.
|
| 654 |
+
`init` is clipped to `bounds` before use.
|
| 655 |
+
|
| 656 |
+
The default is 'latinhypercube'. Latin Hypercube sampling tries to
|
| 657 |
+
maximize coverage of the available parameter space.
|
| 658 |
+
|
| 659 |
+
'sobol' and 'halton' are superior alternatives and maximize even more
|
| 660 |
+
the parameter space. 'sobol' will enforce an initial population
|
| 661 |
+
size which is calculated as the next power of 2 after
|
| 662 |
+
``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
|
| 663 |
+
less efficient. See `scipy.stats.qmc` for more details.
|
| 664 |
+
|
| 665 |
+
'random' initializes the population randomly - this has the drawback
|
| 666 |
+
that clustering can occur, preventing the whole of parameter space
|
| 667 |
+
being covered. Use of an array to specify a population could be used,
|
| 668 |
+
for example, to create a tight bunch of initial guesses in an location
|
| 669 |
+
where the solution is known to exist, thereby reducing time for
|
| 670 |
+
convergence.
|
| 671 |
+
atol : float, optional
|
| 672 |
+
Absolute tolerance for convergence, the solving stops when
|
| 673 |
+
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
|
| 674 |
+
where and `atol` and `tol` are the absolute and relative tolerance
|
| 675 |
+
respectively.
|
| 676 |
+
updating : {'immediate', 'deferred'}, optional
|
| 677 |
+
If ``'immediate'``, the best solution vector is continuously updated
|
| 678 |
+
within a single generation [4]_. This can lead to faster convergence as
|
| 679 |
+
trial vectors can take advantage of continuous improvements in the best
|
| 680 |
+
solution.
|
| 681 |
+
With ``'deferred'``, the best solution vector is updated once per
|
| 682 |
+
generation. Only ``'deferred'`` is compatible with parallelization or
|
| 683 |
+
vectorization, and the `workers` and `vectorized` keywords can
|
| 684 |
+
over-ride this option.
|
| 685 |
+
workers : int or map-like callable, optional
|
| 686 |
+
If `workers` is an int the population is subdivided into `workers`
|
| 687 |
+
sections and evaluated in parallel
|
| 688 |
+
(uses `multiprocessing.Pool <multiprocessing>`).
|
| 689 |
+
Supply `-1` to use all cores available to the Process.
|
| 690 |
+
Alternatively supply a map-like callable, such as
|
| 691 |
+
`multiprocessing.Pool.map` for evaluating the population in parallel.
|
| 692 |
+
This evaluation is carried out as ``workers(func, iterable)``.
|
| 693 |
+
This option will override the `updating` keyword to
|
| 694 |
+
`updating='deferred'` if `workers != 1`.
|
| 695 |
+
Requires that `func` be pickleable.
|
| 696 |
+
constraints : {NonLinearConstraint, LinearConstraint, Bounds}
|
| 697 |
+
Constraints on the solver, over and above those applied by the `bounds`
|
| 698 |
+
kwd. Uses the approach by Lampinen.
|
| 699 |
+
x0 : None or array-like, optional
|
| 700 |
+
Provides an initial guess to the minimization. Once the population has
|
| 701 |
+
been initialized this vector replaces the first (best) member. This
|
| 702 |
+
replacement is done even if `init` is given an initial population.
|
| 703 |
+
``x0.shape == (N,)``.
|
| 704 |
+
integrality : 1-D array, optional
|
| 705 |
+
For each decision variable, a boolean value indicating whether the
|
| 706 |
+
decision variable is constrained to integer values. The array is
|
| 707 |
+
broadcast to ``(N,)``.
|
| 708 |
+
If any decision variables are constrained to be integral, they will not
|
| 709 |
+
be changed during polishing.
|
| 710 |
+
Only integer values lying between the lower and upper bounds are used.
|
| 711 |
+
If there are no integer values lying between the bounds then a
|
| 712 |
+
`ValueError` is raised.
|
| 713 |
+
vectorized : bool, optional
|
| 714 |
+
If ``vectorized is True``, `func` is sent an `x` array with
|
| 715 |
+
``x.shape == (N, S)``, and is expected to return an array of shape
|
| 716 |
+
``(S,)``, where `S` is the number of solution vectors to be calculated.
|
| 717 |
+
If constraints are applied, each of the functions used to construct
|
| 718 |
+
a `Constraint` object should accept an `x` array with
|
| 719 |
+
``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
|
| 720 |
+
`M` is the number of constraint components.
|
| 721 |
+
This option is an alternative to the parallelization offered by
|
| 722 |
+
`workers`, and may help in optimization speed. This keyword is
|
| 723 |
+
ignored if ``workers != 1``.
|
| 724 |
+
This option will override the `updating` keyword to
|
| 725 |
+
``updating='deferred'``.
|
| 726 |
+
"""
|
| 727 |
+
|
| 728 |
+
# Dispatch of mutation strategy method (binomial or exponential).
|
| 729 |
+
_binomial = {'best1bin': '_best1',
|
| 730 |
+
'randtobest1bin': '_randtobest1',
|
| 731 |
+
'currenttobest1bin': '_currenttobest1',
|
| 732 |
+
'best2bin': '_best2',
|
| 733 |
+
'rand2bin': '_rand2',
|
| 734 |
+
'rand1bin': '_rand1'}
|
| 735 |
+
_exponential = {'best1exp': '_best1',
|
| 736 |
+
'rand1exp': '_rand1',
|
| 737 |
+
'randtobest1exp': '_randtobest1',
|
| 738 |
+
'currenttobest1exp': '_currenttobest1',
|
| 739 |
+
'best2exp': '_best2',
|
| 740 |
+
'rand2exp': '_rand2'}
|
| 741 |
+
|
| 742 |
+
__init_error_msg = ("The population initialization method must be one of "
|
| 743 |
+
"'latinhypercube' or 'random', or an array of shape "
|
| 744 |
+
"(S, N) where N is the number of parameters and S>5")
|
| 745 |
+
|
| 746 |
+
def __init__(self, func, bounds, args=(),
|
| 747 |
+
strategy='best1bin', maxiter=1000, popsize=15,
|
| 748 |
+
tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=None,
|
| 749 |
+
maxfun=np.inf, callback=None, disp=False, polish=True,
|
| 750 |
+
init='latinhypercube', atol=0, updating='immediate',
|
| 751 |
+
workers=1, constraints=(), x0=None, *, integrality=None,
|
| 752 |
+
vectorized=False):
|
| 753 |
+
|
| 754 |
+
if callable(strategy):
|
| 755 |
+
# a callable strategy is going to be stored in self.strategy anyway
|
| 756 |
+
pass
|
| 757 |
+
elif strategy in self._binomial:
|
| 758 |
+
self.mutation_func = getattr(self, self._binomial[strategy])
|
| 759 |
+
elif strategy in self._exponential:
|
| 760 |
+
self.mutation_func = getattr(self, self._exponential[strategy])
|
| 761 |
+
else:
|
| 762 |
+
raise ValueError("Please select a valid mutation strategy")
|
| 763 |
+
self.strategy = strategy
|
| 764 |
+
|
| 765 |
+
self.callback = _wrap_callback(callback, "differential_evolution")
|
| 766 |
+
self.polish = polish
|
| 767 |
+
|
| 768 |
+
# set the updating / parallelisation options
|
| 769 |
+
if updating in ['immediate', 'deferred']:
|
| 770 |
+
self._updating = updating
|
| 771 |
+
|
| 772 |
+
self.vectorized = vectorized
|
| 773 |
+
|
| 774 |
+
# want to use parallelisation, but updating is immediate
|
| 775 |
+
if workers != 1 and updating == 'immediate':
|
| 776 |
+
warnings.warn("differential_evolution: the 'workers' keyword has"
|
| 777 |
+
" overridden updating='immediate' to"
|
| 778 |
+
" updating='deferred'", UserWarning, stacklevel=2)
|
| 779 |
+
self._updating = 'deferred'
|
| 780 |
+
|
| 781 |
+
if vectorized and workers != 1:
|
| 782 |
+
warnings.warn("differential_evolution: the 'workers' keyword"
|
| 783 |
+
" overrides the 'vectorized' keyword", stacklevel=2)
|
| 784 |
+
self.vectorized = vectorized = False
|
| 785 |
+
|
| 786 |
+
if vectorized and updating == 'immediate':
|
| 787 |
+
warnings.warn("differential_evolution: the 'vectorized' keyword"
|
| 788 |
+
" has overridden updating='immediate' to updating"
|
| 789 |
+
"='deferred'", UserWarning, stacklevel=2)
|
| 790 |
+
self._updating = 'deferred'
|
| 791 |
+
|
| 792 |
+
# an object with a map method.
|
| 793 |
+
if vectorized:
|
| 794 |
+
def maplike_for_vectorized_func(func, x):
|
| 795 |
+
# send an array (N, S) to the user func,
|
| 796 |
+
# expect to receive (S,). Transposition is required because
|
| 797 |
+
# internally the population is held as (S, N)
|
| 798 |
+
return np.atleast_1d(func(x.T))
|
| 799 |
+
workers = maplike_for_vectorized_func
|
| 800 |
+
|
| 801 |
+
self._mapwrapper = MapWrapper(workers)
|
| 802 |
+
|
| 803 |
+
# relative and absolute tolerances for convergence
|
| 804 |
+
self.tol, self.atol = tol, atol
|
| 805 |
+
|
| 806 |
+
# Mutation constant should be in [0, 2). If specified as a sequence
|
| 807 |
+
# then dithering is performed.
|
| 808 |
+
self.scale = mutation
|
| 809 |
+
if (not np.all(np.isfinite(mutation)) or
|
| 810 |
+
np.any(np.array(mutation) >= 2) or
|
| 811 |
+
np.any(np.array(mutation) < 0)):
|
| 812 |
+
raise ValueError('The mutation constant must be a float in '
|
| 813 |
+
'U[0, 2), or specified as a tuple(min, max)'
|
| 814 |
+
' where min < max and min, max are in U[0, 2).')
|
| 815 |
+
|
| 816 |
+
self.dither = None
|
| 817 |
+
if hasattr(mutation, '__iter__') and len(mutation) > 1:
|
| 818 |
+
self.dither = [mutation[0], mutation[1]]
|
| 819 |
+
self.dither.sort()
|
| 820 |
+
|
| 821 |
+
self.cross_over_probability = recombination
|
| 822 |
+
|
| 823 |
+
# we create a wrapped function to allow the use of map (and Pool.map
|
| 824 |
+
# in the future)
|
| 825 |
+
self.func = _FunctionWrapper(func, args)
|
| 826 |
+
self.args = args
|
| 827 |
+
|
| 828 |
+
# convert tuple of lower and upper bounds to limits
|
| 829 |
+
# [(low_0, high_0), ..., (low_n, high_n]
|
| 830 |
+
# -> [[low_0, ..., low_n], [high_0, ..., high_n]]
|
| 831 |
+
if isinstance(bounds, Bounds):
|
| 832 |
+
self.limits = np.array(new_bounds_to_old(bounds.lb,
|
| 833 |
+
bounds.ub,
|
| 834 |
+
len(bounds.lb)),
|
| 835 |
+
dtype=float).T
|
| 836 |
+
else:
|
| 837 |
+
self.limits = np.array(bounds, dtype='float').T
|
| 838 |
+
|
| 839 |
+
if (np.size(self.limits, 0) != 2 or not
|
| 840 |
+
np.all(np.isfinite(self.limits))):
|
| 841 |
+
raise ValueError('bounds should be a sequence containing finite '
|
| 842 |
+
'real valued (min, max) pairs for each value'
|
| 843 |
+
' in x')
|
| 844 |
+
|
| 845 |
+
if maxiter is None: # the default used to be None
|
| 846 |
+
maxiter = 1000
|
| 847 |
+
self.maxiter = maxiter
|
| 848 |
+
if maxfun is None: # the default used to be None
|
| 849 |
+
maxfun = np.inf
|
| 850 |
+
self.maxfun = maxfun
|
| 851 |
+
|
| 852 |
+
# population is scaled to between [0, 1].
|
| 853 |
+
# We have to scale between parameter <-> population
|
| 854 |
+
# save these arguments for _scale_parameter and
|
| 855 |
+
# _unscale_parameter. This is an optimization
|
| 856 |
+
self.__scale_arg1 = 0.5 * (self.limits[0] + self.limits[1])
|
| 857 |
+
self.__scale_arg2 = np.fabs(self.limits[0] - self.limits[1])
|
| 858 |
+
with np.errstate(divide='ignore'):
|
| 859 |
+
# if lb == ub then the following line will be 1/0, which is why
|
| 860 |
+
# we ignore the divide by zero warning. The result from 1/0 is
|
| 861 |
+
# inf, so replace those values by 0.
|
| 862 |
+
self.__recip_scale_arg2 = 1 / self.__scale_arg2
|
| 863 |
+
self.__recip_scale_arg2[~np.isfinite(self.__recip_scale_arg2)] = 0
|
| 864 |
+
|
| 865 |
+
self.parameter_count = np.size(self.limits, 1)
|
| 866 |
+
|
| 867 |
+
self.random_number_generator = check_random_state(seed)
|
| 868 |
+
|
| 869 |
+
# Which parameters are going to be integers?
|
| 870 |
+
if np.any(integrality):
|
| 871 |
+
# # user has provided a truth value for integer constraints
|
| 872 |
+
integrality = np.broadcast_to(
|
| 873 |
+
integrality,
|
| 874 |
+
self.parameter_count
|
| 875 |
+
)
|
| 876 |
+
integrality = np.asarray(integrality, bool)
|
| 877 |
+
# For integrality parameters change the limits to only allow
|
| 878 |
+
# integer values lying between the limits.
|
| 879 |
+
lb, ub = np.copy(self.limits)
|
| 880 |
+
|
| 881 |
+
lb = np.ceil(lb)
|
| 882 |
+
ub = np.floor(ub)
|
| 883 |
+
if not (lb[integrality] <= ub[integrality]).all():
|
| 884 |
+
# there's a parameter that doesn't have an integer value
|
| 885 |
+
# lying between the limits
|
| 886 |
+
raise ValueError("One of the integrality constraints does not"
|
| 887 |
+
" have any possible integer values between"
|
| 888 |
+
" the lower/upper bounds.")
|
| 889 |
+
nlb = np.nextafter(lb[integrality] - 0.5, np.inf)
|
| 890 |
+
nub = np.nextafter(ub[integrality] + 0.5, -np.inf)
|
| 891 |
+
|
| 892 |
+
self.integrality = integrality
|
| 893 |
+
self.limits[0, self.integrality] = nlb
|
| 894 |
+
self.limits[1, self.integrality] = nub
|
| 895 |
+
else:
|
| 896 |
+
self.integrality = False
|
| 897 |
+
|
| 898 |
+
# check for equal bounds
|
| 899 |
+
eb = self.limits[0] == self.limits[1]
|
| 900 |
+
eb_count = np.count_nonzero(eb)
|
| 901 |
+
|
| 902 |
+
# default population initialization is a latin hypercube design, but
|
| 903 |
+
# there are other population initializations possible.
|
| 904 |
+
# the minimum is 5 because 'best2bin' requires a population that's at
|
| 905 |
+
# least 5 long
|
| 906 |
+
# 202301 - reduced population size to account for parameters with
|
| 907 |
+
# equal bounds. If there are no varying parameters set N to at least 1
|
| 908 |
+
self.num_population_members = max(
|
| 909 |
+
5,
|
| 910 |
+
popsize * max(1, self.parameter_count - eb_count)
|
| 911 |
+
)
|
| 912 |
+
self.population_shape = (self.num_population_members,
|
| 913 |
+
self.parameter_count)
|
| 914 |
+
|
| 915 |
+
self._nfev = 0
|
| 916 |
+
# check first str otherwise will fail to compare str with array
|
| 917 |
+
if isinstance(init, str):
|
| 918 |
+
if init == 'latinhypercube':
|
| 919 |
+
self.init_population_lhs()
|
| 920 |
+
elif init == 'sobol':
|
| 921 |
+
# must be Ns = 2**m for Sobol'
|
| 922 |
+
n_s = int(2 ** np.ceil(np.log2(self.num_population_members)))
|
| 923 |
+
self.num_population_members = n_s
|
| 924 |
+
self.population_shape = (self.num_population_members,
|
| 925 |
+
self.parameter_count)
|
| 926 |
+
self.init_population_qmc(qmc_engine='sobol')
|
| 927 |
+
elif init == 'halton':
|
| 928 |
+
self.init_population_qmc(qmc_engine='halton')
|
| 929 |
+
elif init == 'random':
|
| 930 |
+
self.init_population_random()
|
| 931 |
+
else:
|
| 932 |
+
raise ValueError(self.__init_error_msg)
|
| 933 |
+
else:
|
| 934 |
+
self.init_population_array(init)
|
| 935 |
+
|
| 936 |
+
if x0 is not None:
|
| 937 |
+
# scale to within unit interval and
|
| 938 |
+
# ensure parameters are within bounds.
|
| 939 |
+
x0_scaled = self._unscale_parameters(np.asarray(x0))
|
| 940 |
+
if ((x0_scaled > 1.0) | (x0_scaled < 0.0)).any():
|
| 941 |
+
raise ValueError(
|
| 942 |
+
"Some entries in x0 lay outside the specified bounds"
|
| 943 |
+
)
|
| 944 |
+
self.population[0] = x0_scaled
|
| 945 |
+
|
| 946 |
+
# infrastructure for constraints
|
| 947 |
+
self.constraints = constraints
|
| 948 |
+
self._wrapped_constraints = []
|
| 949 |
+
|
| 950 |
+
if hasattr(constraints, '__len__'):
|
| 951 |
+
# sequence of constraints, this will also deal with default
|
| 952 |
+
# keyword parameter
|
| 953 |
+
for c in constraints:
|
| 954 |
+
self._wrapped_constraints.append(
|
| 955 |
+
_ConstraintWrapper(c, self.x)
|
| 956 |
+
)
|
| 957 |
+
else:
|
| 958 |
+
self._wrapped_constraints = [
|
| 959 |
+
_ConstraintWrapper(constraints, self.x)
|
| 960 |
+
]
|
| 961 |
+
self.total_constraints = np.sum(
|
| 962 |
+
[c.num_constr for c in self._wrapped_constraints]
|
| 963 |
+
)
|
| 964 |
+
self.constraint_violation = np.zeros((self.num_population_members, 1))
|
| 965 |
+
self.feasible = np.ones(self.num_population_members, bool)
|
| 966 |
+
|
| 967 |
+
# an array to shuffle when selecting candidates. Create it here
|
| 968 |
+
# rather than repeatedly creating it in _select_samples.
|
| 969 |
+
self._random_population_index = np.arange(self.num_population_members)
|
| 970 |
+
self.disp = disp
|
| 971 |
+
|
| 972 |
+
def init_population_lhs(self):
|
| 973 |
+
"""
|
| 974 |
+
Initializes the population with Latin Hypercube Sampling.
|
| 975 |
+
Latin Hypercube Sampling ensures that each parameter is uniformly
|
| 976 |
+
sampled over its range.
|
| 977 |
+
"""
|
| 978 |
+
rng = self.random_number_generator
|
| 979 |
+
|
| 980 |
+
# Each parameter range needs to be sampled uniformly. The scaled
|
| 981 |
+
# parameter range ([0, 1)) needs to be split into
|
| 982 |
+
# `self.num_population_members` segments, each of which has the following
|
| 983 |
+
# size:
|
| 984 |
+
segsize = 1.0 / self.num_population_members
|
| 985 |
+
|
| 986 |
+
# Within each segment we sample from a uniform random distribution.
|
| 987 |
+
# We need to do this sampling for each parameter.
|
| 988 |
+
samples = (segsize * rng.uniform(size=self.population_shape)
|
| 989 |
+
|
| 990 |
+
# Offset each segment to cover the entire parameter range [0, 1)
|
| 991 |
+
+ np.linspace(0., 1., self.num_population_members,
|
| 992 |
+
endpoint=False)[:, np.newaxis])
|
| 993 |
+
|
| 994 |
+
# Create an array for population of candidate solutions.
|
| 995 |
+
self.population = np.zeros_like(samples)
|
| 996 |
+
|
| 997 |
+
# Initialize population of candidate solutions by permutation of the
|
| 998 |
+
# random samples.
|
| 999 |
+
for j in range(self.parameter_count):
|
| 1000 |
+
order = rng.permutation(range(self.num_population_members))
|
| 1001 |
+
self.population[:, j] = samples[order, j]
|
| 1002 |
+
|
| 1003 |
+
# reset population energies
|
| 1004 |
+
self.population_energies = np.full(self.num_population_members,
|
| 1005 |
+
np.inf)
|
| 1006 |
+
|
| 1007 |
+
# reset number of function evaluations counter
|
| 1008 |
+
self._nfev = 0
|
| 1009 |
+
|
| 1010 |
+
def init_population_qmc(self, qmc_engine):
|
| 1011 |
+
"""Initializes the population with a QMC method.
|
| 1012 |
+
|
| 1013 |
+
QMC methods ensures that each parameter is uniformly
|
| 1014 |
+
sampled over its range.
|
| 1015 |
+
|
| 1016 |
+
Parameters
|
| 1017 |
+
----------
|
| 1018 |
+
qmc_engine : str
|
| 1019 |
+
The QMC method to use for initialization. Can be one of
|
| 1020 |
+
``latinhypercube``, ``sobol`` or ``halton``.
|
| 1021 |
+
|
| 1022 |
+
"""
|
| 1023 |
+
from scipy.stats import qmc
|
| 1024 |
+
|
| 1025 |
+
rng = self.random_number_generator
|
| 1026 |
+
|
| 1027 |
+
# Create an array for population of candidate solutions.
|
| 1028 |
+
if qmc_engine == 'latinhypercube':
|
| 1029 |
+
sampler = qmc.LatinHypercube(d=self.parameter_count, seed=rng)
|
| 1030 |
+
elif qmc_engine == 'sobol':
|
| 1031 |
+
sampler = qmc.Sobol(d=self.parameter_count, seed=rng)
|
| 1032 |
+
elif qmc_engine == 'halton':
|
| 1033 |
+
sampler = qmc.Halton(d=self.parameter_count, seed=rng)
|
| 1034 |
+
else:
|
| 1035 |
+
raise ValueError(self.__init_error_msg)
|
| 1036 |
+
|
| 1037 |
+
self.population = sampler.random(n=self.num_population_members)
|
| 1038 |
+
|
| 1039 |
+
# reset population energies
|
| 1040 |
+
self.population_energies = np.full(self.num_population_members,
|
| 1041 |
+
np.inf)
|
| 1042 |
+
|
| 1043 |
+
# reset number of function evaluations counter
|
| 1044 |
+
self._nfev = 0
|
| 1045 |
+
|
| 1046 |
+
def init_population_random(self):
|
| 1047 |
+
"""
|
| 1048 |
+
Initializes the population at random. This type of initialization
|
| 1049 |
+
can possess clustering, Latin Hypercube sampling is generally better.
|
| 1050 |
+
"""
|
| 1051 |
+
rng = self.random_number_generator
|
| 1052 |
+
self.population = rng.uniform(size=self.population_shape)
|
| 1053 |
+
|
| 1054 |
+
# reset population energies
|
| 1055 |
+
self.population_energies = np.full(self.num_population_members,
|
| 1056 |
+
np.inf)
|
| 1057 |
+
|
| 1058 |
+
# reset number of function evaluations counter
|
| 1059 |
+
self._nfev = 0
|
| 1060 |
+
|
| 1061 |
+
def init_population_array(self, init):
|
| 1062 |
+
"""
|
| 1063 |
+
Initializes the population with a user specified population.
|
| 1064 |
+
|
| 1065 |
+
Parameters
|
| 1066 |
+
----------
|
| 1067 |
+
init : np.ndarray
|
| 1068 |
+
Array specifying subset of the initial population. The array should
|
| 1069 |
+
have shape (S, N), where N is the number of parameters.
|
| 1070 |
+
The population is clipped to the lower and upper bounds.
|
| 1071 |
+
"""
|
| 1072 |
+
# make sure you're using a float array
|
| 1073 |
+
popn = np.asarray(init, dtype=np.float64)
|
| 1074 |
+
|
| 1075 |
+
if (np.size(popn, 0) < 5 or
|
| 1076 |
+
popn.shape[1] != self.parameter_count or
|
| 1077 |
+
len(popn.shape) != 2):
|
| 1078 |
+
raise ValueError("The population supplied needs to have shape"
|
| 1079 |
+
" (S, len(x)), where S > 4.")
|
| 1080 |
+
|
| 1081 |
+
# scale values and clip to bounds, assigning to population
|
| 1082 |
+
self.population = np.clip(self._unscale_parameters(popn), 0, 1)
|
| 1083 |
+
|
| 1084 |
+
self.num_population_members = np.size(self.population, 0)
|
| 1085 |
+
|
| 1086 |
+
self.population_shape = (self.num_population_members,
|
| 1087 |
+
self.parameter_count)
|
| 1088 |
+
|
| 1089 |
+
# reset population energies
|
| 1090 |
+
self.population_energies = np.full(self.num_population_members,
|
| 1091 |
+
np.inf)
|
| 1092 |
+
|
| 1093 |
+
# reset number of function evaluations counter
|
| 1094 |
+
self._nfev = 0
|
| 1095 |
+
|
| 1096 |
+
@property
|
| 1097 |
+
def x(self):
|
| 1098 |
+
"""
|
| 1099 |
+
The best solution from the solver
|
| 1100 |
+
"""
|
| 1101 |
+
return self._scale_parameters(self.population[0])
|
| 1102 |
+
|
| 1103 |
+
@property
|
| 1104 |
+
def convergence(self):
|
| 1105 |
+
"""
|
| 1106 |
+
The standard deviation of the population energies divided by their
|
| 1107 |
+
mean.
|
| 1108 |
+
"""
|
| 1109 |
+
if np.any(np.isinf(self.population_energies)):
|
| 1110 |
+
return np.inf
|
| 1111 |
+
return (np.std(self.population_energies) /
|
| 1112 |
+
(np.abs(np.mean(self.population_energies)) + _MACHEPS))
|
| 1113 |
+
|
| 1114 |
+
def converged(self):
|
| 1115 |
+
"""
|
| 1116 |
+
Return True if the solver has converged.
|
| 1117 |
+
"""
|
| 1118 |
+
if np.any(np.isinf(self.population_energies)):
|
| 1119 |
+
return False
|
| 1120 |
+
|
| 1121 |
+
return (np.std(self.population_energies) <=
|
| 1122 |
+
self.atol +
|
| 1123 |
+
self.tol * np.abs(np.mean(self.population_energies)))
|
| 1124 |
+
|
| 1125 |
+
def solve(self):
|
| 1126 |
+
"""
|
| 1127 |
+
Runs the DifferentialEvolutionSolver.
|
| 1128 |
+
|
| 1129 |
+
Returns
|
| 1130 |
+
-------
|
| 1131 |
+
res : OptimizeResult
|
| 1132 |
+
The optimization result represented as a `OptimizeResult` object.
|
| 1133 |
+
Important attributes are: ``x`` the solution array, ``success`` a
|
| 1134 |
+
Boolean flag indicating if the optimizer exited successfully,
|
| 1135 |
+
``message`` which describes the cause of the termination,
|
| 1136 |
+
``population`` the solution vectors present in the population, and
|
| 1137 |
+
``population_energies`` the value of the objective function for
|
| 1138 |
+
each entry in ``population``.
|
| 1139 |
+
See `OptimizeResult` for a description of other attributes. If
|
| 1140 |
+
`polish` was employed, and a lower minimum was obtained by the
|
| 1141 |
+
polishing, then OptimizeResult also contains the ``jac`` attribute.
|
| 1142 |
+
If the eventual solution does not satisfy the applied constraints
|
| 1143 |
+
``success`` will be `False`.
|
| 1144 |
+
"""
|
| 1145 |
+
nit, warning_flag = 0, False
|
| 1146 |
+
status_message = _status_message['success']
|
| 1147 |
+
|
| 1148 |
+
# The population may have just been initialized (all entries are
|
| 1149 |
+
# np.inf). If it has you have to calculate the initial energies.
|
| 1150 |
+
# Although this is also done in the evolve generator it's possible
|
| 1151 |
+
# that someone can set maxiter=0, at which point we still want the
|
| 1152 |
+
# initial energies to be calculated (the following loop isn't run).
|
| 1153 |
+
if np.all(np.isinf(self.population_energies)):
|
| 1154 |
+
self.feasible, self.constraint_violation = (
|
| 1155 |
+
self._calculate_population_feasibilities(self.population))
|
| 1156 |
+
|
| 1157 |
+
# only work out population energies for feasible solutions
|
| 1158 |
+
self.population_energies[self.feasible] = (
|
| 1159 |
+
self._calculate_population_energies(
|
| 1160 |
+
self.population[self.feasible]))
|
| 1161 |
+
|
| 1162 |
+
self._promote_lowest_energy()
|
| 1163 |
+
|
| 1164 |
+
# do the optimization.
|
| 1165 |
+
for nit in range(1, self.maxiter + 1):
|
| 1166 |
+
# evolve the population by a generation
|
| 1167 |
+
try:
|
| 1168 |
+
next(self)
|
| 1169 |
+
except StopIteration:
|
| 1170 |
+
warning_flag = True
|
| 1171 |
+
if self._nfev > self.maxfun:
|
| 1172 |
+
status_message = _status_message['maxfev']
|
| 1173 |
+
elif self._nfev == self.maxfun:
|
| 1174 |
+
status_message = ('Maximum number of function evaluations'
|
| 1175 |
+
' has been reached.')
|
| 1176 |
+
break
|
| 1177 |
+
|
| 1178 |
+
if self.disp:
|
| 1179 |
+
print(f"differential_evolution step {nit}: f(x)="
|
| 1180 |
+
f" {self.population_energies[0]}"
|
| 1181 |
+
)
|
| 1182 |
+
|
| 1183 |
+
if self.callback:
|
| 1184 |
+
c = self.tol / (self.convergence + _MACHEPS)
|
| 1185 |
+
res = self._result(nit=nit, message="in progress")
|
| 1186 |
+
res.convergence = c
|
| 1187 |
+
try:
|
| 1188 |
+
warning_flag = bool(self.callback(res))
|
| 1189 |
+
except StopIteration:
|
| 1190 |
+
warning_flag = True
|
| 1191 |
+
|
| 1192 |
+
if warning_flag:
|
| 1193 |
+
status_message = 'callback function requested stop early'
|
| 1194 |
+
|
| 1195 |
+
# should the solver terminate?
|
| 1196 |
+
if warning_flag or self.converged():
|
| 1197 |
+
break
|
| 1198 |
+
|
| 1199 |
+
else:
|
| 1200 |
+
status_message = _status_message['maxiter']
|
| 1201 |
+
warning_flag = True
|
| 1202 |
+
|
| 1203 |
+
DE_result = self._result(
|
| 1204 |
+
nit=nit, message=status_message, warning_flag=warning_flag
|
| 1205 |
+
)
|
| 1206 |
+
|
| 1207 |
+
if self.polish and not np.all(self.integrality):
|
| 1208 |
+
# can't polish if all the parameters are integers
|
| 1209 |
+
if np.any(self.integrality):
|
| 1210 |
+
# set the lower/upper bounds equal so that any integrality
|
| 1211 |
+
# constraints work.
|
| 1212 |
+
limits, integrality = self.limits, self.integrality
|
| 1213 |
+
limits[0, integrality] = DE_result.x[integrality]
|
| 1214 |
+
limits[1, integrality] = DE_result.x[integrality]
|
| 1215 |
+
|
| 1216 |
+
polish_method = 'L-BFGS-B'
|
| 1217 |
+
|
| 1218 |
+
if self._wrapped_constraints:
|
| 1219 |
+
polish_method = 'trust-constr'
|
| 1220 |
+
|
| 1221 |
+
constr_violation = self._constraint_violation_fn(DE_result.x)
|
| 1222 |
+
if np.any(constr_violation > 0.):
|
| 1223 |
+
warnings.warn("differential evolution didn't find a "
|
| 1224 |
+
"solution satisfying the constraints, "
|
| 1225 |
+
"attempting to polish from the least "
|
| 1226 |
+
"infeasible solution",
|
| 1227 |
+
UserWarning, stacklevel=2)
|
| 1228 |
+
if self.disp:
|
| 1229 |
+
print(f"Polishing solution with '{polish_method}'")
|
| 1230 |
+
result = minimize(self.func,
|
| 1231 |
+
np.copy(DE_result.x),
|
| 1232 |
+
method=polish_method,
|
| 1233 |
+
bounds=self.limits.T,
|
| 1234 |
+
constraints=self.constraints)
|
| 1235 |
+
|
| 1236 |
+
self._nfev += result.nfev
|
| 1237 |
+
DE_result.nfev = self._nfev
|
| 1238 |
+
|
| 1239 |
+
# Polishing solution is only accepted if there is an improvement in
|
| 1240 |
+
# cost function, the polishing was successful and the solution lies
|
| 1241 |
+
# within the bounds.
|
| 1242 |
+
if (result.fun < DE_result.fun and
|
| 1243 |
+
result.success and
|
| 1244 |
+
np.all(result.x <= self.limits[1]) and
|
| 1245 |
+
np.all(self.limits[0] <= result.x)):
|
| 1246 |
+
DE_result.fun = result.fun
|
| 1247 |
+
DE_result.x = result.x
|
| 1248 |
+
DE_result.jac = result.jac
|
| 1249 |
+
# to keep internal state consistent
|
| 1250 |
+
self.population_energies[0] = result.fun
|
| 1251 |
+
self.population[0] = self._unscale_parameters(result.x)
|
| 1252 |
+
|
| 1253 |
+
if self._wrapped_constraints:
|
| 1254 |
+
DE_result.constr = [c.violation(DE_result.x) for
|
| 1255 |
+
c in self._wrapped_constraints]
|
| 1256 |
+
DE_result.constr_violation = np.max(
|
| 1257 |
+
np.concatenate(DE_result.constr))
|
| 1258 |
+
DE_result.maxcv = DE_result.constr_violation
|
| 1259 |
+
if DE_result.maxcv > 0:
|
| 1260 |
+
# if the result is infeasible then success must be False
|
| 1261 |
+
DE_result.success = False
|
| 1262 |
+
DE_result.message = ("The solution does not satisfy the "
|
| 1263 |
+
f"constraints, MAXCV = {DE_result.maxcv}")
|
| 1264 |
+
|
| 1265 |
+
return DE_result
|
| 1266 |
+
|
| 1267 |
+
def _result(self, **kwds):
|
| 1268 |
+
# form an intermediate OptimizeResult
|
| 1269 |
+
nit = kwds.get('nit', None)
|
| 1270 |
+
message = kwds.get('message', None)
|
| 1271 |
+
warning_flag = kwds.get('warning_flag', False)
|
| 1272 |
+
result = OptimizeResult(
|
| 1273 |
+
x=self.x,
|
| 1274 |
+
fun=self.population_energies[0],
|
| 1275 |
+
nfev=self._nfev,
|
| 1276 |
+
nit=nit,
|
| 1277 |
+
message=message,
|
| 1278 |
+
success=(warning_flag is not True),
|
| 1279 |
+
population=self._scale_parameters(self.population),
|
| 1280 |
+
population_energies=self.population_energies
|
| 1281 |
+
)
|
| 1282 |
+
if self._wrapped_constraints:
|
| 1283 |
+
result.constr = [c.violation(result.x)
|
| 1284 |
+
for c in self._wrapped_constraints]
|
| 1285 |
+
result.constr_violation = np.max(np.concatenate(result.constr))
|
| 1286 |
+
result.maxcv = result.constr_violation
|
| 1287 |
+
if result.maxcv > 0:
|
| 1288 |
+
result.success = False
|
| 1289 |
+
|
| 1290 |
+
return result
|
| 1291 |
+
|
| 1292 |
+
def _calculate_population_energies(self, population):
|
| 1293 |
+
"""
|
| 1294 |
+
Calculate the energies of a population.
|
| 1295 |
+
|
| 1296 |
+
Parameters
|
| 1297 |
+
----------
|
| 1298 |
+
population : ndarray
|
| 1299 |
+
An array of parameter vectors normalised to [0, 1] using lower
|
| 1300 |
+
and upper limits. Has shape ``(np.size(population, 0), N)``.
|
| 1301 |
+
|
| 1302 |
+
Returns
|
| 1303 |
+
-------
|
| 1304 |
+
energies : ndarray
|
| 1305 |
+
An array of energies corresponding to each population member. If
|
| 1306 |
+
maxfun will be exceeded during this call, then the number of
|
| 1307 |
+
function evaluations will be reduced and energies will be
|
| 1308 |
+
right-padded with np.inf. Has shape ``(np.size(population, 0),)``
|
| 1309 |
+
"""
|
| 1310 |
+
num_members = np.size(population, 0)
|
| 1311 |
+
# S is the number of function evals left to stay under the
|
| 1312 |
+
# maxfun budget
|
| 1313 |
+
S = min(num_members, self.maxfun - self._nfev)
|
| 1314 |
+
|
| 1315 |
+
energies = np.full(num_members, np.inf)
|
| 1316 |
+
|
| 1317 |
+
parameters_pop = self._scale_parameters(population)
|
| 1318 |
+
try:
|
| 1319 |
+
calc_energies = list(
|
| 1320 |
+
self._mapwrapper(self.func, parameters_pop[0:S])
|
| 1321 |
+
)
|
| 1322 |
+
calc_energies = np.squeeze(calc_energies)
|
| 1323 |
+
except (TypeError, ValueError) as e:
|
| 1324 |
+
# wrong number of arguments for _mapwrapper
|
| 1325 |
+
# or wrong length returned from the mapper
|
| 1326 |
+
raise RuntimeError(
|
| 1327 |
+
"The map-like callable must be of the form f(func, iterable), "
|
| 1328 |
+
"returning a sequence of numbers the same length as 'iterable'"
|
| 1329 |
+
) from e
|
| 1330 |
+
|
| 1331 |
+
if calc_energies.size != S:
|
| 1332 |
+
if self.vectorized:
|
| 1333 |
+
raise RuntimeError("The vectorized function must return an"
|
| 1334 |
+
" array of shape (S,) when given an array"
|
| 1335 |
+
" of shape (len(x), S)")
|
| 1336 |
+
raise RuntimeError("func(x, *args) must return a scalar value")
|
| 1337 |
+
|
| 1338 |
+
energies[0:S] = calc_energies
|
| 1339 |
+
|
| 1340 |
+
if self.vectorized:
|
| 1341 |
+
self._nfev += 1
|
| 1342 |
+
else:
|
| 1343 |
+
self._nfev += S
|
| 1344 |
+
|
| 1345 |
+
return energies
|
| 1346 |
+
|
| 1347 |
+
def _promote_lowest_energy(self):
|
| 1348 |
+
# swaps 'best solution' into first population entry
|
| 1349 |
+
|
| 1350 |
+
idx = np.arange(self.num_population_members)
|
| 1351 |
+
feasible_solutions = idx[self.feasible]
|
| 1352 |
+
if feasible_solutions.size:
|
| 1353 |
+
# find the best feasible solution
|
| 1354 |
+
idx_t = np.argmin(self.population_energies[feasible_solutions])
|
| 1355 |
+
l = feasible_solutions[idx_t]
|
| 1356 |
+
else:
|
| 1357 |
+
# no solution was feasible, use 'best' infeasible solution, which
|
| 1358 |
+
# will violate constraints the least
|
| 1359 |
+
l = np.argmin(np.sum(self.constraint_violation, axis=1))
|
| 1360 |
+
|
| 1361 |
+
self.population_energies[[0, l]] = self.population_energies[[l, 0]]
|
| 1362 |
+
self.population[[0, l], :] = self.population[[l, 0], :]
|
| 1363 |
+
self.feasible[[0, l]] = self.feasible[[l, 0]]
|
| 1364 |
+
self.constraint_violation[[0, l], :] = (
|
| 1365 |
+
self.constraint_violation[[l, 0], :])
|
| 1366 |
+
|
| 1367 |
+
def _constraint_violation_fn(self, x):
|
| 1368 |
+
"""
|
| 1369 |
+
Calculates total constraint violation for all the constraints, for a
|
| 1370 |
+
set of solutions.
|
| 1371 |
+
|
| 1372 |
+
Parameters
|
| 1373 |
+
----------
|
| 1374 |
+
x : ndarray
|
| 1375 |
+
Solution vector(s). Has shape (S, N), or (N,), where S is the
|
| 1376 |
+
number of solutions to investigate and N is the number of
|
| 1377 |
+
parameters.
|
| 1378 |
+
|
| 1379 |
+
Returns
|
| 1380 |
+
-------
|
| 1381 |
+
cv : ndarray
|
| 1382 |
+
Total violation of constraints. Has shape ``(S, M)``, where M is
|
| 1383 |
+
the total number of constraint components (which is not necessarily
|
| 1384 |
+
equal to len(self._wrapped_constraints)).
|
| 1385 |
+
"""
|
| 1386 |
+
# how many solution vectors you're calculating constraint violations
|
| 1387 |
+
# for
|
| 1388 |
+
S = np.size(x) // self.parameter_count
|
| 1389 |
+
_out = np.zeros((S, self.total_constraints))
|
| 1390 |
+
offset = 0
|
| 1391 |
+
for con in self._wrapped_constraints:
|
| 1392 |
+
# the input/output of the (vectorized) constraint function is
|
| 1393 |
+
# {(N, S), (N,)} --> (M, S)
|
| 1394 |
+
# The input to _constraint_violation_fn is (S, N) or (N,), so
|
| 1395 |
+
# transpose to pass it to the constraint. The output is transposed
|
| 1396 |
+
# from (M, S) to (S, M) for further use.
|
| 1397 |
+
c = con.violation(x.T).T
|
| 1398 |
+
|
| 1399 |
+
# The shape of c should be (M,), (1, M), or (S, M). Check for
|
| 1400 |
+
# those shapes, as an incorrect shape indicates that the
|
| 1401 |
+
# user constraint function didn't return the right thing, and
|
| 1402 |
+
# the reshape operation will fail. Intercept the wrong shape
|
| 1403 |
+
# to give a reasonable error message. I'm not sure what failure
|
| 1404 |
+
# modes an inventive user will come up with.
|
| 1405 |
+
if c.shape[-1] != con.num_constr or (S > 1 and c.shape[0] != S):
|
| 1406 |
+
raise RuntimeError("An array returned from a Constraint has"
|
| 1407 |
+
" the wrong shape. If `vectorized is False`"
|
| 1408 |
+
" the Constraint should return an array of"
|
| 1409 |
+
" shape (M,). If `vectorized is True` then"
|
| 1410 |
+
" the Constraint must return an array of"
|
| 1411 |
+
" shape (M, S), where S is the number of"
|
| 1412 |
+
" solution vectors and M is the number of"
|
| 1413 |
+
" constraint components in a given"
|
| 1414 |
+
" Constraint object.")
|
| 1415 |
+
|
| 1416 |
+
# the violation function may return a 1D array, but is it a
|
| 1417 |
+
# sequence of constraints for one solution (S=1, M>=1), or the
|
| 1418 |
+
# value of a single constraint for a sequence of solutions
|
| 1419 |
+
# (S>=1, M=1)
|
| 1420 |
+
c = np.reshape(c, (S, con.num_constr))
|
| 1421 |
+
_out[:, offset:offset + con.num_constr] = c
|
| 1422 |
+
offset += con.num_constr
|
| 1423 |
+
|
| 1424 |
+
return _out
|
| 1425 |
+
|
| 1426 |
+
def _calculate_population_feasibilities(self, population):
|
| 1427 |
+
"""
|
| 1428 |
+
Calculate the feasibilities of a population.
|
| 1429 |
+
|
| 1430 |
+
Parameters
|
| 1431 |
+
----------
|
| 1432 |
+
population : ndarray
|
| 1433 |
+
An array of parameter vectors normalised to [0, 1] using lower
|
| 1434 |
+
and upper limits. Has shape ``(np.size(population, 0), N)``.
|
| 1435 |
+
|
| 1436 |
+
Returns
|
| 1437 |
+
-------
|
| 1438 |
+
feasible, constraint_violation : ndarray, ndarray
|
| 1439 |
+
Boolean array of feasibility for each population member, and an
|
| 1440 |
+
array of the constraint violation for each population member.
|
| 1441 |
+
constraint_violation has shape ``(np.size(population, 0), M)``,
|
| 1442 |
+
where M is the number of constraints.
|
| 1443 |
+
"""
|
| 1444 |
+
num_members = np.size(population, 0)
|
| 1445 |
+
if not self._wrapped_constraints:
|
| 1446 |
+
# shortcut for no constraints
|
| 1447 |
+
return np.ones(num_members, bool), np.zeros((num_members, 1))
|
| 1448 |
+
|
| 1449 |
+
# (S, N)
|
| 1450 |
+
parameters_pop = self._scale_parameters(population)
|
| 1451 |
+
|
| 1452 |
+
if self.vectorized:
|
| 1453 |
+
# (S, M)
|
| 1454 |
+
constraint_violation = np.array(
|
| 1455 |
+
self._constraint_violation_fn(parameters_pop)
|
| 1456 |
+
)
|
| 1457 |
+
else:
|
| 1458 |
+
# (S, 1, M)
|
| 1459 |
+
constraint_violation = np.array([self._constraint_violation_fn(x)
|
| 1460 |
+
for x in parameters_pop])
|
| 1461 |
+
# if you use the list comprehension in the line above it will
|
| 1462 |
+
# create an array of shape (S, 1, M), because each iteration
|
| 1463 |
+
# generates an array of (1, M). In comparison the vectorized
|
| 1464 |
+
# version returns (S, M). It's therefore necessary to remove axis 1
|
| 1465 |
+
constraint_violation = constraint_violation[:, 0]
|
| 1466 |
+
|
| 1467 |
+
feasible = ~(np.sum(constraint_violation, axis=1) > 0)
|
| 1468 |
+
|
| 1469 |
+
return feasible, constraint_violation
|
| 1470 |
+
|
| 1471 |
+
def __iter__(self):
|
| 1472 |
+
return self
|
| 1473 |
+
|
| 1474 |
+
def __enter__(self):
|
| 1475 |
+
return self
|
| 1476 |
+
|
| 1477 |
+
def __exit__(self, *args):
|
| 1478 |
+
return self._mapwrapper.__exit__(*args)
|
| 1479 |
+
|
| 1480 |
+
def _accept_trial(self, energy_trial, feasible_trial, cv_trial,
|
| 1481 |
+
energy_orig, feasible_orig, cv_orig):
|
| 1482 |
+
"""
|
| 1483 |
+
Trial is accepted if:
|
| 1484 |
+
* it satisfies all constraints and provides a lower or equal objective
|
| 1485 |
+
function value, while both the compared solutions are feasible
|
| 1486 |
+
- or -
|
| 1487 |
+
* it is feasible while the original solution is infeasible,
|
| 1488 |
+
- or -
|
| 1489 |
+
* it is infeasible, but provides a lower or equal constraint violation
|
| 1490 |
+
for all constraint functions.
|
| 1491 |
+
|
| 1492 |
+
This test corresponds to section III of Lampinen [1]_.
|
| 1493 |
+
|
| 1494 |
+
Parameters
|
| 1495 |
+
----------
|
| 1496 |
+
energy_trial : float
|
| 1497 |
+
Energy of the trial solution
|
| 1498 |
+
feasible_trial : float
|
| 1499 |
+
Feasibility of trial solution
|
| 1500 |
+
cv_trial : array-like
|
| 1501 |
+
Excess constraint violation for the trial solution
|
| 1502 |
+
energy_orig : float
|
| 1503 |
+
Energy of the original solution
|
| 1504 |
+
feasible_orig : float
|
| 1505 |
+
Feasibility of original solution
|
| 1506 |
+
cv_orig : array-like
|
| 1507 |
+
Excess constraint violation for the original solution
|
| 1508 |
+
|
| 1509 |
+
Returns
|
| 1510 |
+
-------
|
| 1511 |
+
accepted : bool
|
| 1512 |
+
|
| 1513 |
+
"""
|
| 1514 |
+
if feasible_orig and feasible_trial:
|
| 1515 |
+
return energy_trial <= energy_orig
|
| 1516 |
+
elif feasible_trial and not feasible_orig:
|
| 1517 |
+
return True
|
| 1518 |
+
elif not feasible_trial and (cv_trial <= cv_orig).all():
|
| 1519 |
+
# cv_trial < cv_orig would imply that both trial and orig are not
|
| 1520 |
+
# feasible
|
| 1521 |
+
return True
|
| 1522 |
+
|
| 1523 |
+
return False
|
| 1524 |
+
|
| 1525 |
+
def __next__(self):
|
| 1526 |
+
"""
|
| 1527 |
+
Evolve the population by a single generation
|
| 1528 |
+
|
| 1529 |
+
Returns
|
| 1530 |
+
-------
|
| 1531 |
+
x : ndarray
|
| 1532 |
+
The best solution from the solver.
|
| 1533 |
+
fun : float
|
| 1534 |
+
Value of objective function obtained from the best solution.
|
| 1535 |
+
"""
|
| 1536 |
+
# the population may have just been initialized (all entries are
|
| 1537 |
+
# np.inf). If it has you have to calculate the initial energies
|
| 1538 |
+
if np.all(np.isinf(self.population_energies)):
|
| 1539 |
+
self.feasible, self.constraint_violation = (
|
| 1540 |
+
self._calculate_population_feasibilities(self.population))
|
| 1541 |
+
|
| 1542 |
+
# only need to work out population energies for those that are
|
| 1543 |
+
# feasible
|
| 1544 |
+
self.population_energies[self.feasible] = (
|
| 1545 |
+
self._calculate_population_energies(
|
| 1546 |
+
self.population[self.feasible]))
|
| 1547 |
+
|
| 1548 |
+
self._promote_lowest_energy()
|
| 1549 |
+
|
| 1550 |
+
if self.dither is not None:
|
| 1551 |
+
self.scale = self.random_number_generator.uniform(self.dither[0],
|
| 1552 |
+
self.dither[1])
|
| 1553 |
+
|
| 1554 |
+
if self._updating == 'immediate':
|
| 1555 |
+
# update best solution immediately
|
| 1556 |
+
for candidate in range(self.num_population_members):
|
| 1557 |
+
if self._nfev > self.maxfun:
|
| 1558 |
+
raise StopIteration
|
| 1559 |
+
|
| 1560 |
+
# create a trial solution
|
| 1561 |
+
trial = self._mutate(candidate)
|
| 1562 |
+
|
| 1563 |
+
# ensuring that it's in the range [0, 1)
|
| 1564 |
+
self._ensure_constraint(trial)
|
| 1565 |
+
|
| 1566 |
+
# scale from [0, 1) to the actual parameter value
|
| 1567 |
+
parameters = self._scale_parameters(trial)
|
| 1568 |
+
|
| 1569 |
+
# determine the energy of the objective function
|
| 1570 |
+
if self._wrapped_constraints:
|
| 1571 |
+
cv = self._constraint_violation_fn(parameters)
|
| 1572 |
+
feasible = False
|
| 1573 |
+
energy = np.inf
|
| 1574 |
+
if not np.sum(cv) > 0:
|
| 1575 |
+
# solution is feasible
|
| 1576 |
+
feasible = True
|
| 1577 |
+
energy = self.func(parameters)
|
| 1578 |
+
self._nfev += 1
|
| 1579 |
+
else:
|
| 1580 |
+
feasible = True
|
| 1581 |
+
cv = np.atleast_2d([0.])
|
| 1582 |
+
energy = self.func(parameters)
|
| 1583 |
+
self._nfev += 1
|
| 1584 |
+
|
| 1585 |
+
# compare trial and population member
|
| 1586 |
+
if self._accept_trial(energy, feasible, cv,
|
| 1587 |
+
self.population_energies[candidate],
|
| 1588 |
+
self.feasible[candidate],
|
| 1589 |
+
self.constraint_violation[candidate]):
|
| 1590 |
+
self.population[candidate] = trial
|
| 1591 |
+
self.population_energies[candidate] = np.squeeze(energy)
|
| 1592 |
+
self.feasible[candidate] = feasible
|
| 1593 |
+
self.constraint_violation[candidate] = cv
|
| 1594 |
+
|
| 1595 |
+
# if the trial candidate is also better than the best
|
| 1596 |
+
# solution then promote it.
|
| 1597 |
+
if self._accept_trial(energy, feasible, cv,
|
| 1598 |
+
self.population_energies[0],
|
| 1599 |
+
self.feasible[0],
|
| 1600 |
+
self.constraint_violation[0]):
|
| 1601 |
+
self._promote_lowest_energy()
|
| 1602 |
+
|
| 1603 |
+
elif self._updating == 'deferred':
|
| 1604 |
+
# update best solution once per generation
|
| 1605 |
+
if self._nfev >= self.maxfun:
|
| 1606 |
+
raise StopIteration
|
| 1607 |
+
|
| 1608 |
+
# 'deferred' approach, vectorised form.
|
| 1609 |
+
# create trial solutions
|
| 1610 |
+
trial_pop = self._mutate_many(
|
| 1611 |
+
np.arange(self.num_population_members)
|
| 1612 |
+
)
|
| 1613 |
+
|
| 1614 |
+
# enforce bounds
|
| 1615 |
+
self._ensure_constraint(trial_pop)
|
| 1616 |
+
|
| 1617 |
+
# determine the energies of the objective function, but only for
|
| 1618 |
+
# feasible trials
|
| 1619 |
+
feasible, cv = self._calculate_population_feasibilities(trial_pop)
|
| 1620 |
+
trial_energies = np.full(self.num_population_members, np.inf)
|
| 1621 |
+
|
| 1622 |
+
# only calculate for feasible entries
|
| 1623 |
+
trial_energies[feasible] = self._calculate_population_energies(
|
| 1624 |
+
trial_pop[feasible])
|
| 1625 |
+
|
| 1626 |
+
# which solutions are 'improved'?
|
| 1627 |
+
loc = [self._accept_trial(*val) for val in
|
| 1628 |
+
zip(trial_energies, feasible, cv, self.population_energies,
|
| 1629 |
+
self.feasible, self.constraint_violation)]
|
| 1630 |
+
loc = np.array(loc)
|
| 1631 |
+
self.population = np.where(loc[:, np.newaxis],
|
| 1632 |
+
trial_pop,
|
| 1633 |
+
self.population)
|
| 1634 |
+
self.population_energies = np.where(loc,
|
| 1635 |
+
trial_energies,
|
| 1636 |
+
self.population_energies)
|
| 1637 |
+
self.feasible = np.where(loc,
|
| 1638 |
+
feasible,
|
| 1639 |
+
self.feasible)
|
| 1640 |
+
self.constraint_violation = np.where(loc[:, np.newaxis],
|
| 1641 |
+
cv,
|
| 1642 |
+
self.constraint_violation)
|
| 1643 |
+
|
| 1644 |
+
# make sure the best solution is updated if updating='deferred'.
|
| 1645 |
+
# put the lowest energy into the best solution position.
|
| 1646 |
+
self._promote_lowest_energy()
|
| 1647 |
+
|
| 1648 |
+
return self.x, self.population_energies[0]
|
| 1649 |
+
|
| 1650 |
+
def _scale_parameters(self, trial):
|
| 1651 |
+
"""Scale from a number between 0 and 1 to parameters."""
|
| 1652 |
+
# trial either has shape (N, ) or (L, N), where L is the number of
|
| 1653 |
+
# solutions being scaled
|
| 1654 |
+
scaled = self.__scale_arg1 + (trial - 0.5) * self.__scale_arg2
|
| 1655 |
+
if np.count_nonzero(self.integrality):
|
| 1656 |
+
i = np.broadcast_to(self.integrality, scaled.shape)
|
| 1657 |
+
scaled[i] = np.round(scaled[i])
|
| 1658 |
+
return scaled
|
| 1659 |
+
|
| 1660 |
+
def _unscale_parameters(self, parameters):
|
| 1661 |
+
"""Scale from parameters to a number between 0 and 1."""
|
| 1662 |
+
return (parameters - self.__scale_arg1) * self.__recip_scale_arg2 + 0.5
|
| 1663 |
+
|
| 1664 |
+
def _ensure_constraint(self, trial):
|
| 1665 |
+
"""Make sure the parameters lie between the limits."""
|
| 1666 |
+
mask = np.bitwise_or(trial > 1, trial < 0)
|
| 1667 |
+
if oob := np.count_nonzero(mask):
|
| 1668 |
+
trial[mask] = self.random_number_generator.uniform(size=oob)
|
| 1669 |
+
|
| 1670 |
+
def _mutate_custom(self, candidate):
|
| 1671 |
+
rng = self.random_number_generator
|
| 1672 |
+
msg = (
|
| 1673 |
+
"strategy must have signature"
|
| 1674 |
+
" f(candidate: int, population: np.ndarray, rng=None) returning an"
|
| 1675 |
+
" array of shape (N,)"
|
| 1676 |
+
)
|
| 1677 |
+
_population = self._scale_parameters(self.population)
|
| 1678 |
+
if not len(np.shape(candidate)):
|
| 1679 |
+
# single entry in population
|
| 1680 |
+
trial = self.strategy(candidate, _population, rng=rng)
|
| 1681 |
+
if trial.shape != (self.parameter_count,):
|
| 1682 |
+
raise RuntimeError(msg)
|
| 1683 |
+
else:
|
| 1684 |
+
S = candidate.shape[0]
|
| 1685 |
+
trial = np.array(
|
| 1686 |
+
[self.strategy(c, _population, rng=rng) for c in candidate],
|
| 1687 |
+
dtype=float
|
| 1688 |
+
)
|
| 1689 |
+
if trial.shape != (S, self.parameter_count):
|
| 1690 |
+
raise RuntimeError(msg)
|
| 1691 |
+
return self._unscale_parameters(trial)
|
| 1692 |
+
|
| 1693 |
+
def _mutate_many(self, candidates):
|
| 1694 |
+
"""Create trial vectors based on a mutation strategy."""
|
| 1695 |
+
rng = self.random_number_generator
|
| 1696 |
+
|
| 1697 |
+
S = len(candidates)
|
| 1698 |
+
if callable(self.strategy):
|
| 1699 |
+
return self._mutate_custom(candidates)
|
| 1700 |
+
|
| 1701 |
+
trial = np.copy(self.population[candidates])
|
| 1702 |
+
samples = np.array([self._select_samples(c, 5) for c in candidates])
|
| 1703 |
+
|
| 1704 |
+
if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
|
| 1705 |
+
bprime = self.mutation_func(candidates, samples)
|
| 1706 |
+
else:
|
| 1707 |
+
bprime = self.mutation_func(samples)
|
| 1708 |
+
|
| 1709 |
+
fill_point = rng_integers(rng, self.parameter_count, size=S)
|
| 1710 |
+
crossovers = rng.uniform(size=(S, self.parameter_count))
|
| 1711 |
+
crossovers = crossovers < self.cross_over_probability
|
| 1712 |
+
if self.strategy in self._binomial:
|
| 1713 |
+
# the last one is always from the bprime vector for binomial
|
| 1714 |
+
# If you fill in modulo with a loop you have to set the last one to
|
| 1715 |
+
# true. If you don't use a loop then you can have any random entry
|
| 1716 |
+
# be True.
|
| 1717 |
+
i = np.arange(S)
|
| 1718 |
+
crossovers[i, fill_point[i]] = True
|
| 1719 |
+
trial = np.where(crossovers, bprime, trial)
|
| 1720 |
+
return trial
|
| 1721 |
+
|
| 1722 |
+
elif self.strategy in self._exponential:
|
| 1723 |
+
crossovers[..., 0] = True
|
| 1724 |
+
for j in range(S):
|
| 1725 |
+
i = 0
|
| 1726 |
+
init_fill = fill_point[j]
|
| 1727 |
+
while (i < self.parameter_count and crossovers[j, i]):
|
| 1728 |
+
trial[j, init_fill] = bprime[j, init_fill]
|
| 1729 |
+
init_fill = (init_fill + 1) % self.parameter_count
|
| 1730 |
+
i += 1
|
| 1731 |
+
|
| 1732 |
+
return trial
|
| 1733 |
+
|
| 1734 |
+
def _mutate(self, candidate):
|
| 1735 |
+
"""Create a trial vector based on a mutation strategy."""
|
| 1736 |
+
rng = self.random_number_generator
|
| 1737 |
+
|
| 1738 |
+
if callable(self.strategy):
|
| 1739 |
+
return self._mutate_custom(candidate)
|
| 1740 |
+
|
| 1741 |
+
fill_point = rng_integers(rng, self.parameter_count)
|
| 1742 |
+
samples = self._select_samples(candidate, 5)
|
| 1743 |
+
|
| 1744 |
+
trial = np.copy(self.population[candidate])
|
| 1745 |
+
|
| 1746 |
+
if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
|
| 1747 |
+
bprime = self.mutation_func(candidate, samples)
|
| 1748 |
+
else:
|
| 1749 |
+
bprime = self.mutation_func(samples)
|
| 1750 |
+
|
| 1751 |
+
crossovers = rng.uniform(size=self.parameter_count)
|
| 1752 |
+
crossovers = crossovers < self.cross_over_probability
|
| 1753 |
+
if self.strategy in self._binomial:
|
| 1754 |
+
# the last one is always from the bprime vector for binomial
|
| 1755 |
+
# If you fill in modulo with a loop you have to set the last one to
|
| 1756 |
+
# true. If you don't use a loop then you can have any random entry
|
| 1757 |
+
# be True.
|
| 1758 |
+
crossovers[fill_point] = True
|
| 1759 |
+
trial = np.where(crossovers, bprime, trial)
|
| 1760 |
+
return trial
|
| 1761 |
+
|
| 1762 |
+
elif self.strategy in self._exponential:
|
| 1763 |
+
i = 0
|
| 1764 |
+
crossovers[0] = True
|
| 1765 |
+
while i < self.parameter_count and crossovers[i]:
|
| 1766 |
+
trial[fill_point] = bprime[fill_point]
|
| 1767 |
+
fill_point = (fill_point + 1) % self.parameter_count
|
| 1768 |
+
i += 1
|
| 1769 |
+
|
| 1770 |
+
return trial
|
| 1771 |
+
|
| 1772 |
+
def _best1(self, samples):
|
| 1773 |
+
"""best1bin, best1exp"""
|
| 1774 |
+
# samples.shape == (S, 5)
|
| 1775 |
+
# or
|
| 1776 |
+
# samples.shape(5,)
|
| 1777 |
+
r0, r1 = samples[..., :2].T
|
| 1778 |
+
return (self.population[0] + self.scale *
|
| 1779 |
+
(self.population[r0] - self.population[r1]))
|
| 1780 |
+
|
| 1781 |
+
def _rand1(self, samples):
|
| 1782 |
+
"""rand1bin, rand1exp"""
|
| 1783 |
+
r0, r1, r2 = samples[..., :3].T
|
| 1784 |
+
return (self.population[r0] + self.scale *
|
| 1785 |
+
(self.population[r1] - self.population[r2]))
|
| 1786 |
+
|
| 1787 |
+
def _randtobest1(self, samples):
|
| 1788 |
+
"""randtobest1bin, randtobest1exp"""
|
| 1789 |
+
r0, r1, r2 = samples[..., :3].T
|
| 1790 |
+
bprime = np.copy(self.population[r0])
|
| 1791 |
+
bprime += self.scale * (self.population[0] - bprime)
|
| 1792 |
+
bprime += self.scale * (self.population[r1] -
|
| 1793 |
+
self.population[r2])
|
| 1794 |
+
return bprime
|
| 1795 |
+
|
| 1796 |
+
def _currenttobest1(self, candidate, samples):
|
| 1797 |
+
"""currenttobest1bin, currenttobest1exp"""
|
| 1798 |
+
r0, r1 = samples[..., :2].T
|
| 1799 |
+
bprime = (self.population[candidate] + self.scale *
|
| 1800 |
+
(self.population[0] - self.population[candidate] +
|
| 1801 |
+
self.population[r0] - self.population[r1]))
|
| 1802 |
+
return bprime
|
| 1803 |
+
|
| 1804 |
+
def _best2(self, samples):
|
| 1805 |
+
"""best2bin, best2exp"""
|
| 1806 |
+
r0, r1, r2, r3 = samples[..., :4].T
|
| 1807 |
+
bprime = (self.population[0] + self.scale *
|
| 1808 |
+
(self.population[r0] + self.population[r1] -
|
| 1809 |
+
self.population[r2] - self.population[r3]))
|
| 1810 |
+
|
| 1811 |
+
return bprime
|
| 1812 |
+
|
| 1813 |
+
def _rand2(self, samples):
|
| 1814 |
+
"""rand2bin, rand2exp"""
|
| 1815 |
+
r0, r1, r2, r3, r4 = samples[..., :5].T
|
| 1816 |
+
bprime = (self.population[r0] + self.scale *
|
| 1817 |
+
(self.population[r1] + self.population[r2] -
|
| 1818 |
+
self.population[r3] - self.population[r4]))
|
| 1819 |
+
|
| 1820 |
+
return bprime
|
| 1821 |
+
|
| 1822 |
+
def _select_samples(self, candidate, number_samples):
|
| 1823 |
+
"""
|
| 1824 |
+
obtain random integers from range(self.num_population_members),
|
| 1825 |
+
without replacement. You can't have the original candidate either.
|
| 1826 |
+
"""
|
| 1827 |
+
self.random_number_generator.shuffle(self._random_population_index)
|
| 1828 |
+
idxs = self._random_population_index[:number_samples + 1]
|
| 1829 |
+
return idxs[idxs != candidate][:number_samples]
|
| 1830 |
+
|
| 1831 |
+
|
| 1832 |
+
class _ConstraintWrapper:
|
| 1833 |
+
"""Object to wrap/evaluate user defined constraints.
|
| 1834 |
+
|
| 1835 |
+
Very similar in practice to `PreparedConstraint`, except that no evaluation
|
| 1836 |
+
of jac/hess is performed (explicit or implicit).
|
| 1837 |
+
|
| 1838 |
+
If created successfully, it will contain the attributes listed below.
|
| 1839 |
+
|
| 1840 |
+
Parameters
|
| 1841 |
+
----------
|
| 1842 |
+
constraint : {`NonlinearConstraint`, `LinearConstraint`, `Bounds`}
|
| 1843 |
+
Constraint to check and prepare.
|
| 1844 |
+
x0 : array_like
|
| 1845 |
+
Initial vector of independent variables, shape (N,)
|
| 1846 |
+
|
| 1847 |
+
Attributes
|
| 1848 |
+
----------
|
| 1849 |
+
fun : callable
|
| 1850 |
+
Function defining the constraint wrapped by one of the convenience
|
| 1851 |
+
classes.
|
| 1852 |
+
bounds : 2-tuple
|
| 1853 |
+
Contains lower and upper bounds for the constraints --- lb and ub.
|
| 1854 |
+
These are converted to ndarray and have a size equal to the number of
|
| 1855 |
+
the constraints.
|
| 1856 |
+
|
| 1857 |
+
Notes
|
| 1858 |
+
-----
|
| 1859 |
+
_ConstraintWrapper.fun and _ConstraintWrapper.violation can get sent
|
| 1860 |
+
arrays of shape (N, S) or (N,), where S is the number of vectors of shape
|
| 1861 |
+
(N,) to consider constraints for.
|
| 1862 |
+
"""
|
| 1863 |
+
def __init__(self, constraint, x0):
|
| 1864 |
+
self.constraint = constraint
|
| 1865 |
+
|
| 1866 |
+
if isinstance(constraint, NonlinearConstraint):
|
| 1867 |
+
def fun(x):
|
| 1868 |
+
x = np.asarray(x)
|
| 1869 |
+
return np.atleast_1d(constraint.fun(x))
|
| 1870 |
+
elif isinstance(constraint, LinearConstraint):
|
| 1871 |
+
def fun(x):
|
| 1872 |
+
if issparse(constraint.A):
|
| 1873 |
+
A = constraint.A
|
| 1874 |
+
else:
|
| 1875 |
+
A = np.atleast_2d(constraint.A)
|
| 1876 |
+
|
| 1877 |
+
res = A.dot(x)
|
| 1878 |
+
# x either has shape (N, S) or (N)
|
| 1879 |
+
# (M, N) x (N, S) --> (M, S)
|
| 1880 |
+
# (M, N) x (N,) --> (M,)
|
| 1881 |
+
# However, if (M, N) is a matrix then:
|
| 1882 |
+
# (M, N) * (N,) --> (M, 1), we need this to be (M,)
|
| 1883 |
+
if x.ndim == 1 and res.ndim == 2:
|
| 1884 |
+
# deal with case that constraint.A is an np.matrix
|
| 1885 |
+
# see gh20041
|
| 1886 |
+
res = np.asarray(res)[:, 0]
|
| 1887 |
+
|
| 1888 |
+
return res
|
| 1889 |
+
elif isinstance(constraint, Bounds):
|
| 1890 |
+
def fun(x):
|
| 1891 |
+
return np.asarray(x)
|
| 1892 |
+
else:
|
| 1893 |
+
raise ValueError("`constraint` of an unknown type is passed.")
|
| 1894 |
+
|
| 1895 |
+
self.fun = fun
|
| 1896 |
+
|
| 1897 |
+
lb = np.asarray(constraint.lb, dtype=float)
|
| 1898 |
+
ub = np.asarray(constraint.ub, dtype=float)
|
| 1899 |
+
|
| 1900 |
+
x0 = np.asarray(x0)
|
| 1901 |
+
|
| 1902 |
+
# find out the number of constraints
|
| 1903 |
+
f0 = fun(x0)
|
| 1904 |
+
self.num_constr = m = f0.size
|
| 1905 |
+
self.parameter_count = x0.size
|
| 1906 |
+
|
| 1907 |
+
if lb.ndim == 0:
|
| 1908 |
+
lb = np.resize(lb, m)
|
| 1909 |
+
if ub.ndim == 0:
|
| 1910 |
+
ub = np.resize(ub, m)
|
| 1911 |
+
|
| 1912 |
+
self.bounds = (lb, ub)
|
| 1913 |
+
|
| 1914 |
+
def __call__(self, x):
|
| 1915 |
+
return np.atleast_1d(self.fun(x))
|
| 1916 |
+
|
| 1917 |
+
def violation(self, x):
|
| 1918 |
+
"""How much the constraint is exceeded by.
|
| 1919 |
+
|
| 1920 |
+
Parameters
|
| 1921 |
+
----------
|
| 1922 |
+
x : array-like
|
| 1923 |
+
Vector of independent variables, (N, S), where N is number of
|
| 1924 |
+
parameters and S is the number of solutions to be investigated.
|
| 1925 |
+
|
| 1926 |
+
Returns
|
| 1927 |
+
-------
|
| 1928 |
+
excess : array-like
|
| 1929 |
+
How much the constraint is exceeded by, for each of the
|
| 1930 |
+
constraints specified by `_ConstraintWrapper.fun`.
|
| 1931 |
+
Has shape (M, S) where M is the number of constraint components.
|
| 1932 |
+
"""
|
| 1933 |
+
# expect ev to have shape (num_constr, S) or (num_constr,)
|
| 1934 |
+
ev = self.fun(np.asarray(x))
|
| 1935 |
+
|
| 1936 |
+
try:
|
| 1937 |
+
excess_lb = np.maximum(self.bounds[0] - ev.T, 0)
|
| 1938 |
+
excess_ub = np.maximum(ev.T - self.bounds[1], 0)
|
| 1939 |
+
except ValueError as e:
|
| 1940 |
+
raise RuntimeError("An array returned from a Constraint has"
|
| 1941 |
+
" the wrong shape. If `vectorized is False`"
|
| 1942 |
+
" the Constraint should return an array of"
|
| 1943 |
+
" shape (M,). If `vectorized is True` then"
|
| 1944 |
+
" the Constraint must return an array of"
|
| 1945 |
+
" shape (M, S), where S is the number of"
|
| 1946 |
+
" solution vectors and M is the number of"
|
| 1947 |
+
" constraint components in a given"
|
| 1948 |
+
" Constraint object.") from e
|
| 1949 |
+
|
| 1950 |
+
v = (excess_lb + excess_ub).T
|
| 1951 |
+
return v
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py
ADDED
|
@@ -0,0 +1,856 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: disable-error-code="attr-defined"
|
| 2 |
+
import numpy as np
|
| 3 |
+
import scipy._lib._elementwise_iterative_method as eim
|
| 4 |
+
from scipy._lib._util import _RichResult
|
| 5 |
+
|
| 6 |
+
_EERRORINCREASE = -1 # used in _differentiate
|
| 7 |
+
|
| 8 |
+
def _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
|
| 9 |
+
step_factor, step_direction, preserve_shape, callback):
|
| 10 |
+
# Input validation for `_differentiate`
|
| 11 |
+
|
| 12 |
+
if not callable(func):
|
| 13 |
+
raise ValueError('`func` must be callable.')
|
| 14 |
+
|
| 15 |
+
# x has more complex IV that is taken care of during initialization
|
| 16 |
+
x = np.asarray(x)
|
| 17 |
+
dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
|
| 18 |
+
|
| 19 |
+
if not np.iterable(args):
|
| 20 |
+
args = (args,)
|
| 21 |
+
|
| 22 |
+
if atol is None:
|
| 23 |
+
atol = np.finfo(dtype).tiny
|
| 24 |
+
|
| 25 |
+
if rtol is None:
|
| 26 |
+
rtol = np.sqrt(np.finfo(dtype).eps)
|
| 27 |
+
|
| 28 |
+
message = 'Tolerances and step parameters must be non-negative scalars.'
|
| 29 |
+
tols = np.asarray([atol, rtol, initial_step, step_factor])
|
| 30 |
+
if (not np.issubdtype(tols.dtype, np.number)
|
| 31 |
+
or np.any(tols < 0)
|
| 32 |
+
or tols.shape != (4,)):
|
| 33 |
+
raise ValueError(message)
|
| 34 |
+
initial_step, step_factor = tols[2:].astype(dtype)
|
| 35 |
+
|
| 36 |
+
maxiter_int = int(maxiter)
|
| 37 |
+
if maxiter != maxiter_int or maxiter <= 0:
|
| 38 |
+
raise ValueError('`maxiter` must be a positive integer.')
|
| 39 |
+
|
| 40 |
+
order_int = int(order)
|
| 41 |
+
if order_int != order or order <= 0:
|
| 42 |
+
raise ValueError('`order` must be a positive integer.')
|
| 43 |
+
|
| 44 |
+
step_direction = np.sign(step_direction).astype(dtype)
|
| 45 |
+
x, step_direction = np.broadcast_arrays(x, step_direction)
|
| 46 |
+
x, step_direction = x[()], step_direction[()]
|
| 47 |
+
|
| 48 |
+
message = '`preserve_shape` must be True or False.'
|
| 49 |
+
if preserve_shape not in {True, False}:
|
| 50 |
+
raise ValueError(message)
|
| 51 |
+
|
| 52 |
+
if callback is not None and not callable(callback):
|
| 53 |
+
raise ValueError('`callback` must be callable.')
|
| 54 |
+
|
| 55 |
+
return (func, x, args, atol, rtol, maxiter_int, order_int, initial_step,
|
| 56 |
+
step_factor, step_direction, preserve_shape, callback)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _differentiate(func, x, *, args=(), atol=None, rtol=None, maxiter=10,
|
| 60 |
+
order=8, initial_step=0.5, step_factor=2.0,
|
| 61 |
+
step_direction=0, preserve_shape=False, callback=None):
|
| 62 |
+
"""Evaluate the derivative of an elementwise scalar function numerically.
|
| 63 |
+
|
| 64 |
+
Parameters
|
| 65 |
+
----------
|
| 66 |
+
func : callable
|
| 67 |
+
The function whose derivative is desired. The signature must be::
|
| 68 |
+
|
| 69 |
+
func(x: ndarray, *fargs) -> ndarray
|
| 70 |
+
|
| 71 |
+
where each element of ``x`` is a finite real number and ``fargs`` is a tuple,
|
| 72 |
+
which may contain an arbitrary number of arrays that are broadcastable
|
| 73 |
+
with `x`. ``func`` must be an elementwise function: each element
|
| 74 |
+
``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
|
| 75 |
+
x : array_like
|
| 76 |
+
Abscissae at which to evaluate the derivative.
|
| 77 |
+
args : tuple, optional
|
| 78 |
+
Additional positional arguments to be passed to `func`. Must be arrays
|
| 79 |
+
broadcastable with `x`. If the callable to be differentiated requires
|
| 80 |
+
arguments that are not broadcastable with `x`, wrap that callable with
|
| 81 |
+
`func`. See Examples.
|
| 82 |
+
atol, rtol : float, optional
|
| 83 |
+
Absolute and relative tolerances for the stopping condition: iteration
|
| 84 |
+
will stop when ``res.error < atol + rtol * abs(res.df)``. The default
|
| 85 |
+
`atol` is the smallest normal number of the appropriate dtype, and
|
| 86 |
+
the default `rtol` is the square root of the precision of the
|
| 87 |
+
appropriate dtype.
|
| 88 |
+
order : int, default: 8
|
| 89 |
+
The (positive integer) order of the finite difference formula to be
|
| 90 |
+
used. Odd integers will be rounded up to the next even integer.
|
| 91 |
+
initial_step : float, default: 0.5
|
| 92 |
+
The (absolute) initial step size for the finite difference derivative
|
| 93 |
+
approximation.
|
| 94 |
+
step_factor : float, default: 2.0
|
| 95 |
+
The factor by which the step size is *reduced* in each iteration; i.e.
|
| 96 |
+
the step size in iteration 1 is ``initial_step/step_factor``. If
|
| 97 |
+
``step_factor < 1``, subsequent steps will be greater than the initial
|
| 98 |
+
step; this may be useful if steps smaller than some threshold are
|
| 99 |
+
undesirable (e.g. due to subtractive cancellation error).
|
| 100 |
+
maxiter : int, default: 10
|
| 101 |
+
The maximum number of iterations of the algorithm to perform. See
|
| 102 |
+
notes.
|
| 103 |
+
step_direction : array_like
|
| 104 |
+
An array representing the direction of the finite difference steps (for
|
| 105 |
+
use when `x` lies near to the boundary of the domain of the function.)
|
| 106 |
+
Must be broadcastable with `x` and all `args`.
|
| 107 |
+
Where 0 (default), central differences are used; where negative (e.g.
|
| 108 |
+
-1), steps are non-positive; and where positive (e.g. 1), all steps are
|
| 109 |
+
non-negative.
|
| 110 |
+
preserve_shape : bool, default: False
|
| 111 |
+
In the following, "arguments of `func`" refers to the array ``x`` and
|
| 112 |
+
any arrays within ``fargs``. Let ``shape`` be the broadcasted shape
|
| 113 |
+
of `x` and all elements of `args` (which is conceptually
|
| 114 |
+
distinct from ``fargs`` passed into `f`).
|
| 115 |
+
|
| 116 |
+
- When ``preserve_shape=False`` (default), `f` must accept arguments
|
| 117 |
+
of *any* broadcastable shapes.
|
| 118 |
+
|
| 119 |
+
- When ``preserve_shape=True``, `f` must accept arguments of shape
|
| 120 |
+
``shape`` *or* ``shape + (n,)``, where ``(n,)`` is the number of
|
| 121 |
+
abscissae at which the function is being evaluated.
|
| 122 |
+
|
| 123 |
+
In either case, for each scalar element ``xi`` within `x`, the array
|
| 124 |
+
returned by `f` must include the scalar ``f(xi)`` at the same index.
|
| 125 |
+
Consequently, the shape of the output is always the shape of the input
|
| 126 |
+
``x``.
|
| 127 |
+
|
| 128 |
+
See Examples.
|
| 129 |
+
callback : callable, optional
|
| 130 |
+
An optional user-supplied function to be called before the first
|
| 131 |
+
iteration and after each iteration.
|
| 132 |
+
Called as ``callback(res)``, where ``res`` is a ``_RichResult``
|
| 133 |
+
similar to that returned by `_differentiate` (but containing the
|
| 134 |
+
current iterate's values of all variables). If `callback` raises a
|
| 135 |
+
``StopIteration``, the algorithm will terminate immediately and
|
| 136 |
+
`_differentiate` will return a result.
|
| 137 |
+
|
| 138 |
+
Returns
|
| 139 |
+
-------
|
| 140 |
+
res : _RichResult
|
| 141 |
+
An instance of `scipy._lib._util._RichResult` with the following
|
| 142 |
+
attributes. (The descriptions are written as though the values will be
|
| 143 |
+
scalars; however, if `func` returns an array, the outputs will be
|
| 144 |
+
arrays of the same shape.)
|
| 145 |
+
|
| 146 |
+
success : bool
|
| 147 |
+
``True`` when the algorithm terminated successfully (status ``0``).
|
| 148 |
+
status : int
|
| 149 |
+
An integer representing the exit status of the algorithm.
|
| 150 |
+
``0`` : The algorithm converged to the specified tolerances.
|
| 151 |
+
``-1`` : The error estimate increased, so iteration was terminated.
|
| 152 |
+
``-2`` : The maximum number of iterations was reached.
|
| 153 |
+
``-3`` : A non-finite value was encountered.
|
| 154 |
+
``-4`` : Iteration was terminated by `callback`.
|
| 155 |
+
``1`` : The algorithm is proceeding normally (in `callback` only).
|
| 156 |
+
df : float
|
| 157 |
+
The derivative of `func` at `x`, if the algorithm terminated
|
| 158 |
+
successfully.
|
| 159 |
+
error : float
|
| 160 |
+
An estimate of the error: the magnitude of the difference between
|
| 161 |
+
the current estimate of the derivative and the estimate in the
|
| 162 |
+
previous iteration.
|
| 163 |
+
nit : int
|
| 164 |
+
The number of iterations performed.
|
| 165 |
+
nfev : int
|
| 166 |
+
The number of points at which `func` was evaluated.
|
| 167 |
+
x : float
|
| 168 |
+
The value at which the derivative of `func` was evaluated
|
| 169 |
+
(after broadcasting with `args` and `step_direction`).
|
| 170 |
+
|
| 171 |
+
Notes
|
| 172 |
+
-----
|
| 173 |
+
The implementation was inspired by jacobi [1]_, numdifftools [2]_, and
|
| 174 |
+
DERIVEST [3]_, but the implementation follows the theory of Taylor series
|
| 175 |
+
more straightforwardly (and arguably naively so).
|
| 176 |
+
In the first iteration, the derivative is estimated using a finite
|
| 177 |
+
difference formula of order `order` with maximum step size `initial_step`.
|
| 178 |
+
Each subsequent iteration, the maximum step size is reduced by
|
| 179 |
+
`step_factor`, and the derivative is estimated again until a termination
|
| 180 |
+
condition is reached. The error estimate is the magnitude of the difference
|
| 181 |
+
between the current derivative approximation and that of the previous
|
| 182 |
+
iteration.
|
| 183 |
+
|
| 184 |
+
The stencils of the finite difference formulae are designed such that
|
| 185 |
+
abscissae are "nested": after `func` is evaluated at ``order + 1``
|
| 186 |
+
points in the first iteration, `func` is evaluated at only two new points
|
| 187 |
+
in each subsequent iteration; ``order - 1`` previously evaluated function
|
| 188 |
+
values required by the finite difference formula are reused, and two
|
| 189 |
+
function values (evaluations at the points furthest from `x`) are unused.
|
| 190 |
+
|
| 191 |
+
Step sizes are absolute. When the step size is small relative to the
|
| 192 |
+
magnitude of `x`, precision is lost; for example, if `x` is ``1e20``, the
|
| 193 |
+
default initial step size of ``0.5`` cannot be resolved. Accordingly,
|
| 194 |
+
consider using larger initial step sizes for large magnitudes of `x`.
|
| 195 |
+
|
| 196 |
+
The default tolerances are challenging to satisfy at points where the
|
| 197 |
+
true derivative is exactly zero. If the derivative may be exactly zero,
|
| 198 |
+
consider specifying an absolute tolerance (e.g. ``atol=1e-16``) to
|
| 199 |
+
improve convergence.
|
| 200 |
+
|
| 201 |
+
References
|
| 202 |
+
----------
|
| 203 |
+
[1]_ Hans Dembinski (@HDembinski). jacobi.
|
| 204 |
+
https://github.com/HDembinski/jacobi
|
| 205 |
+
[2]_ Per A. Brodtkorb and John D'Errico. numdifftools.
|
| 206 |
+
https://numdifftools.readthedocs.io/en/latest/
|
| 207 |
+
[3]_ John D'Errico. DERIVEST: Adaptive Robust Numerical Differentiation.
|
| 208 |
+
https://www.mathworks.com/matlabcentral/fileexchange/13490-adaptive-robust-numerical-differentiation
|
| 209 |
+
[4]_ Numerical Differentition. Wikipedia.
|
| 210 |
+
https://en.wikipedia.org/wiki/Numerical_differentiation
|
| 211 |
+
|
| 212 |
+
Examples
|
| 213 |
+
--------
|
| 214 |
+
Evaluate the derivative of ``np.exp`` at several points ``x``.
|
| 215 |
+
|
| 216 |
+
>>> import numpy as np
|
| 217 |
+
>>> from scipy.optimize._differentiate import _differentiate
|
| 218 |
+
>>> f = np.exp
|
| 219 |
+
>>> df = np.exp # true derivative
|
| 220 |
+
>>> x = np.linspace(1, 2, 5)
|
| 221 |
+
>>> res = _differentiate(f, x)
|
| 222 |
+
>>> res.df # approximation of the derivative
|
| 223 |
+
array([2.71828183, 3.49034296, 4.48168907, 5.75460268, 7.3890561 ])
|
| 224 |
+
>>> res.error # estimate of the error
|
| 225 |
+
array(
|
| 226 |
+
[7.12940817e-12, 9.16688947e-12, 1.17594823e-11, 1.50972568e-11, 1.93942640e-11]
|
| 227 |
+
)
|
| 228 |
+
>>> abs(res.df - df(x)) # true error
|
| 229 |
+
array(
|
| 230 |
+
[3.06421555e-14, 3.01980663e-14, 5.06261699e-14, 6.30606678e-14, 8.34887715e-14]
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
Show the convergence of the approximation as the step size is reduced.
|
| 234 |
+
Each iteration, the step size is reduced by `step_factor`, so for
|
| 235 |
+
sufficiently small initial step, each iteration reduces the error by a
|
| 236 |
+
factor of ``1/step_factor**order`` until finite precision arithmetic
|
| 237 |
+
inhibits further improvement.
|
| 238 |
+
|
| 239 |
+
>>> iter = list(range(1, 12)) # maximum iterations
|
| 240 |
+
>>> hfac = 2 # step size reduction per iteration
|
| 241 |
+
>>> hdir = [-1, 0, 1] # compare left-, central-, and right- steps
|
| 242 |
+
>>> order = 4 # order of differentiation formula
|
| 243 |
+
>>> x = 1
|
| 244 |
+
>>> ref = df(x)
|
| 245 |
+
>>> errors = [] # true error
|
| 246 |
+
>>> for i in iter:
|
| 247 |
+
... res = _differentiate(f, x, maxiter=i, step_factor=hfac,
|
| 248 |
+
... step_direction=hdir, order=order,
|
| 249 |
+
... atol=0, rtol=0) # prevent early termination
|
| 250 |
+
... errors.append(abs(res.df - ref))
|
| 251 |
+
>>> errors = np.array(errors)
|
| 252 |
+
>>> plt.semilogy(iter, errors[:, 0], label='left differences')
|
| 253 |
+
>>> plt.semilogy(iter, errors[:, 1], label='central differences')
|
| 254 |
+
>>> plt.semilogy(iter, errors[:, 2], label='right differences')
|
| 255 |
+
>>> plt.xlabel('iteration')
|
| 256 |
+
>>> plt.ylabel('error')
|
| 257 |
+
>>> plt.legend()
|
| 258 |
+
>>> plt.show()
|
| 259 |
+
>>> (errors[1, 1] / errors[0, 1], 1 / hfac**order)
|
| 260 |
+
(0.06215223140159822, 0.0625)
|
| 261 |
+
|
| 262 |
+
The implementation is vectorized over `x`, `step_direction`, and `args`.
|
| 263 |
+
The function is evaluated once before the first iteration to perform input
|
| 264 |
+
validation and standardization, and once per iteration thereafter.
|
| 265 |
+
|
| 266 |
+
>>> def f(x, p):
|
| 267 |
+
... print('here')
|
| 268 |
+
... f.nit += 1
|
| 269 |
+
... return x**p
|
| 270 |
+
>>> f.nit = 0
|
| 271 |
+
>>> def df(x, p):
|
| 272 |
+
... return p*x**(p-1)
|
| 273 |
+
>>> x = np.arange(1, 5)
|
| 274 |
+
>>> p = np.arange(1, 6).reshape((-1, 1))
|
| 275 |
+
>>> hdir = np.arange(-1, 2).reshape((-1, 1, 1))
|
| 276 |
+
>>> res = _differentiate(f, x, args=(p,), step_direction=hdir, maxiter=1)
|
| 277 |
+
>>> np.allclose(res.df, df(x, p))
|
| 278 |
+
True
|
| 279 |
+
>>> res.df.shape
|
| 280 |
+
(3, 5, 4)
|
| 281 |
+
>>> f.nit
|
| 282 |
+
2
|
| 283 |
+
|
| 284 |
+
By default, `preserve_shape` is False, and therefore the callable
|
| 285 |
+
`f` may be called with arrays of any broadcastable shapes.
|
| 286 |
+
For example:
|
| 287 |
+
|
| 288 |
+
>>> shapes = []
|
| 289 |
+
>>> def f(x, c):
|
| 290 |
+
... shape = np.broadcast_shapes(x.shape, c.shape)
|
| 291 |
+
... shapes.append(shape)
|
| 292 |
+
... return np.sin(c*x)
|
| 293 |
+
>>>
|
| 294 |
+
>>> c = [1, 5, 10, 20]
|
| 295 |
+
>>> res = _differentiate(f, 0, args=(c,))
|
| 296 |
+
>>> shapes
|
| 297 |
+
[(4,), (4, 8), (4, 2), (3, 2), (2, 2), (1, 2)]
|
| 298 |
+
|
| 299 |
+
To understand where these shapes are coming from - and to better
|
| 300 |
+
understand how `_differentiate` computes accurate results - note that
|
| 301 |
+
higher values of ``c`` correspond with higher frequency sinusoids.
|
| 302 |
+
The higher frequency sinusoids make the function's derivative change
|
| 303 |
+
faster, so more function evaluations are required to achieve the target
|
| 304 |
+
accuracy:
|
| 305 |
+
|
| 306 |
+
>>> res.nfev
|
| 307 |
+
array([11, 13, 15, 17])
|
| 308 |
+
|
| 309 |
+
The initial ``shape``, ``(4,)``, corresponds with evaluating the
|
| 310 |
+
function at a single abscissa and all four frequencies; this is used
|
| 311 |
+
for input validation and to determine the size and dtype of the arrays
|
| 312 |
+
that store results. The next shape corresponds with evaluating the
|
| 313 |
+
function at an initial grid of abscissae and all four frequencies.
|
| 314 |
+
Successive calls to the function evaluate the function at two more
|
| 315 |
+
abscissae, increasing the effective order of the approximation by two.
|
| 316 |
+
However, in later function evaluations, the function is evaluated at
|
| 317 |
+
fewer frequencies because the corresponding derivative has already
|
| 318 |
+
converged to the required tolerance. This saves function evaluations to
|
| 319 |
+
improve performance, but it requires the function to accept arguments of
|
| 320 |
+
any shape.
|
| 321 |
+
|
| 322 |
+
"Vector-valued" functions are unlikely to satisfy this requirement.
|
| 323 |
+
For example, consider
|
| 324 |
+
|
| 325 |
+
>>> def f(x):
|
| 326 |
+
... return [x, np.sin(3*x), x+np.sin(10*x), np.sin(20*x)*(x-1)**2]
|
| 327 |
+
|
| 328 |
+
This integrand is not compatible with `_differentiate` as written; for instance,
|
| 329 |
+
the shape of the output will not be the same as the shape of ``x``. Such a
|
| 330 |
+
function *could* be converted to a compatible form with the introduction of
|
| 331 |
+
additional parameters, but this would be inconvenient. In such cases,
|
| 332 |
+
a simpler solution would be to use `preserve_shape`.
|
| 333 |
+
|
| 334 |
+
>>> shapes = []
|
| 335 |
+
>>> def f(x):
|
| 336 |
+
... shapes.append(x.shape)
|
| 337 |
+
... x0, x1, x2, x3 = x
|
| 338 |
+
... return [x0, np.sin(3*x1), x2+np.sin(10*x2), np.sin(20*x3)*(x3-1)**2]
|
| 339 |
+
>>>
|
| 340 |
+
>>> x = np.zeros(4)
|
| 341 |
+
>>> res = _differentiate(f, x, preserve_shape=True)
|
| 342 |
+
>>> shapes
|
| 343 |
+
[(4,), (4, 8), (4, 2), (4, 2), (4, 2), (4, 2)]
|
| 344 |
+
|
| 345 |
+
Here, the shape of ``x`` is ``(4,)``. With ``preserve_shape=True``, the
|
| 346 |
+
function may be called with argument ``x`` of shape ``(4,)`` or ``(4, n)``,
|
| 347 |
+
and this is what we observe.
|
| 348 |
+
|
| 349 |
+
"""
|
| 350 |
+
# TODO (followup):
|
| 351 |
+
# - investigate behavior at saddle points
|
| 352 |
+
# - array initial_step / step_factor?
|
| 353 |
+
# - multivariate functions?
|
| 354 |
+
|
| 355 |
+
res = _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
|
| 356 |
+
step_factor, step_direction, preserve_shape, callback)
|
| 357 |
+
(func, x, args, atol, rtol, maxiter, order,
|
| 358 |
+
h0, fac, hdir, preserve_shape, callback) = res
|
| 359 |
+
|
| 360 |
+
# Initialization
|
| 361 |
+
# Since f(x) (no step) is not needed for central differences, it may be
|
| 362 |
+
# possible to eliminate this function evaluation. However, it's useful for
|
| 363 |
+
# input validation and standardization, and everything else is designed to
|
| 364 |
+
# reduce function calls, so let's keep it simple.
|
| 365 |
+
temp = eim._initialize(func, (x,), args, preserve_shape=preserve_shape)
|
| 366 |
+
func, xs, fs, args, shape, dtype, xp = temp
|
| 367 |
+
x, f = xs[0], fs[0]
|
| 368 |
+
df = np.full_like(f, np.nan)
|
| 369 |
+
# Ideally we'd broadcast the shape of `hdir` in `_elementwise_algo_init`, but
|
| 370 |
+
# it's simpler to do it here than to generalize `_elementwise_algo_init` further.
|
| 371 |
+
# `hdir` and `x` are already broadcasted in `_differentiate_iv`, so we know
|
| 372 |
+
# that `hdir` can be broadcasted to the final shape.
|
| 373 |
+
hdir = np.broadcast_to(hdir, shape).flatten()
|
| 374 |
+
|
| 375 |
+
status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
|
| 376 |
+
nit, nfev = 0, 1 # one function evaluations performed above
|
| 377 |
+
# Boolean indices of left, central, right, and (all) one-sided steps
|
| 378 |
+
il = hdir < 0
|
| 379 |
+
ic = hdir == 0
|
| 380 |
+
ir = hdir > 0
|
| 381 |
+
io = il | ir
|
| 382 |
+
|
| 383 |
+
# Most of these attributes are reasonably obvious, but:
|
| 384 |
+
# - `fs` holds all the function values of all active `x`. The zeroth
|
| 385 |
+
# axis corresponds with active points `x`, the first axis corresponds
|
| 386 |
+
# with the different steps (in the order described in
|
| 387 |
+
# `_differentiate_weights`).
|
| 388 |
+
# - `terms` (which could probably use a better name) is half the `order`,
|
| 389 |
+
# which is always even.
|
| 390 |
+
work = _RichResult(x=x, df=df, fs=f[:, np.newaxis], error=np.nan, h=h0,
|
| 391 |
+
df_last=np.nan, error_last=np.nan, h0=h0, fac=fac,
|
| 392 |
+
atol=atol, rtol=rtol, nit=nit, nfev=nfev,
|
| 393 |
+
status=status, dtype=dtype, terms=(order+1)//2,
|
| 394 |
+
hdir=hdir, il=il, ic=ic, ir=ir, io=io)
|
| 395 |
+
# This is the correspondence between terms in the `work` object and the
|
| 396 |
+
# final result. In this case, the mapping is trivial. Note that `success`
|
| 397 |
+
# is prepended automatically.
|
| 398 |
+
res_work_pairs = [('status', 'status'), ('df', 'df'), ('error', 'error'),
|
| 399 |
+
('nit', 'nit'), ('nfev', 'nfev'), ('x', 'x')]
|
| 400 |
+
|
| 401 |
+
def pre_func_eval(work):
|
| 402 |
+
"""Determine the abscissae at which the function needs to be evaluated.
|
| 403 |
+
|
| 404 |
+
See `_differentiate_weights` for a description of the stencil (pattern
|
| 405 |
+
of the abscissae).
|
| 406 |
+
|
| 407 |
+
In the first iteration, there is only one stored function value in
|
| 408 |
+
`work.fs`, `f(x)`, so we need to evaluate at `order` new points. In
|
| 409 |
+
subsequent iterations, we evaluate at two new points. Note that
|
| 410 |
+
`work.x` is always flattened into a 1D array after broadcasting with
|
| 411 |
+
all `args`, so we add a new axis at the end and evaluate all point
|
| 412 |
+
in one call to the function.
|
| 413 |
+
|
| 414 |
+
For improvement:
|
| 415 |
+
- Consider measuring the step size actually taken, since `(x + h) - x`
|
| 416 |
+
is not identically equal to `h` with floating point arithmetic.
|
| 417 |
+
- Adjust the step size automatically if `x` is too big to resolve the
|
| 418 |
+
step.
|
| 419 |
+
- We could probably save some work if there are no central difference
|
| 420 |
+
steps or no one-sided steps.
|
| 421 |
+
"""
|
| 422 |
+
n = work.terms # half the order
|
| 423 |
+
h = work.h # step size
|
| 424 |
+
c = work.fac # step reduction factor
|
| 425 |
+
d = c**0.5 # square root of step reduction factor (one-sided stencil)
|
| 426 |
+
# Note - no need to be careful about dtypes until we allocate `x_eval`
|
| 427 |
+
|
| 428 |
+
if work.nit == 0:
|
| 429 |
+
hc = h / c**np.arange(n)
|
| 430 |
+
hc = np.concatenate((-hc[::-1], hc))
|
| 431 |
+
else:
|
| 432 |
+
hc = np.asarray([-h, h]) / c**(n-1)
|
| 433 |
+
|
| 434 |
+
if work.nit == 0:
|
| 435 |
+
hr = h / d**np.arange(2*n)
|
| 436 |
+
else:
|
| 437 |
+
hr = np.asarray([h, h/d]) / c**(n-1)
|
| 438 |
+
|
| 439 |
+
n_new = 2*n if work.nit == 0 else 2 # number of new abscissae
|
| 440 |
+
x_eval = np.zeros((len(work.hdir), n_new), dtype=work.dtype)
|
| 441 |
+
il, ic, ir = work.il, work.ic, work.ir
|
| 442 |
+
x_eval[ir] = work.x[ir, np.newaxis] + hr
|
| 443 |
+
x_eval[ic] = work.x[ic, np.newaxis] + hc
|
| 444 |
+
x_eval[il] = work.x[il, np.newaxis] - hr
|
| 445 |
+
return x_eval
|
| 446 |
+
|
| 447 |
+
def post_func_eval(x, f, work):
|
| 448 |
+
""" Estimate the derivative and error from the function evaluations
|
| 449 |
+
|
| 450 |
+
As in `pre_func_eval`: in the first iteration, there is only one stored
|
| 451 |
+
function value in `work.fs`, `f(x)`, so we need to add the `order` new
|
| 452 |
+
points. In subsequent iterations, we add two new points. The tricky
|
| 453 |
+
part is getting the order to match that of the weights, which is
|
| 454 |
+
described in `_differentiate_weights`.
|
| 455 |
+
|
| 456 |
+
For improvement:
|
| 457 |
+
- Change the order of the weights (and steps in `pre_func_eval`) to
|
| 458 |
+
simplify `work_fc` concatenation and eliminate `fc` concatenation.
|
| 459 |
+
- It would be simple to do one-step Richardson extrapolation with `df`
|
| 460 |
+
and `df_last` to increase the order of the estimate and/or improve
|
| 461 |
+
the error estimate.
|
| 462 |
+
- Process the function evaluations in a more numerically favorable
|
| 463 |
+
way. For instance, combining the pairs of central difference evals
|
| 464 |
+
into a second-order approximation and using Richardson extrapolation
|
| 465 |
+
to produce a higher order approximation seemed to retain accuracy up
|
| 466 |
+
to very high order.
|
| 467 |
+
- Alternatively, we could use `polyfit` like Jacobi. An advantage of
|
| 468 |
+
fitting polynomial to more points than necessary is improved noise
|
| 469 |
+
tolerance.
|
| 470 |
+
"""
|
| 471 |
+
n = work.terms
|
| 472 |
+
n_new = n if work.nit == 0 else 1
|
| 473 |
+
il, ic, io = work.il, work.ic, work.io
|
| 474 |
+
|
| 475 |
+
# Central difference
|
| 476 |
+
# `work_fc` is *all* the points at which the function has been evaluated
|
| 477 |
+
# `fc` is the points we're using *this iteration* to produce the estimate
|
| 478 |
+
work_fc = (f[ic, :n_new], work.fs[ic, :], f[ic, -n_new:])
|
| 479 |
+
work_fc = np.concatenate(work_fc, axis=-1)
|
| 480 |
+
if work.nit == 0:
|
| 481 |
+
fc = work_fc
|
| 482 |
+
else:
|
| 483 |
+
fc = (work_fc[:, :n], work_fc[:, n:n+1], work_fc[:, -n:])
|
| 484 |
+
fc = np.concatenate(fc, axis=-1)
|
| 485 |
+
|
| 486 |
+
# One-sided difference
|
| 487 |
+
work_fo = np.concatenate((work.fs[io, :], f[io, :]), axis=-1)
|
| 488 |
+
if work.nit == 0:
|
| 489 |
+
fo = work_fo
|
| 490 |
+
else:
|
| 491 |
+
fo = np.concatenate((work_fo[:, 0:1], work_fo[:, -2*n:]), axis=-1)
|
| 492 |
+
|
| 493 |
+
work.fs = np.zeros((len(ic), work.fs.shape[-1] + 2*n_new))
|
| 494 |
+
work.fs[ic] = work_fc
|
| 495 |
+
work.fs[io] = work_fo
|
| 496 |
+
|
| 497 |
+
wc, wo = _differentiate_weights(work, n)
|
| 498 |
+
work.df_last = work.df.copy()
|
| 499 |
+
work.df[ic] = fc @ wc / work.h
|
| 500 |
+
work.df[io] = fo @ wo / work.h
|
| 501 |
+
work.df[il] *= -1
|
| 502 |
+
|
| 503 |
+
work.h /= work.fac
|
| 504 |
+
work.error_last = work.error
|
| 505 |
+
# Simple error estimate - the difference in derivative estimates between
|
| 506 |
+
# this iteration and the last. This is typically conservative because if
|
| 507 |
+
# convergence has begin, the true error is much closer to the difference
|
| 508 |
+
# between the current estimate and the *next* error estimate. However,
|
| 509 |
+
# we could use Richarson extrapolation to produce an error estimate that
|
| 510 |
+
# is one order higher, and take the difference between that and
|
| 511 |
+
# `work.df` (which would just be constant factor that depends on `fac`.)
|
| 512 |
+
work.error = abs(work.df - work.df_last)
|
| 513 |
+
|
| 514 |
+
def check_termination(work):
|
| 515 |
+
"""Terminate due to convergence, non-finite values, or error increase"""
|
| 516 |
+
stop = np.zeros_like(work.df).astype(bool)
|
| 517 |
+
|
| 518 |
+
i = work.error < work.atol + work.rtol*abs(work.df)
|
| 519 |
+
work.status[i] = eim._ECONVERGED
|
| 520 |
+
stop[i] = True
|
| 521 |
+
|
| 522 |
+
if work.nit > 0:
|
| 523 |
+
i = ~((np.isfinite(work.x) & np.isfinite(work.df)) | stop)
|
| 524 |
+
work.df[i], work.status[i] = np.nan, eim._EVALUEERR
|
| 525 |
+
stop[i] = True
|
| 526 |
+
|
| 527 |
+
# With infinite precision, there is a step size below which
|
| 528 |
+
# all smaller step sizes will reduce the error. But in floating point
|
| 529 |
+
# arithmetic, catastrophic cancellation will begin to cause the error
|
| 530 |
+
# to increase again. This heuristic tries to avoid step sizes that are
|
| 531 |
+
# too small. There may be more theoretically sound approaches for
|
| 532 |
+
# detecting a step size that minimizes the total error, but this
|
| 533 |
+
# heuristic seems simple and effective.
|
| 534 |
+
i = (work.error > work.error_last*10) & ~stop
|
| 535 |
+
work.status[i] = _EERRORINCREASE
|
| 536 |
+
stop[i] = True
|
| 537 |
+
|
| 538 |
+
return stop
|
| 539 |
+
|
| 540 |
+
def post_termination_check(work):
|
| 541 |
+
return
|
| 542 |
+
|
| 543 |
+
def customize_result(res, shape):
|
| 544 |
+
return shape
|
| 545 |
+
|
| 546 |
+
return eim._loop(work, callback, shape, maxiter, func, args, dtype,
|
| 547 |
+
pre_func_eval, post_func_eval, check_termination,
|
| 548 |
+
post_termination_check, customize_result, res_work_pairs,
|
| 549 |
+
xp, preserve_shape)
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
def _differentiate_weights(work, n):
|
| 553 |
+
# This produces the weights of the finite difference formula for a given
|
| 554 |
+
# stencil. In experiments, use of a second-order central difference formula
|
| 555 |
+
# with Richardson extrapolation was more accurate numerically, but it was
|
| 556 |
+
# more complicated, and it would have become even more complicated when
|
| 557 |
+
# adding support for one-sided differences. However, now that all the
|
| 558 |
+
# function evaluation values are stored, they can be processed in whatever
|
| 559 |
+
# way is desired to produce the derivative estimate. We leave alternative
|
| 560 |
+
# approaches to future work. To be more self-contained, here is the theory
|
| 561 |
+
# for deriving the weights below.
|
| 562 |
+
#
|
| 563 |
+
# Recall that the Taylor expansion of a univariate, scalar-values function
|
| 564 |
+
# about a point `x` may be expressed as:
|
| 565 |
+
# f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
|
| 566 |
+
# Suppose we evaluate f(x), f(x+h), and f(x-h). We have:
|
| 567 |
+
# f(x) = f(x)
|
| 568 |
+
# f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
|
| 569 |
+
# f(x - h) = f(x) - f'(x)*h + f''(x)/2!*h**2 + O(h**3)
|
| 570 |
+
# We can solve for weights `wi` such that:
|
| 571 |
+
# w1*f(x) = w1*(f(x))
|
| 572 |
+
# + w2*f(x + h) = w2*(f(x) + f'(x)*h + f''(x)/2!*h**2) + O(h**3)
|
| 573 |
+
# + w3*f(x - h) = w3*(f(x) - f'(x)*h + f''(x)/2!*h**2) + O(h**3)
|
| 574 |
+
# = 0 + f'(x)*h + 0 + O(h**3)
|
| 575 |
+
# Then
|
| 576 |
+
# f'(x) ~ (w1*f(x) + w2*f(x+h) + w3*f(x-h))/h
|
| 577 |
+
# is a finite difference derivative approximation with error O(h**2),
|
| 578 |
+
# and so it is said to be a "second-order" approximation. Under certain
|
| 579 |
+
# conditions (e.g. well-behaved function, `h` sufficiently small), the
|
| 580 |
+
# error in the approximation will decrease with h**2; that is, if `h` is
|
| 581 |
+
# reduced by a factor of 2, the error is reduced by a factor of 4.
|
| 582 |
+
#
|
| 583 |
+
# By default, we use eighth-order formulae. Our central-difference formula
|
| 584 |
+
# uses abscissae:
|
| 585 |
+
# x-h/c**3, x-h/c**2, x-h/c, x-h, x, x+h, x+h/c, x+h/c**2, x+h/c**3
|
| 586 |
+
# where `c` is the step factor. (Typically, the step factor is greater than
|
| 587 |
+
# one, so the outermost points - as written above - are actually closest to
|
| 588 |
+
# `x`.) This "stencil" is chosen so that each iteration, the step can be
|
| 589 |
+
# reduced by the factor `c`, and most of the function evaluations can be
|
| 590 |
+
# reused with the new step size. For example, in the next iteration, we
|
| 591 |
+
# will have:
|
| 592 |
+
# x-h/c**4, x-h/c**3, x-h/c**2, x-h/c, x, x+h/c, x+h/c**2, x+h/c**3, x+h/c**4
|
| 593 |
+
# We do not reuse `x-h` and `x+h` for the new derivative estimate.
|
| 594 |
+
# While this would increase the order of the formula and thus the
|
| 595 |
+
# theoretical convergence rate, it is also less stable numerically.
|
| 596 |
+
# (As noted above, there are other ways of processing the values that are
|
| 597 |
+
# more stable. Thus, even now we store `f(x-h)` and `f(x+h)` in `work.fs`
|
| 598 |
+
# to simplify future development of this sort of improvement.)
|
| 599 |
+
#
|
| 600 |
+
# The (right) one-sided formula is produced similarly using abscissae
|
| 601 |
+
# x, x+h, x+h/d, x+h/d**2, ..., x+h/d**6, x+h/d**7, x+h/d**7
|
| 602 |
+
# where `d` is the square root of `c`. (The left one-sided formula simply
|
| 603 |
+
# uses -h.) When the step size is reduced by factor `c = d**2`, we have
|
| 604 |
+
# abscissae:
|
| 605 |
+
# x, x+h/d**2, x+h/d**3..., x+h/d**8, x+h/d**9, x+h/d**9
|
| 606 |
+
# `d` is chosen as the square root of `c` so that the rate of the step-size
|
| 607 |
+
# reduction is the same per iteration as in the central difference case.
|
| 608 |
+
# Note that because the central difference formulas are inherently of even
|
| 609 |
+
# order, for simplicity, we use only even-order formulas for one-sided
|
| 610 |
+
# differences, too.
|
| 611 |
+
|
| 612 |
+
# It's possible for the user to specify `fac` in, say, double precision but
|
| 613 |
+
# `x` and `args` in single precision. `fac` gets converted to single
|
| 614 |
+
# precision, but we should always use double precision for the intermediate
|
| 615 |
+
# calculations here to avoid additional error in the weights.
|
| 616 |
+
fac = work.fac.astype(np.float64)
|
| 617 |
+
|
| 618 |
+
# Note that if the user switches back to floating point precision with
|
| 619 |
+
# `x` and `args`, then `fac` will not necessarily equal the (lower
|
| 620 |
+
# precision) cached `_differentiate_weights.fac`, and the weights will
|
| 621 |
+
# need to be recalculated. This could be fixed, but it's late, and of
|
| 622 |
+
# low consequence.
|
| 623 |
+
if fac != _differentiate_weights.fac:
|
| 624 |
+
_differentiate_weights.central = []
|
| 625 |
+
_differentiate_weights.right = []
|
| 626 |
+
_differentiate_weights.fac = fac
|
| 627 |
+
|
| 628 |
+
if len(_differentiate_weights.central) != 2*n + 1:
|
| 629 |
+
# Central difference weights. Consider refactoring this; it could
|
| 630 |
+
# probably be more compact.
|
| 631 |
+
i = np.arange(-n, n + 1)
|
| 632 |
+
p = np.abs(i) - 1. # center point has power `p` -1, but sign `s` is 0
|
| 633 |
+
s = np.sign(i)
|
| 634 |
+
|
| 635 |
+
h = s / fac ** p
|
| 636 |
+
A = np.vander(h, increasing=True).T
|
| 637 |
+
b = np.zeros(2*n + 1)
|
| 638 |
+
b[1] = 1
|
| 639 |
+
weights = np.linalg.solve(A, b)
|
| 640 |
+
|
| 641 |
+
# Enforce identities to improve accuracy
|
| 642 |
+
weights[n] = 0
|
| 643 |
+
for i in range(n):
|
| 644 |
+
weights[-i-1] = -weights[i]
|
| 645 |
+
|
| 646 |
+
# Cache the weights. We only need to calculate them once unless
|
| 647 |
+
# the step factor changes.
|
| 648 |
+
_differentiate_weights.central = weights
|
| 649 |
+
|
| 650 |
+
# One-sided difference weights. The left one-sided weights (with
|
| 651 |
+
# negative steps) are simply the negative of the right one-sided
|
| 652 |
+
# weights, so no need to compute them separately.
|
| 653 |
+
i = np.arange(2*n + 1)
|
| 654 |
+
p = i - 1.
|
| 655 |
+
s = np.sign(i)
|
| 656 |
+
|
| 657 |
+
h = s / np.sqrt(fac) ** p
|
| 658 |
+
A = np.vander(h, increasing=True).T
|
| 659 |
+
b = np.zeros(2 * n + 1)
|
| 660 |
+
b[1] = 1
|
| 661 |
+
weights = np.linalg.solve(A, b)
|
| 662 |
+
|
| 663 |
+
_differentiate_weights.right = weights
|
| 664 |
+
|
| 665 |
+
return (_differentiate_weights.central.astype(work.dtype, copy=False),
|
| 666 |
+
_differentiate_weights.right.astype(work.dtype, copy=False))
|
| 667 |
+
_differentiate_weights.central = []
|
| 668 |
+
_differentiate_weights.right = []
|
| 669 |
+
_differentiate_weights.fac = None
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
def _jacobian(func, x, *, atol=None, rtol=None, maxiter=10,
              order=8, initial_step=0.5, step_factor=2.0):
    r"""Evaluate the Jacobian of a function numerically.

    Parameters
    ----------
    func : callable
        The function whose Jacobian is desired. The signature must be::

            func(x: ndarray) -> ndarray

        where each element of ``x`` is a finite real. If the function to be
        differentiated accepts additional arguments, wrap it (e.g. using
        `functools.partial` or ``lambda``) and pass the wrapped callable
        into `_jacobian`. See Notes regarding vectorization and the dimensionality
        of the input and output.
    x : array_like
        Points at which to evaluate the Jacobian. Must have at least one dimension.
        See Notes regarding the dimensionality and vectorization.
    atol, rtol : float, optional
        Absolute and relative tolerances for the stopping condition: iteration
        will stop for each element of the Jacobian when
        ``res.error < atol + rtol * abs(res.df)``. The default `atol` is the
        smallest normal number of the appropriate dtype, and the default `rtol`
        is the square root of the precision of the appropriate dtype.
    order : int, default: 8
        The (positive integer) order of the finite difference formula to be
        used. Odd integers will be rounded up to the next even integer.
    initial_step : float, default: 0.5
        The (absolute) initial step size for the finite difference derivative
        approximation.
    step_factor : float, default: 2.0
        The factor by which the step size is *reduced* in each iteration; i.e.
        the step size in iteration 1 is ``initial_step/step_factor``. If
        ``step_factor < 1``, subsequent steps will be greater than the initial
        step; this may be useful if steps smaller than some threshold are
        undesirable (e.g. due to subtractive cancellation error).
    maxiter : int, default: 10
        The maximum number of iterations of the algorithm to perform.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes.

        success : bool array
            ``True`` when the algorithm terminated successfully (status ``0``).
        status : int array
            An integer representing the exit status of the algorithm.
            ``0`` : The algorithm converged to the specified tolerances.
            ``-1`` : The error estimate increased, so iteration was terminated.
            ``-2`` : The maximum number of iterations was reached.
            ``-3`` : A non-finite value was encountered.
            ``-4`` : Iteration was terminated by `callback`.
            ``1`` : The algorithm is proceeding normally (in `callback` only).
        df : float array
            The Jacobian of `func` at `x`, if the algorithm terminated
            successfully.
        error : float array
            An estimate of the error: the magnitude of the difference between
            the current estimate of the derivative and the estimate in the
            previous iteration.
        nit : int array
            The number of iterations performed.
        nfev : int array
            The number of points at which `func` was evaluated.
        x : float array
            The value at which the derivative of `func` was evaluated.

    See Also
    --------
    _differentiate

    Notes
    -----
    Suppose we wish to evaluate the Jacobian of a function
    :math:`f: \mathbf{R^m} \rightarrow \mathbf{R^n}`, and assign to variables
    ``m`` and ``n`` the positive integer values of :math:`m` and :math:`n`,
    respectively. If we wish to evaluate the Jacobian at a single point,
    then:

    - argument `x` must be an array of shape ``(m,)``
    - argument `func` must be vectorized to accept an array of shape ``(m, p)``.
      The first axis represents the :math:`m` inputs of :math:`f`; the second
      is for evaluating the function at multiple points in a single call.
    - argument `func` must return an array of shape ``(n, p)``. The first
      axis represents the :math:`n` outputs of :math:`f`; the second
      is for the result of evaluating the function at multiple points.
    - attribute ``df`` of the result object will be an array of shape ``(n, m)``,
      the Jacobian.

    This function is also vectorized in the sense that the Jacobian can be
    evaluated at ``k`` points in a single call. In this case, `x` would be an
    array of shape ``(m, k)``, `func` would accept an array of shape
    ``(m, k, p)`` and return an array of shape ``(n, k, p)``, and the ``df``
    attribute of the result would have shape ``(n, m, k)``.

    References
    ----------
    .. [1] Jacobian matrix and determinant, *Wikipedia*,
           https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant

    Examples
    --------
    The Rosenbrock function maps from :math:`\mathbf{R}^m \rightarrow \mathbf{R}`;
    the SciPy implementation `scipy.optimize.rosen` is vectorized to accept an
    array of shape ``(m, p)`` and return an array of shape ``m``. Suppose we wish
    to evaluate the Jacobian (AKA the gradient because the function returns a scalar)
    at ``[0.5, 0.5, 0.5]``.

    >>> import numpy as np
    >>> from scipy.optimize._differentiate import _jacobian as jacobian
    >>> from scipy.optimize import rosen, rosen_der
    >>> m = 3
    >>> x = np.full(m, 0.5)
    >>> res = jacobian(rosen, x)
    >>> ref = rosen_der(x)  # reference value of the gradient
    >>> res.df, ref
    (array([-51., -1., 50.]), array([-51., -1., 50.]))

    As an example of a function with multiple outputs, consider Example 4
    from [1]_.

    >>> def f(x):
    ...     x1, x2, x3 = x
    ...     return [x1, 5*x3, 4*x2**2 - 2*x3, x3*np.sin(x1)]

    The true Jacobian is given by:

    >>> def df(x):
    ...     x1, x2, x3 = x
    ...     one = np.ones_like(x1)
    ...     return [[one, 0*one, 0*one],
    ...             [0*one, 0*one, 5*one],
    ...             [0*one, 8*x2, -2*one],
    ...             [x3*np.cos(x1), 0*one, np.sin(x1)]]

    Evaluate the Jacobian at an arbitrary point.

    >>> rng = np.random.default_rng(389252938452)
    >>> x = rng.random(size=3)
    >>> res = jacobian(f, x)
    >>> ref = df(x)
    >>> res.df.shape == (4, 3)
    True
    >>> np.allclose(res.df, ref)
    True

    Evaluate the Jacobian at 10 arbitrary points in a single call.

    >>> x = rng.random(size=(3, 10))
    >>> res = jacobian(f, x)
    >>> ref = df(x)
    >>> res.df.shape == (4, 3, 10)
    True
    >>> np.allclose(res.df, ref)
    True

    """
    x = np.asarray(x)
    int_dtype = np.issubdtype(x.dtype, np.integer)
    # Promote integer input to float so finite-difference steps are representable.
    x0 = np.asarray(x, dtype=float) if int_dtype else x

    if x0.ndim < 1:
        message = "Argument `x` must be at least 1-D."
        raise ValueError(message)

    # `m` is the number of inputs of `func`; each partial derivative is taken
    # along one of these `m` components.
    m = x0.shape[0]
    i = np.arange(m)

    def wrapped(x):
        # Adapter between `_differentiate` (which perturbs scalars elementwise)
        # and `func`: build an (m, m, ..., p) stack of copies of `x0` and place
        # the perturbed abscissae on the "diagonal" so that row j perturbs only
        # component j of the input.
        p = () if x.ndim == x0.ndim else (x.shape[-1],)  # number of abscissae
        new_dims = (1,) if x.ndim == x0.ndim else (1, -1)
        new_shape = (m, m) + x0.shape[1:] + p
        xph = np.expand_dims(x0, new_dims)
        # `broadcast_to` returns a read-only view; copy before writing into it.
        xph = np.broadcast_to(xph, new_shape).copy()
        xph[i, i] = x
        return func(xph)

    # `preserve_shape=True` keeps the (n, m, ...) structure produced by
    # `wrapped` intact in the result.
    res = _differentiate(wrapped, x, atol=atol, rtol=rtol,
                         maxiter=maxiter, order=order, initial_step=initial_step,
                         step_factor=step_factor, preserve_shape=True)
    del res.x  # the user knows `x`, and the way it gets broadcasted is meaningless here
    return res
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so
ADDED
|
Binary file (43.5 kB). View file
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from typing import ( # noqa: UP035
|
| 3 |
+
Any, Callable, Iterable, TYPE_CHECKING
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from scipy.optimize import OptimizeResult
|
| 8 |
+
from ._constraints import old_bound_to_new, Bounds
|
| 9 |
+
from ._direct import direct as _direct # type: ignore
|
| 10 |
+
|
| 11 |
+
if TYPE_CHECKING:
|
| 12 |
+
import numpy.typing as npt
|
| 13 |
+
|
| 14 |
+
__all__ = ['direct']
|
| 15 |
+
|
| 16 |
+
# Failure/termination messages indexed by the return code of the compiled
# `_direct` routine; `direct` below maps positive, small-negative, and
# large-negative codes into this tuple (some entries take a `.format` value).
ERROR_MESSAGES = (
    "Number of function evaluations done is larger than maxfun={}",
    "Number of iterations is larger than maxiter={}",
    "u[i] < l[i] for some i",
    "maxfun is too large",
    "Initialization failed",
    "There was an error in the creation of the sample points",
    "An error occurred while the function was sampled",
    "Maximum number of levels has been reached.",
    "Forced stop",
    "Invalid arguments",
    "Out of memory",
)
|
| 29 |
+
|
| 30 |
+
# Success messages for return codes 3, 4, 5 of the compiled `_direct` routine
# (index = ret_code - 3); each is formatted with the corresponding tolerance.
SUCCESS_MESSAGES = (
    ("The best function value found is within a relative error={} "
     "of the (known) global optimum f_min"),
    ("The volume of the hyperrectangle containing the lowest function value "
     "found is below vol_tol={}"),
    ("The side length measure of the hyperrectangle containing the lowest "
     "function value found is below len_tol={}"),
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def direct(
    func: Callable[[npt.ArrayLike, tuple[Any]], float],
    bounds: Iterable | Bounds,
    *,
    args: tuple = (),
    eps: float = 1e-4,
    maxfun: int | None = None,
    maxiter: int = 1000,
    locally_biased: bool = True,
    f_min: float = -np.inf,
    f_min_rtol: float = 1e-4,
    vol_tol: float = 1e-16,
    len_tol: float = 1e-6,
    callback: Callable[[npt.ArrayLike], None] | None = None
) -> OptimizeResult:
    """
    Finds the global minimum of a function using the
    DIRECT algorithm.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.
        ``func(x, *args) -> float``
        where ``x`` is an 1-D array with shape (n,) and ``args`` is a tuple of
        the fixed parameters needed to completely specify the function.
    bounds : sequence or `Bounds`
        Bounds for variables. There are two ways to specify the bounds:

        1. Instance of `Bounds` class.
        2. ``(min, max)`` pairs for each element in ``x``.

    args : tuple, optional
        Any additional fixed parameters needed to
        completely specify the objective function.
    eps : float, optional
        Minimal required difference of the objective function values
        between the current best hyperrectangle and the next potentially
        optimal hyperrectangle to be divided. In consequence, `eps` serves as a
        tradeoff between local and global search: the smaller, the more local
        the search becomes. Default is 1e-4.
    maxfun : int or None, optional
        Approximate upper bound on objective function evaluations.
        If `None`, will be automatically set to ``1000 * N`` where ``N``
        represents the number of dimensions. Will be capped if necessary to
        limit DIRECT's RAM usage to app. 1GiB. This will only occur for very
        high dimensional problems and excessive `max_fun`. Default is `None`.
    maxiter : int, optional
        Maximum number of iterations. Default is 1000.
    locally_biased : bool, optional
        If `True` (default), use the locally biased variant of the
        algorithm known as DIRECT_L. If `False`, use the original unbiased
        DIRECT algorithm. For hard problems with many local minima,
        `False` is recommended.
    f_min : float, optional
        Function value of the global optimum. Set this value only if the
        global optimum is known. Default is ``-np.inf``, so that this
        termination criterion is deactivated.
    f_min_rtol : float, optional
        Terminate the optimization once the relative error between the
        current best minimum `f` and the supplied global minimum `f_min`
        is smaller than `f_min_rtol`. This parameter is only used if
        `f_min` is also set. Must lie between 0 and 1. Default is 1e-4.
    vol_tol : float, optional
        Terminate the optimization once the volume of the hyperrectangle
        containing the lowest function value is smaller than `vol_tol`
        of the complete search space. Must lie between 0 and 1.
        Default is 1e-16.
    len_tol : float, optional
        If `locally_biased=True`, terminate the optimization once half of
        the normalized maximal side length of the hyperrectangle containing
        the lowest function value is smaller than `len_tol`.
        If `locally_biased=False`, terminate the optimization once half of
        the normalized diagonal of the hyperrectangle containing the lowest
        function value is smaller than `len_tol`. Must lie between 0 and 1.
        Default is 1e-6.
    callback : callable, optional
        A callback function with signature ``callback(xk)`` where ``xk``
        represents the best function value found so far.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes.

    Notes
    -----
    DIviding RECTangles (DIRECT) is a deterministic global
    optimization algorithm capable of minimizing a black box function with
    its variables subject to lower and upper bound constraints by sampling
    potential solutions in the search space [1]_. The algorithm starts by
    normalising the search space to an n-dimensional unit hypercube.
    It samples the function at the center of this hypercube and at 2n
    (n is the number of variables) more points, 2 in each coordinate
    direction. Using these function values, DIRECT then divides the
    domain into hyperrectangles, each having exactly one of the sampling
    points as its center. In each iteration, DIRECT chooses, using the `eps`
    parameter which defaults to 1e-4, some of the existing hyperrectangles
    to be further divided. This division process continues until either the
    maximum number of iterations or maximum function evaluations allowed
    are exceeded, or the hyperrectangle containing the minimal value found
    so far becomes small enough. If `f_min` is specified, the optimization
    will stop once this function value is reached within a relative tolerance.
    The locally biased variant of DIRECT (originally called DIRECT_L) [2]_ is
    used by default. It makes the search more locally biased and more
    efficient for cases with only a few local minima.

    A note about termination criteria: `vol_tol` refers to the volume of the
    hyperrectangle containing the lowest function value found so far. This
    volume decreases exponentially with increasing dimensionality of the
    problem. Therefore `vol_tol` should be decreased to avoid premature
    termination of the algorithm for higher dimensions. This does not hold
    for `len_tol`: it refers either to half of the maximal side length
    (for ``locally_biased=True``) or half of the diagonal of the
    hyperrectangle (for ``locally_biased=False``).

    This code is based on the DIRECT 2.0.4 Fortran code by Gablonsky et al. at
    https://ctk.math.ncsu.edu/SOFTWARE/DIRECTv204.tar.gz .
    This original version was initially converted via f2c and then cleaned up
    and reorganized by Steven G. Johnson, August 2007, for the NLopt project.
    The `direct` function wraps the C implementation.

    .. versionadded:: 1.9.0

    References
    ----------
    .. [1] Jones, D.R., Perttunen, C.D. & Stuckman, B.E. Lipschitzian
        optimization without the Lipschitz constant. J Optim Theory Appl
        79, 157-181 (1993).
    .. [2] Gablonsky, J., Kelley, C. A Locally-Biased form of the DIRECT
        Algorithm. Journal of Global Optimization 21, 27-37 (2001).

    Examples
    --------
    The following example is a 2-D problem with four local minima: minimizing
    the Styblinski-Tang function
    (https://en.wikipedia.org/wiki/Test_functions_for_optimization).

    >>> from scipy.optimize import direct, Bounds
    >>> def styblinski_tang(pos):
    ...     x, y = pos
    ...     return 0.5 * (x**4 - 16*x**2 + 5*x + y**4 - 16*y**2 + 5*y)
    >>> bounds = Bounds([-4., -4.], [4., 4.])
    >>> result = direct(styblinski_tang, bounds)
    >>> result.x, result.fun, result.nfev
    array([-2.90321597, -2.90321597]), -78.3323279095383, 2011

    The correct global minimum was found but with a huge number of function
    evaluations (2011). Loosening the termination tolerances `vol_tol` and
    `len_tol` can be used to stop DIRECT earlier.

    >>> result = direct(styblinski_tang, bounds, len_tol=1e-3)
    >>> result.x, result.fun, result.nfev
    array([-2.9044353, -2.9044353]), -78.33230330754142, 207

    """
    # convert bounds to new Bounds class if necessary
    if not isinstance(bounds, Bounds):
        if isinstance(bounds, list) or isinstance(bounds, tuple):
            lb, ub = old_bound_to_new(bounds)
            bounds = Bounds(lb, ub)
        else:
            message = ("bounds must be a sequence or "
                       "instance of Bounds class")
            raise ValueError(message)

    # C layer expects contiguous float64 arrays.
    lb = np.ascontiguousarray(bounds.lb, dtype=np.float64)
    ub = np.ascontiguousarray(bounds.ub, dtype=np.float64)

    # validate bounds
    # check that lower bounds are smaller than upper bounds
    if not np.all(lb < ub):
        raise ValueError('Bounds are not consistent min < max')
    # check for infs
    if (np.any(np.isinf(lb)) or np.any(np.isinf(ub))):
        raise ValueError("Bounds must not be inf.")

    # validate tolerances
    if (vol_tol < 0 or vol_tol > 1):
        raise ValueError("vol_tol must be between 0 and 1.")
    if (len_tol < 0 or len_tol > 1):
        raise ValueError("len_tol must be between 0 and 1.")
    if (f_min_rtol < 0 or f_min_rtol > 1):
        raise ValueError("f_min_rtol must be between 0 and 1.")

    # validate maxfun and maxiter
    if maxfun is None:
        # default budget scales with problem dimensionality
        maxfun = 1000 * lb.shape[0]
    if not isinstance(maxfun, int):
        raise ValueError("maxfun must be of type int.")
    if maxfun < 0:
        raise ValueError("maxfun must be > 0.")
    if not isinstance(maxiter, int):
        raise ValueError("maxiter must be of type int.")
    if maxiter < 0:
        raise ValueError("maxiter must be > 0.")

    # validate boolean parameters
    if not isinstance(locally_biased, bool):
        raise ValueError("locally_biased must be True or False.")

    def _func_wrap(x, args=None):
        # Adapter called from C: evaluates the user objective and coerces the
        # result to a Python scalar.
        x = np.asarray(x)
        if args is None:
            f = func(x)
        else:
            f = func(x, *args)
        # always return a float
        return np.asarray(f).item()

    # TODO: fix disp argument
    x, fun, ret_code, nfev, nit = _direct(
        _func_wrap,
        np.asarray(lb), np.asarray(ub),
        args,
        False, eps, maxfun, maxiter,
        locally_biased,
        f_min, f_min_rtol,
        vol_tol, len_tol, callback
    )

    # Map the C routine's return code to a human-readable message.
    # Codes > 2 are successes (3..5 -> SUCCESS_MESSAGES[0..2], formatted with
    # the matching tolerance from `format_val`); codes 1..2 are budget
    # exhaustion (maxfun/maxiter); negative codes are internal errors
    # (NOTE(review): the two negative branches assume the C layer's code
    # numbering — confirm against the `_direct` extension if it changes).
    format_val = (maxfun, maxiter, f_min_rtol, vol_tol, len_tol)
    if ret_code > 2:
        message = SUCCESS_MESSAGES[ret_code - 3].format(
                    format_val[ret_code - 1])
    elif 0 < ret_code <= 2:
        message = ERROR_MESSAGES[ret_code - 1].format(format_val[ret_code - 1])
    elif 0 > ret_code > -100:
        message = ERROR_MESSAGES[abs(ret_code) + 1]
    else:
        message = ERROR_MESSAGES[ret_code + 99]

    return OptimizeResult(x=np.asarray(x), fun=fun, status=ret_code,
                          success=ret_code > 2, message=message,
                          nfev=nfev, nit=nit)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dual Annealing implementation.
|
| 2 |
+
# Copyright (c) 2018 Sylvain Gubian <sylvain.gubian@pmi.com>,
|
| 3 |
+
# Yang Xiang <yang.xiang@pmi.com>
|
| 4 |
+
# Author: Sylvain Gubian, Yang Xiang, PMP S.A.
|
| 5 |
+
|
| 6 |
+
"""
|
| 7 |
+
A Dual Annealing global optimization algorithm
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
from scipy.optimize import OptimizeResult
|
| 12 |
+
from scipy.optimize import minimize, Bounds
|
| 13 |
+
from scipy.special import gammaln
|
| 14 |
+
from scipy._lib._util import check_random_state
|
| 15 |
+
from scipy.optimize._constraints import new_bounds_to_old
|
| 16 |
+
|
| 17 |
+
__all__ = ['dual_annealing']
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class VisitingDistribution:
|
| 21 |
+
"""
|
| 22 |
+
Class used to generate new coordinates based on the distorted
|
| 23 |
+
Cauchy-Lorentz distribution. Depending on the steps within the strategy
|
| 24 |
+
chain, the class implements the strategy for generating new location
|
| 25 |
+
changes.
|
| 26 |
+
|
| 27 |
+
Parameters
|
| 28 |
+
----------
|
| 29 |
+
lb : array_like
|
| 30 |
+
A 1-D NumPy ndarray containing lower bounds of the generated
|
| 31 |
+
components. Neither NaN or inf are allowed.
|
| 32 |
+
ub : array_like
|
| 33 |
+
A 1-D NumPy ndarray containing upper bounds for the generated
|
| 34 |
+
components. Neither NaN or inf are allowed.
|
| 35 |
+
visiting_param : float
|
| 36 |
+
Parameter for visiting distribution. Default value is 2.62.
|
| 37 |
+
Higher values give the visiting distribution a heavier tail, this
|
| 38 |
+
makes the algorithm jump to a more distant region.
|
| 39 |
+
The value range is (1, 3]. Its value is fixed for the life of the
|
| 40 |
+
object.
|
| 41 |
+
rand_gen : {`~numpy.random.RandomState`, `~numpy.random.Generator`}
|
| 42 |
+
A `~numpy.random.RandomState`, `~numpy.random.Generator` object
|
| 43 |
+
for using the current state of the created random generator container.
|
| 44 |
+
|
| 45 |
+
"""
|
| 46 |
+
TAIL_LIMIT = 1.e8
|
| 47 |
+
MIN_VISIT_BOUND = 1.e-10
|
| 48 |
+
|
| 49 |
+
def __init__(self, lb, ub, visiting_param, rand_gen):
|
| 50 |
+
# if you wish to make _visiting_param adjustable during the life of
|
| 51 |
+
# the object then _factor2, _factor3, _factor5, _d1, _factor6 will
|
| 52 |
+
# have to be dynamically calculated in `visit_fn`. They're factored
|
| 53 |
+
# out here so they don't need to be recalculated all the time.
|
| 54 |
+
self._visiting_param = visiting_param
|
| 55 |
+
self.rand_gen = rand_gen
|
| 56 |
+
self.lower = lb
|
| 57 |
+
self.upper = ub
|
| 58 |
+
self.bound_range = ub - lb
|
| 59 |
+
|
| 60 |
+
# these are invariant numbers unless visiting_param changes
|
| 61 |
+
self._factor2 = np.exp((4.0 - self._visiting_param) * np.log(
|
| 62 |
+
self._visiting_param - 1.0))
|
| 63 |
+
self._factor3 = np.exp((2.0 - self._visiting_param) * np.log(2.0)
|
| 64 |
+
/ (self._visiting_param - 1.0))
|
| 65 |
+
self._factor4_p = np.sqrt(np.pi) * self._factor2 / (self._factor3 * (
|
| 66 |
+
3.0 - self._visiting_param))
|
| 67 |
+
|
| 68 |
+
self._factor5 = 1.0 / (self._visiting_param - 1.0) - 0.5
|
| 69 |
+
self._d1 = 2.0 - self._factor5
|
| 70 |
+
self._factor6 = np.pi * (1.0 - self._factor5) / np.sin(
|
| 71 |
+
np.pi * (1.0 - self._factor5)) / np.exp(gammaln(self._d1))
|
| 72 |
+
|
| 73 |
+
    def visiting(self, x, step, temperature):
        """Generate new coordinates from ``x`` for the current chain step.

        For the first ``dim`` steps of the strategy chain all components
        are perturbed at once; for later steps a single component (selected
        by ``step - dim``) is changed. Perturbations come from `visit_fn`
        and the result is wrapped back into the search bounds.
        """
        dim = x.size
        if step < dim:
            # Changing all coordinates with a new visiting value
            visits = self.visit_fn(temperature, dim)
            upper_sample, lower_sample = self.rand_gen.uniform(size=2)
            # Clip heavy-tail deviates to a random fraction of TAIL_LIMIT so
            # the jump stays finite.
            visits[visits > self.TAIL_LIMIT] = self.TAIL_LIMIT * upper_sample
            visits[visits < -self.TAIL_LIMIT] = -self.TAIL_LIMIT * lower_sample
            x_visit = visits + x
            # Wrap out-of-bounds components back into [lower, upper] with
            # modular arithmetic over the bound range.
            a = x_visit - self.lower
            b = np.fmod(a, self.bound_range) + self.bound_range
            x_visit = np.fmod(b, self.bound_range) + self.lower
            # Nudge components sitting (numerically) on the lower bound.
            x_visit[np.fabs(
                x_visit - self.lower) < self.MIN_VISIT_BOUND] += 1.e-10
        else:
            # Changing only one coordinate at a time based on strategy
            # chain step
            x_visit = np.copy(x)
            visit = self.visit_fn(temperature, 1)[0]
            if visit > self.TAIL_LIMIT:
                visit = self.TAIL_LIMIT * self.rand_gen.uniform()
            elif visit < -self.TAIL_LIMIT:
                visit = -self.TAIL_LIMIT * self.rand_gen.uniform()
            index = step - dim
            x_visit[index] = visit + x[index]
            # Same modular wrap-around, restricted to the chosen component.
            a = x_visit[index] - self.lower[index]
            b = np.fmod(a, self.bound_range[index]) + self.bound_range[index]
            x_visit[index] = np.fmod(b, self.bound_range[
                index]) + self.lower[index]
            if np.fabs(x_visit[index] - self.lower[
                    index]) < self.MIN_VISIT_BOUND:
                x_visit[index] += self.MIN_VISIT_BOUND
        return x_visit
    def visit_fn(self, temperature, dim):
        """ Formula Visita from p. 405 of reference [2] """
        # One pair of independent standard normal draws per component; x is
        # rescaled below into the visiting deviate, y forms its denominator.
        x, y = self.rand_gen.normal(size=(dim, 2)).T

        # Temperature-dependent scale of the distorted Cauchy-Lorentz
        # visiting distribution.
        factor1 = np.exp(np.log(temperature) / (self._visiting_param - 1.0))
        factor4 = self._factor4_p * factor1

        # sigmax
        x *= np.exp(-(self._visiting_param - 1.0) * np.log(
            self._factor6 / factor4) / (3.0 - self._visiting_param))

        den = np.exp((self._visiting_param - 1.0) * np.log(np.fabs(y)) /
                     (3.0 - self._visiting_param))

        return x / den
class EnergyState:
    """
    Class used to record the energy state. At any time, it knows what is the
    currently used coordinates and the most recent best location.

    Parameters
    ----------
    lower : array_like
        A 1-D NumPy ndarray containing lower bounds for generating an initial
        random components in the `reset` method.
    upper : array_like
        A 1-D NumPy ndarray containing upper bounds for generating an initial
        random components in the `reset` method
        components. Neither NaN or inf are allowed.
    callback : callable, ``callback(x, f, context)``, optional
        A callback function which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and `context` has value in [0, 1, 2]
    """
    # Maximum number of trials for generating a valid starting point
    MAX_REINIT_COUNT = 1000

    def __init__(self, lower, upper, callback=None):
        # Best energy (objective value) seen so far and its location.
        self.ebest = None
        self.xbest = None
        # Energy and location currently accepted by the annealing chain.
        self.current_energy = None
        self.current_location = None
        self.lower = lower
        self.upper = upper
        self.callback = callback

    def reset(self, func_wrapper, rand_gen, x0=None):
        """
        Initialize current location is the search domain. If `x0` is not
        provided, a random location within the bounds is generated.

        Raises
        ------
        ValueError
            If the objective returns None, or keeps producing non-finite
            values after ``MAX_REINIT_COUNT`` random restarts.
        """
        if x0 is None:
            self.current_location = rand_gen.uniform(self.lower, self.upper,
                                                     size=len(self.lower))
        else:
            self.current_location = np.copy(x0)
        init_error = True
        reinit_counter = 0
        while init_error:
            self.current_energy = func_wrapper.fun(self.current_location)
            if self.current_energy is None:
                raise ValueError('Objective function is returning None')
            # np.isfinite is already False for NaN and +/-inf, so one check
            # suffices (the former extra np.isnan test was redundant).
            if not np.isfinite(self.current_energy):
                if reinit_counter >= EnergyState.MAX_REINIT_COUNT:
                    init_error = False
                    message = (
                        'Stopping algorithm because function '
                        'create NaN or (+/-) infinity values even with '
                        'trying new random parameters'
                    )
                    raise ValueError(message)
                # Retry from a fresh random point inside the bounds.
                self.current_location = rand_gen.uniform(self.lower,
                                                         self.upper,
                                                         size=self.lower.size)
                reinit_counter += 1
            else:
                init_error = False
                # If first time reset, initialize ebest and xbest
                if self.ebest is None and self.xbest is None:
                    self.ebest = self.current_energy
                    self.xbest = np.copy(self.current_location)
                # Otherwise, we keep them in case of reannealing reset

    def update_best(self, e, x, context):
        """Record a new best energy ``e`` at location ``x``.

        Invokes the user callback (if any) with the given ``context`` and
        returns a stop message string when the callback requests an early
        stop; otherwise returns None.
        """
        self.ebest = e
        self.xbest = np.copy(x)
        if self.callback is not None:
            val = self.callback(x, e, context)
            if val is not None:
                if val:
                    return ('Callback function requested to stop early by '
                            'returning True')

    def update_current(self, e, x):
        """Set the currently accepted energy and location."""
        self.current_energy = e
        self.current_location = np.copy(x)
class StrategyChain:
    """
    Class that implements within a Markov chain the strategy for location
    acceptance and local search decision making.

    Parameters
    ----------
    acceptance_param : float
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    visit_dist : VisitingDistribution
        Instance of `VisitingDistribution` class.
    func_wrapper : ObjectiveFunWrapper
        Instance of `ObjectiveFunWrapper` class.
    minimizer_wrapper: LocalSearchWrapper
        Instance of `LocalSearchWrapper` class.
    rand_gen : {None, int, `numpy.random.Generator`,
                `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
    energy_state: EnergyState
        Instance of `EnergyState` class.

    """

    def __init__(self, acceptance_param, visit_dist, func_wrapper,
                 minimizer_wrapper, rand_gen, energy_state):
        # Local strategy chain minimum energy and location
        self.emin = energy_state.current_energy
        self.xmin = np.array(energy_state.current_location)
        # Global optimizer state
        self.energy_state = energy_state
        # Acceptance parameter
        self.acceptance_param = acceptance_param
        # Visiting distribution instance
        self.visit_dist = visit_dist
        # Wrapper to objective function
        self.func_wrapper = func_wrapper
        # Wrapper to the local minimizer
        self.minimizer_wrapper = minimizer_wrapper
        # Steps since the last improvement, and the threshold that forces
        # a local search / chain-minimum refresh.
        self.not_improved_idx = 0
        self.not_improved_max_idx = 1000
        self._rand_gen = rand_gen
        self.temperature_step = 0
        # Constant used by `local_search` to decide on probabilistic LS runs.
        self.K = 100 * len(energy_state.current_location)

    def accept_reject(self, j, e, x_visit):
        """Generalized (Tsallis) acceptance test for a worse energy ``e``.

        Accepts ``x_visit`` with probability derived from
        ``acceptance_param`` and the current ``temperature_step``.
        """
        r = self._rand_gen.uniform()
        pqv_temp = 1.0 - ((1.0 - self.acceptance_param) *
            (e - self.energy_state.current_energy) / self.temperature_step)
        if pqv_temp <= 0.:
            # Negative base would make the generalized power undefined:
            # zero acceptance probability in that regime.
            pqv = 0.
        else:
            pqv = np.exp(np.log(pqv_temp) / (
                1. - self.acceptance_param))

        if r <= pqv:
            # We accept the new location and update state
            self.energy_state.update_current(e, x_visit)
            self.xmin = np.copy(self.energy_state.current_location)

        # No improvement for a long time
        if self.not_improved_idx >= self.not_improved_max_idx:
            if j == 0 or self.energy_state.current_energy < self.emin:
                self.emin = self.energy_state.current_energy
                self.xmin = np.copy(self.energy_state.current_location)

    def run(self, step, temperature):
        """Run one strategy-chain sweep (``2 * dim`` visits) at the given
        temperature. Returns a stop-message string or None."""
        self.temperature_step = temperature / float(step + 1)
        self.not_improved_idx += 1
        for j in range(self.energy_state.current_location.size * 2):
            if j == 0:
                if step == 0:
                    self.energy_state_improved = True
                else:
                    self.energy_state_improved = False
            x_visit = self.visit_dist.visiting(
                self.energy_state.current_location, j, temperature)
            # Calling the objective function
            e = self.func_wrapper.fun(x_visit)
            if e < self.energy_state.current_energy:
                # We have got a better energy value
                self.energy_state.update_current(e, x_visit)
                if e < self.energy_state.ebest:
                    val = self.energy_state.update_best(e, x_visit, 0)
                    if val is not None:
                        if val:
                            return val
                    self.energy_state_improved = True
                    self.not_improved_idx = 0
            else:
                # We have not improved but do we accept the new location?
                self.accept_reject(j, e, x_visit)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function call reached '
                        'during annealing')
        # End of StrategyChain loop

    def local_search(self):
        """Possibly run a local search after a chain sweep.

        Returns a stop-message string or None.
        """
        # Decision making for performing a local search
        # based on strategy chain results
        # If energy has been improved or no improvement since too long,
        # performing a local search with the best strategy chain location
        if self.energy_state_improved:
            # Global energy has improved, let's see if LS improves further
            e, x = self.minimizer_wrapper.local_search(self.energy_state.xbest,
                                                       self.energy_state.ebest)
            if e < self.energy_state.ebest:
                self.not_improved_idx = 0
                val = self.energy_state.update_best(e, x, 1)
                if val is not None:
                    if val:
                        return val
                self.energy_state.update_current(e, x)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function call reached '
                        'during local search')
        # Check probability of a need to perform a LS even if no improvement
        do_ls = False
        if self.K < 90 * len(self.energy_state.current_location):
            pls = np.exp(self.K * (
                self.energy_state.ebest - self.energy_state.current_energy) /
                self.temperature_step)
            if pls >= self._rand_gen.uniform():
                do_ls = True
        # Global energy not improved, let's see what LS gives
        # on the best strategy chain location
        if self.not_improved_idx >= self.not_improved_max_idx:
            do_ls = True
        if do_ls:
            e, x = self.minimizer_wrapper.local_search(self.xmin, self.emin)
            self.xmin = np.copy(x)
            self.emin = e
            self.not_improved_idx = 0
            # Tighten the no-improvement threshold after a forced LS.
            self.not_improved_max_idx = self.energy_state.current_location.size
            if e < self.energy_state.ebest:
                val = self.energy_state.update_best(
                    self.emin, self.xmin, 2)
                if val is not None:
                    if val:
                        return val
                self.energy_state.update_current(e, x)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function call reached '
                        'during dual annealing')
class ObjectiveFunWrapper:
    """Wrap an objective function, binding fixed extra arguments and
    counting how many times the objective (and, when used elsewhere,
    its gradient/hessian) has been evaluated.
    """

    def __init__(self, func, maxfun=1e7, *args):
        self.func = func
        self.args = args
        # Soft limit on objective evaluations, checked by the caller.
        self.maxfun = maxfun
        # Number of objective function evaluations
        self.nfev = 0
        # Number of gradient function evaluation if used
        self.ngev = 0
        # Number of hessian of the objective function if used
        self.nhev = 0

    def fun(self, x):
        """Evaluate the wrapped objective at ``x``, counting the call."""
        self.nfev += 1
        return self.func(x, *self.args)
class LocalSearchWrapper:
    """
    Class used to wrap around the minimizer used for local search
    Default local minimizer is SciPy minimizer L-BFGS-B
    """

    LS_MAXITER_RATIO = 6
    LS_MAXITER_MIN = 100
    LS_MAXITER_MAX = 1000

    def __init__(self, search_bounds, func_wrapper, *args, **kwargs):
        self.func_wrapper = func_wrapper
        self.kwargs = kwargs
        self.jac = self.kwargs.get('jac', None)
        self.hess = self.kwargs.get('hess', None)
        self.hessp = self.kwargs.get('hessp', None)
        # The fixed extra arguments are bound via closures below, never
        # forwarded through the minimizer kwargs.
        self.kwargs.pop("args", None)
        self.minimizer = minimize
        low, high = zip(*search_bounds)
        self.lower = np.array(low)
        self.upper = np.array(high)

        # If no minimizer specified, use SciPy minimize with 'L-BFGS-B' method
        if not self.kwargs:
            n = len(self.lower)
            # Iteration budget scales with dimension, clamped to
            # [LS_MAXITER_MIN, LS_MAXITER_MAX].
            ls_max_iter = min(
                max(n * self.LS_MAXITER_RATIO, self.LS_MAXITER_MIN),
                self.LS_MAXITER_MAX)
            self.kwargs['method'] = 'L-BFGS-B'
            self.kwargs['options'] = {'maxiter': ls_max_iter}
            self.kwargs['bounds'] = list(zip(self.lower, self.upper))
        else:
            # Bind *args into any user-supplied derivative callables; the
            # default-argument trick freezes the current callable.
            if callable(self.jac):
                self.kwargs['jac'] = (
                    lambda x, _jac=self.jac: _jac(x, *args))
            if callable(self.hess):
                self.kwargs['hess'] = (
                    lambda x, _hess=self.hess: _hess(x, *args))
            if callable(self.hessp):
                self.kwargs['hessp'] = (
                    lambda x, p, _hessp=self.hessp: _hessp(x, p, *args))

    def local_search(self, x, e):
        """Run the configured local minimizer from ``x`` (energy ``e``).

        Returns the minimizer's ``(fun, x)`` when it produced a finite,
        in-bounds and strictly better point; otherwise returns the inputs
        unchanged.
        """
        x_start = np.copy(x)
        mres = self.minimizer(self.func_wrapper.fun, x, **self.kwargs)
        # Propagate derivative-evaluation counts to the shared wrapper.
        if 'njev' in mres:
            self.func_wrapper.ngev += mres.njev
        if 'nhev' in mres:
            self.func_wrapper.nhev += mres.nhev
        # Accept only finite results that stayed inside the search bounds.
        is_valid = (np.all(np.isfinite(mres.x))
                    and np.isfinite(mres.fun)
                    and np.all(self.lower <= mres.x)
                    and np.all(mres.x <= self.upper))
        if is_valid and mres.fun < e:
            return mres.fun, mres.x
        return e, x_start
def dual_annealing(func, bounds, args=(), maxiter=1000,
                   minimizer_kwargs=None, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None, x0=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a tuple of any additional fixed parameters needed to
        completely specify the function.
    bounds : sequence or `Bounds`
        Bounds for variables. There are two ways to specify the bounds:

        1. Instance of `Bounds` class.
        2. Sequence of ``(min, max)`` pairs for each element in `x`.

    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
    minimizer_kwargs : dict, optional
        Keyword arguments to be passed to the local minimizer
        (`minimize`). An important option could be ``method`` for the minimizer
        method to use.
        If no keyword arguments are provided, the local minimizer defaults to
        'L-BFGS-B' and uses the already supplied bounds. If `minimizer_kwargs`
        is specified, then the dict must contain all parameters required to
        control the local minimization. `args` is ignored in this dict, as it is
        passed automatically. `bounds` is not automatically passed on to the
        local minimizer as the method may not support them.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitates a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (1, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution function
        and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single N-D starting point.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is a acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1998).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014).
        :doi:`10.18637/jss.v060.i06`

    Examples
    --------
    The following example is a 10-D problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> import numpy as np
    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)))
    >>> ret.x
    array([-4.26437714e-09, -3.91699361e-09, -1.86149218e-09, -3.97165720e-09,
           -6.29151648e-09, -6.53145322e-09, -3.93616815e-09, -6.55623025e-09,
           -6.05775280e-09, -5.00668935e-09]) # random
    >>> ret.fun
    0.000000

    """

    if isinstance(bounds, Bounds):
        bounds = new_bounds_to_old(bounds.lb, bounds.ub, len(bounds.lb))

    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)

    # minimizer_kwargs has to be a dict, not None
    minimizer_kwargs = minimizer_kwargs or {}

    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, *args, **minimizer_kwargs)

    # Initialization of random Generator for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    # Numerator of the visiting temperature schedule
    # T(t) = T(1) * (2^(qv-1) - 1) / ((1 + t)^(qv-1) - 1); see Notes.
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # Run the search loop
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                # Restart from a fresh random location, keeping the best
                # energy/location found so far.
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so
ADDED
|
Binary file (99.8 kB). View file
|
|
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hessian update strategies for quasi-Newton optimization methods."""
|
| 2 |
+
import numpy as np
|
| 3 |
+
from numpy.linalg import norm
|
| 4 |
+
from scipy.linalg import get_blas_funcs, issymmetric
|
| 5 |
+
from warnings import warn
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class HessianUpdateStrategy:
    """Interface for implementing Hessian update strategies.

    Many optimization methods make use of Hessian (or inverse Hessian)
    approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
    Some of these approximations, however, do not actually need to store
    the entire matrix or can compute the internal matrix product with a
    given vector in a very efficiently manner. This class serves as an
    abstract interface between the optimization algorithm and the
    quasi-Newton update strategies, giving freedom of implementation
    to store and update the internal matrix as efficiently as possible.
    Different choices of initialization and update procedure will result
    in different quasi-Newton strategies.

    Four methods should be implemented in derived classes: ``initialize``,
    ``update``, ``dot`` and ``get_matrix``.

    Notes
    -----
    Any instance of a class that implements this interface,
    can be accepted by the method ``minimize`` and used by
    the compatible solvers to approximate the Hessian (or
    inverse Hessian) used by the optimization algorithms.
    """

    def initialize(self, n, approx_type):
        """Allocate internal memory for the approximation matrix.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        # Abstract: concrete strategies must allocate their own storage.
        raise NotImplementedError(
            "The method ``initialize(n, approx_type)`` is not implemented.")

    def update(self, delta_x, delta_grad):
        """Incorporate the latest step into the approximation.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function have been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        # Abstract: the update rule defines the quasi-Newton method.
        raise NotImplementedError(
            "The method ``update(delta_x, delta_grad)`` is not implemented.")

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D represents the result of multiplying the approximation matrix
            by vector p.
        """
        # Abstract: implementations may avoid materializing the matrix.
        raise NotImplementedError(
            "The method ``dot(p)`` is not implemented.")

    def get_matrix(self):
        """Return current internal matrix.

        Returns
        -------
        H : ndarray, shape (n, n)
            Dense matrix containing either the Hessian
            or its inverse (depending on how 'approx_type'
            is defined).
        """
        # Abstract: dense materialization of the current approximation.
        raise NotImplementedError(
            "The method ``get_matrix(p)`` is not implemented.")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class FullHessianUpdateStrategy(HessianUpdateStrategy):
    """Hessian update strategy with full dimensional internal representation.
    """
    # Low-level BLAS routines, bound once at class-creation time for speed.
    _syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank 1 update
    _syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank 2 update
    # Symmetric matrix-vector product
    _symv = get_blas_funcs('symv', dtype='d')

    def __init__(self, init_scale='auto'):
        """Store configuration; actual allocation happens in ``initialize``."""
        self.init_scale = init_scale
        # Until initialize is called we can't really use the class,
        # so it makes sense to set everything to None.
        self.first_iteration = None
        self.approx_type = None
        self.B = None
        self.H = None

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        self.first_iteration = True
        self.n = n
        self.approx_type = approx_type
        if approx_type not in ('hess', 'inv_hess'):
            raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
        # Create matrix: identity is the neutral starting approximation;
        # it may be rescaled/replaced on the first call to ``update``.
        if self.approx_type == 'hess':
            self.B = np.eye(n, dtype=float)
        else:
            self.H = np.eye(n, dtype=float)

    def _auto_scale(self, delta_x, delta_grad):
        """Return the scalar used to scale the matrix on the first update."""
        # Heuristic to scale matrix at first iteration.
        # Described in Nocedal and Wright "Numerical Optimization"
        # p.143 formula (6.20).
        s_norm2 = np.dot(delta_x, delta_x)
        y_norm2 = np.dot(delta_grad, delta_grad)
        ys = np.abs(np.dot(delta_grad, delta_x))
        # Degenerate step/gradient: fall back to no scaling.
        if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
            return 1
        if self.approx_type == 'hess':
            return y_norm2 / ys
        else:
            return ys / y_norm2

    def _update_implementation(self, delta_x, delta_grad):
        """Hook for subclasses: apply the concrete quasi-Newton update."""
        raise NotImplementedError("The method ``_update_implementation``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function have been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        # A zero step carries no curvature information: nothing to do.
        if np.all(delta_x == 0.0):
            return
        if np.all(delta_grad == 0.0):
            warn('delta_grad == 0.0. Check if the approximated '
                 'function is linear. If the function is linear '
                 'better results can be obtained by defining the '
                 'Hessian as zero instead of using quasi-Newton '
                 'approximations.',
                 UserWarning, stacklevel=2)
            return
        if self.first_iteration:
            # Get user specific scale
            if isinstance(self.init_scale, str) and self.init_scale == "auto":
                scale = self._auto_scale(delta_x, delta_grad)
            else:
                scale = self.init_scale

            # Check for complex: numpy will silently cast a complex array to
            # a real one but not so for scalar as it raises a TypeError.
            # Checking here brings a consistent behavior.
            replace = False
            if np.size(scale) == 1:
                # to account for the legacy behavior having the exact same cast
                scale = float(scale)
            elif np.iscomplexobj(scale):
                raise TypeError("init_scale contains complex elements, "
                                "must be real.")
            else:  # test explicitly for allowed shapes and values
                replace = True
                if self.approx_type == 'hess':
                    shape = np.shape(self.B)
                    dtype = self.B.dtype
                else:
                    shape = np.shape(self.H)
                    dtype = self.H.dtype
                # copy, will replace the original
                scale = np.array(scale, dtype=dtype, copy=True)

                # it has to match the shape of the matrix for the multiplication,
                # no implicit broadcasting is allowed
                if shape != (init_shape := np.shape(scale)):
                    raise ValueError("If init_scale is an array, it must have the "
                                     f"dimensions of the hess/inv_hess: {shape}."
                                     f" Got {init_shape}.")
                if not issymmetric(scale):
                    raise ValueError("If init_scale is an array, it must be"
                                     " symmetric (passing scipy.linalg.issymmetric)"
                                     " to be an approximation of a hess/inv_hess.")

            # Scale initial matrix with ``scale * np.eye(n)`` or replace
            # This is not ideal, we could assign the scale directly in
            # initialize, but we would need to
            if self.approx_type == 'hess':
                if replace:
                    self.B = scale
                else:
                    self.B *= scale
            else:
                if replace:
                    self.H = scale
                else:
                    self.H *= scale
            self.first_iteration = False
        self._update_implementation(delta_x, delta_grad)

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D represents the result of multiplying the approximation matrix
            by vector p.
        """
        # ``symv`` only reads one triangle of the stored (symmetric) matrix.
        if self.approx_type == 'hess':
            return self._symv(1, self.B, p)
        else:
            return self._symv(1, self.H, p)

    def get_matrix(self):
        """Return the current internal matrix.

        Returns
        -------
        M : ndarray, shape (n, n)
            Dense matrix containing either the Hessian or its inverse
            (depending on how `approx_type` was defined).
        """
        if self.approx_type == 'hess':
            M = np.copy(self.B)
        else:
            M = np.copy(self.H)
        # BLAS updates only maintain one triangle; mirror the upper
        # triangle into the lower one to return a fully symmetric matrix.
        li = np.tril_indices_from(M, k=-1)
        M[li] = M.T[li]
        return M
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
class BFGS(FullHessianUpdateStrategy):
    """Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.

    Parameters
    ----------
    exception_strategy : {'skip_update', 'damp_update'}, optional
        Define how to proceed when the curvature condition is violated.
        Set it to 'skip_update' to just skip the update. Or, alternatively,
        set it to 'damp_update' to interpolate between the actual BFGS
        result and the unmodified matrix. Both exceptions strategies
        are explained in [1]_, p.536-537.
    min_curvature : float
        This number, scaled by a normalization factor, defines the
        minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
        unaffected by the exception strategy. By default is equal to
        1e-8 when ``exception_strategy = 'skip_update'`` and equal
        to 0.2 when ``exception_strategy = 'damp_update'``.
    init_scale : {float, np.array, 'auto'}
        This parameter can be used to initialize the Hessian or its
        inverse. When a float is given, the relevant array is initialized
        to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
        Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
        this array will be used. Otherwise an error is generated.
        Set it to 'auto' in order to use an automatic heuristic for choosing
        the initial scale. The heuristic is described in [1]_, p.143.
        The default is 'auto'.

    Notes
    -----
    The update is based on the description in [1]_, p.140.

    References
    ----------
    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
           Second Edition (2006).
    """

    def __init__(self, exception_strategy='skip_update', min_curvature=None,
                 init_scale='auto'):
        # Each exception strategy has its own default curvature threshold.
        if exception_strategy == 'skip_update':
            if min_curvature is not None:
                self.min_curvature = min_curvature
            else:
                self.min_curvature = 1e-8
        elif exception_strategy == 'damp_update':
            if min_curvature is not None:
                self.min_curvature = min_curvature
            else:
                self.min_curvature = 0.2
        else:
            raise ValueError("`exception_strategy` must be 'skip_update' "
                             "or 'damp_update'.")

        super().__init__(init_scale)
        self.exception_strategy = exception_strategy

    def _update_inverse_hessian(self, ys, Hy, yHy, s):
        """Update the inverse Hessian matrix.

        BFGS update using the formula:

            ``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
                     - 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``

        where ``s = delta_x`` and ``y = delta_grad``. This formula is
        equivalent to (6.17) in [1]_ written in a more efficient way
        for implementation.

        References
        ----------
        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
               Second Edition (2006).
        """
        # Rank-2 then rank-1 BLAS updates implement the formula above.
        self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
        self.H = self._syr((ys + yHy) / ys ** 2, s, a=self.H)

    def _update_hessian(self, ys, Bs, sBs, y):
        """Update the Hessian matrix.

        BFGS update using the formula:

            ``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``

        where ``s`` is short for ``delta_x`` and ``y`` is short
        for ``delta_grad``. Formula (6.19) in [1]_.

        References
        ----------
        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
               Second Edition (2006).
        """
        self.B = self._syr(1.0 / ys, y, a=self.B)
        self.B = self._syr(-1.0 / sBs, Bs, a=self.B)

    def _update_implementation(self, delta_x, delta_grad):
        """Apply the BFGS update to the stored matrix (or its inverse)."""
        # Auxiliary variables w and z: swapping the roles of step and
        # gradient difference lets one code path serve both 'hess'
        # and 'inv_hess' (the formulas are duals of each other).
        if self.approx_type == 'hess':
            w = delta_x
            z = delta_grad
        else:
            w = delta_grad
            z = delta_x
        # Do some common operations
        wz = np.dot(w, z)
        Mw = self.dot(w)
        wMw = Mw.dot(w)
        # Guarantee that wMw > 0 by reinitializing matrix.
        # While this is always true in exact arithmetic,
        # indefinite matrix may appear due to roundoff errors.
        if wMw <= 0.0:
            scale = self._auto_scale(delta_x, delta_grad)
            # Reinitialize matrix
            if self.approx_type == 'hess':
                self.B = scale * np.eye(self.n, dtype=float)
            else:
                self.H = scale * np.eye(self.n, dtype=float)
            # Do common operations for new matrix
            Mw = self.dot(w)
            wMw = Mw.dot(w)
        # Check if curvature condition is violated
        if wz <= self.min_curvature * wMw:
            # If the option 'skip_update' is set
            # we just skip the update when the condition
            # is violated.
            if self.exception_strategy == 'skip_update':
                return
            # If the option 'damp_update' is set we
            # interpolate between the actual BFGS
            # result and the unmodified matrix.
            elif self.exception_strategy == 'damp_update':
                update_factor = (1-self.min_curvature) / (1 - wz/wMw)
                z = update_factor*z + (1-update_factor)*Mw
                wz = np.dot(w, z)
        # Update matrix
        if self.approx_type == 'hess':
            self._update_hessian(wz, Mw, wMw, z)
        else:
            self._update_inverse_hessian(wz, Mw, wMw, z)
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
class SR1(FullHessianUpdateStrategy):
    """Symmetric-rank-1 Hessian update strategy.

    Parameters
    ----------
    min_denominator : float
        This number, scaled by a normalization factor,
        defines the minimum denominator magnitude allowed
        in the update. When the condition is violated we skip
        the update. By default uses ``1e-8``.
    init_scale : {float, np.array, 'auto'}, optional
        This parameter can be used to initialize the Hessian or its
        inverse. When a float is given, the relevant array is initialized
        to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
        Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
        this array will be used. Otherwise an error is generated.
        Set it to 'auto' in order to use an automatic heuristic for choosing
        the initial scale. The heuristic is described in [1]_, p.143.
        The default is 'auto'.

    Notes
    -----
    The update is based on the description in [1]_, p.144-146.

    References
    ----------
    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
           Second Edition (2006).
    """

    def __init__(self, min_denominator=1e-8, init_scale='auto'):
        super().__init__(init_scale)
        self.min_denominator = min_denominator

    def _update_implementation(self, delta_x, delta_grad):
        """Apply the SR1 rank-1 update to the stored matrix."""
        # Map (delta_x, delta_grad) onto a generic pair (w, z) so one
        # formula covers both the Hessian and the inverse-Hessian case.
        if self.approx_type == 'hess':
            w, z = delta_x, delta_grad
        else:
            w, z = delta_grad, delta_x
        Mw = self.dot(w)
        residual = z - Mw
        denominator = np.dot(w, residual)
        # Skip the update when the denominator is negligible relative to
        # the magnitudes of the vectors involved; the SR1 formula would
        # otherwise be numerically unstable.
        threshold = self.min_denominator * norm(w) * norm(residual)
        if np.abs(denominator) <= threshold:
            return
        # Rank-1 symmetric BLAS update of the relevant matrix.
        if self.approx_type == 'hess':
            self.B = self._syr(1/denominator, residual, a=self.B)
        else:
            self.H = self._syr(1/denominator, residual, a=self.H)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from typing import TYPE_CHECKING
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from ._optimize import OptimizeResult
|
| 7 |
+
from ._pava_pybind import pava
|
| 8 |
+
|
| 9 |
+
if TYPE_CHECKING:
|
| 10 |
+
import numpy.typing as npt
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
__all__ = ["isotonic_regression"]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def isotonic_regression(
    y: npt.ArrayLike,
    *,
    weights: npt.ArrayLike | None = None,
    increasing: bool = True,
) -> OptimizeResult:
    r"""Nonparametric isotonic regression.

    A (not strictly) monotonically increasing array `x` with the same length
    as `y` is calculated by the pool adjacent violators algorithm (PAVA), see
    [1]_. See the Notes section for more details.

    Parameters
    ----------
    y : (N,) array_like
        Response variable.
    weights : (N,) array_like or None
        Case weights.
    increasing : bool
        If True, fit monotonic increasing, i.e. isotonic, regression.
        If False, fit a monotonic decreasing, i.e. antitonic, regression.
        Default is True.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are:

        - ``x``: The isotonic regression solution, i.e. an increasing (or
          decreasing) array of the same length than y, with elements in the
          range from min(y) to max(y).
        - ``weights`` : Array with the sum of case weights for each block
          (or pool) B.
        - ``blocks``: Array of length B+1 with the indices of the start
          positions of each block (or pool) B. The j-th block is given by
          ``x[blocks[j]:blocks[j+1]]`` for which all values are the same.

    Notes
    -----
    Given data :math:`y` and case weights :math:`w`, the isotonic regression
    solves the following optimization problem:

    .. math::

        \operatorname{argmin}_{x_i} \sum_i w_i (y_i - x_i)^2 \quad
        \text{subject to } x_i \leq x_j \text{ whenever } i \leq j \,.

    For every input value :math:`y_i`, it generates a value :math:`x_i` such
    that :math:`x` is increasing (but not strictly), i.e.
    :math:`x_i \leq x_{i+1}`. This is accomplished by the PAVA.
    The solution consists of pools or blocks, i.e. neighboring elements of
    :math:`x`, e.g. :math:`x_i` and :math:`x_{i+1}`, that all have the same
    value.

    Most interestingly, the solution stays the same if the squared loss is
    replaced by the wide class of Bregman functions which are the unique
    class of strictly consistent scoring functions for the mean, see [2]_
    and references therein.

    The implemented version of PAVA according to [1]_ has a computational
    complexity of O(N) with input size N.

    References
    ----------
    .. [1] Busing, F. M. T. A. (2022).
           Monotone Regression: A Simple and Fast O(n) PAVA Implementation.
           Journal of Statistical Software, Code Snippets, 102(1), 1-25.
           :doi:`10.18637/jss.v102.c01`
    .. [2] Jordan, A.I., Mühlemann, A. & Ziegel, J.F.
           Characterizing the optimal solutions to the isotonic regression
           problem for identifiable functionals.
           Ann Inst Stat Math 74, 489-514 (2022).
           :doi:`10.1007/s10463-021-00808-0`

    Examples
    --------
    This example demonstrates that ``isotonic_regression`` really solves a
    constrained optimization problem.

    >>> import numpy as np
    >>> from scipy.optimize import isotonic_regression, minimize
    >>> y = [1.5, 1.0, 4.0, 6.0, 5.7, 5.0, 7.8, 9.0, 7.5, 9.5, 9.0]
    >>> def objective(yhat, y):
    ...     return np.sum((yhat - y)**2)
    >>> def constraint(yhat, y):
    ...     # This is for a monotonically increasing regression.
    ...     return np.diff(yhat)
    >>> result = minimize(objective, x0=y, args=(y,),
    ...                   constraints=[{'type': 'ineq',
    ...                                 'fun': lambda x: constraint(x, y)}])
    >>> result.x
    array([1.25      , 1.25      , 4.        , 5.56666667, 5.56666667,
           5.56666667, 7.8       , 8.25      , 8.25      , 9.25      ,
           9.25      ])
    >>> result = isotonic_regression(y)
    >>> result.x
    array([1.25      , 1.25      , 4.        , 5.56666667, 5.56666667,
           5.56666667, 7.8       , 8.25      , 8.25      , 9.25      ,
           9.25      ])

    The big advantage of ``isotonic_regression`` compared to calling
    ``minimize`` is that it is more user friendly, i.e. one does not need to
    define objective and constraint functions, and that it is orders of
    magnitudes faster. On commodity hardware (in 2023), for normal distributed
    input y of length 1000, the minimizer takes about 4 seconds, while
    ``isotonic_regression`` takes about 200 microseconds.
    """
    yarr = np.atleast_1d(y)  # Check yarr.ndim == 1 is implicit (pybind11) in pava.
    # A decreasing fit is obtained by reversing the data, running the
    # increasing PAVA, and reversing the result back at the end.
    order = slice(None) if increasing else slice(None, None, -1)
    x = np.array(yarr[order], order="C", dtype=np.float64, copy=True)
    if weights is None:
        wx = np.ones_like(yarr, dtype=np.float64)
    else:
        warr = np.atleast_1d(weights)

        if not (yarr.ndim == warr.ndim == 1 and yarr.shape[0] == warr.shape[0]):
            raise ValueError(
                "Input arrays y and w must have one dimension of equal length."
            )
        if np.any(warr <= 0):
            raise ValueError("Weights w must be strictly positive.")

        wx = np.array(warr[order], order="C", dtype=np.float64, copy=True)
    n = x.shape[0]
    # Block-start index buffer for pava; -1 marks unused slots.
    r = np.full(shape=n + 1, fill_value=-1, dtype=np.intp)
    # NOTE(review): pava appears to modify x, wx, r in place and also
    # return them — confirm against the _pava_pybind implementation.
    x, wx, r, b = pava(x, wx, r)
    # Now that we know the number of blocks b, we only keep the relevant part
    # of r and wx.
    # As information: Due to the pava implementation, after the last block
    # index, there might be smaller numbers appended to r, e.g.
    # r = [0, 10, 8, 7] which in the end should be r = [0, 10].
    r = r[:b + 1]
    wx = wx[:b]
    if not increasing:
        # Undo the initial reversal; block boundaries must be remapped
        # from reversed-index space back to original-index space.
        x = x[::-1]
        wx = wx[::-1]
        r = r[-1] - r[::-1]
    return OptimizeResult(
        x=x,
        weights=wx,
        blocks=r,
    )
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py
ADDED
|
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Functions
|
| 3 |
+
---------
|
| 4 |
+
.. autosummary::
|
| 5 |
+
:toctree: generated/
|
| 6 |
+
|
| 7 |
+
fmin_l_bfgs_b
|
| 8 |
+
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
## License for the Python wrapper
|
| 12 |
+
## ==============================
|
| 13 |
+
|
| 14 |
+
## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
|
| 15 |
+
|
| 16 |
+
## Permission is hereby granted, free of charge, to any person obtaining a
|
| 17 |
+
## copy of this software and associated documentation files (the "Software"),
|
| 18 |
+
## to deal in the Software without restriction, including without limitation
|
| 19 |
+
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 20 |
+
## and/or sell copies of the Software, and to permit persons to whom the
|
| 21 |
+
## Software is furnished to do so, subject to the following conditions:
|
| 22 |
+
|
| 23 |
+
## The above copyright notice and this permission notice shall be included in
|
| 24 |
+
## all copies or substantial portions of the Software.
|
| 25 |
+
|
| 26 |
+
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 27 |
+
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 28 |
+
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 29 |
+
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 30 |
+
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 31 |
+
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 32 |
+
## DEALINGS IN THE SOFTWARE.
|
| 33 |
+
|
| 34 |
+
## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
|
| 35 |
+
|
| 36 |
+
import numpy as np
|
| 37 |
+
from numpy import array, asarray, float64, zeros
|
| 38 |
+
from . import _lbfgsb
|
| 39 |
+
from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
|
| 40 |
+
_wrap_callback, _check_unknown_options,
|
| 41 |
+
_prepare_scalar_function)
|
| 42 |
+
from ._constraints import old_bound_to_new
|
| 43 |
+
|
| 44 |
+
from scipy.sparse.linalg import LinearOperator
|
| 45 |
+
|
| 46 |
+
__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
                  approx_grad=0,
                  bounds=None, m=10, factr=1e7, pgtol=1e-5,
                  epsilon=1e-8,
                  iprint=-1, maxfun=15000, maxiter=15000, disp=None,
                  callback=None, maxls=20):
    """
    Minimize a function ``func`` using the L-BFGS-B algorithm.

    Parameters
    ----------
    func : callable f(x,*args)
        Function to minimize.
    x0 : ndarray
        Initial guess.
    fprime : callable fprime(x,*args), optional
        The gradient of `func`. If None, then `func` returns the function
        value and the gradient (``f, g = func(x, *args)``), unless
        `approx_grad` is True in which case `func` returns only ``f``.
    args : sequence, optional
        Arguments to pass to `func` and `fprime`.
    approx_grad : bool, optional
        Whether to approximate the gradient numerically (in which case
        `func` returns only the function value).
    bounds : list, optional
        ``(min, max)`` pairs for each element in ``x``, defining the bounds
        on that parameter. Use None or +-inf for one of ``min`` or ``max``
        when there is no bound in that direction.
    m : int, optional
        The maximum number of variable metric corrections used to define
        the limited memory matrix.
    factr : float, optional
        The iteration stops when
        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``, where
        ``eps`` is the machine precision. Typical values: 1e12 for low
        accuracy; 1e7 for moderate accuracy; 10.0 for extremely high
        accuracy. Related to `ftol` of `scipy.optimize.minimize` by
        ``ftol = factr * numpy.finfo(float).eps``.
    pgtol : float, optional
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n} <= pgtol`` where ``proj g_i`` is
        the i-th component of the projected gradient.
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint
        iterations; ``iprint = 99`` print details of every iteration except
        n-vectors; ``iprint = 100`` print also the changes of active set
        and final x; ``iprint > 100`` print details of every iteration
        including x and g.
    disp : int, optional
        If zero, then no output. If a positive number, then this over-rides
        `iprint` (i.e., `iprint` gets the value of `disp`).
    maxfun : int, optional
        Maximum number of function evaluations. May be exceeded when
        gradients are evaluated by numerical differentiation.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is
        the current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    Returns
    -------
    x : array_like
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    d : dict
        Information dictionary with keys:

        * d['warnflag'] - 0 if converged, 1 if too many function
          evaluations or too many iterations, 2 if stopped for another
          reason, given in d['task']
        * d['grad'] - the gradient at the minimum (should be 0 ish)
        * d['funcalls'] - the number of function calls made
        * d['nit'] - the number of iterations

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'L-BFGS-B' `method` in particular, which exposes
        `ftol` instead of `factr`, with
        ``ftol = factr * numpy.finfo(float).eps``.

    Notes
    -----
    Wraps the L-BFGS-B FORTRAN code version 3.0 (released April 25, 2011)
    by Ciyou Zhu, Richard Byrd, and Jorge Nocedal, released under the BSD
    License.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778:
      L-BFGS-B, FORTRAN routines for large scale bound constrained
      optimization (2011), ACM Transactions on Mathematical Software, 38, 1.
    """
    # Decide how the objective and its gradient are supplied.
    if approx_grad:
        # The gradient will be estimated numerically from `func` alone.
        objective = func
        gradient = None
    elif fprime is None:
        # `func` returns (f, g); memoize so f and g are served separately.
        objective = MemoizeJac(func)
        gradient = objective.derivative
    else:
        objective = func
        gradient = fprime

    # Translate the legacy keyword names into `_minimize_lbfgsb` options.
    options = dict(
        disp=disp,
        iprint=iprint,
        maxcor=m,
        ftol=factr * np.finfo(float).eps,
        gtol=pgtol,
        eps=epsilon,
        maxfun=maxfun,
        maxiter=maxiter,
        callback=_wrap_callback(callback),
        maxls=maxls,
    )

    res = _minimize_lbfgsb(objective, x0, args=args, jac=gradient,
                           bounds=bounds, **options)

    # Repackage the OptimizeResult as the historical (x, f, d) triple.
    info = {
        'grad': res['jac'],
        'task': res['message'],
        'funcalls': res['nfev'],
        'nit': res['nit'],
        'warnflag': res['status'],
    }
    return res['x'], res['fun'], info
+
|
| 250 |
+
def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     finite_diff_rel_step=None, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : None or int
        If `disp is None` (the default), then the supplied version of `iprint`
        is used. If `disp is not None`, then it overrides the supplied version
        of `iprint` with the behaviour you outlined.
    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None` the absolute step size used for numerical
        approximation of the jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int
        Maximum number of iterations.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If `jac in ['2-point', '3-point', 'cs']` the relative step size to
        use for numerical approximation of the jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
    I.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.

    """
    _check_unknown_options(unknown_options)
    # Translate minimize()-style option names into the legacy Fortran
    # parameter names used by the wrapped routine.
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape

    # historically old-style bounds were/are expected by lbfgsb.
    # That's still the case but we'll deal with new-style from here on,
    # it's easier
    if bounds is None:
        pass
    elif len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    else:
        bounds = np.array(old_bound_to_new(bounds))

        # check bounds
        if (bounds[0] > bounds[1]).any():
            raise ValueError(
                "LBFGSB - one of the lower bounds is greater than an upper bound."
            )

        # initial vector must lie within the bounds. Otherwise ScalarFunction and
        # approx_derivative will cause problems
        x0 = np.clip(x0, bounds[0], bounds[1])

    if disp is not None:
        # `disp` overrides `iprint`; disp == 0 silences output entirely.
        if disp == 0:
            iprint = -1
        else:
            iprint = disp

    # _prepare_scalar_function can use bounds=None to represent no bounds
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=bounds,
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    # Integer dtype expected by the compiled Fortran extension.
    fortran_int = _lbfgsb.types.intvar.dtype

    nbd = zeros(n, fortran_int)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    # Encoding of the Fortran `nbd` flag per variable: 0 = unbounded,
    # 1 = lower bound only, 2 = both bounds, 3 = upper bound only.
    # Finite bounds are re-flagged as 1 in the loop below before lookup.
    bounds_map = {(-np.inf, np.inf): 0,
                  (1, np.inf): 1,
                  (1, 1): 2,
                  (-np.inf, 1): 3}

    if bounds is not None:
        for i in range(0, n):
            l, u = bounds[0, i], bounds[1, i]
            if not np.isinf(l):
                low_bnd[i] = l
                l = 1
            if not np.isinf(u):
                upper_bnd[i] = u
                u = 1
            nbd[i] = bounds_map[l, u]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    # Workspace arrays required by the Fortran routine `setulb`; sizes are
    # dictated by the L-BFGS-B reference implementation.
    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, fortran_int)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, fortran_int)
    isave = zeros(44, fortran_int)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0

    # Reverse-communication loop: `setulb` mutates `task` to request new
    # f/g values ('FG...'), report a completed iteration ('NEW_X'), or
    # signal convergence/termination (anything else).
    while 1:
        # g may become float32 if a user provides a function that calculates
        # the Jacobian in float32 (see gh-18730). The underlying Fortran code
        # expects float64, so upcast it
        g = g.astype(np.float64)
        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tobytes()
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            n_iterations += 1

            intermediate_result = OptimizeResult(x=x, fun=f)
            if _call_callback_maybe_halt(callback, intermediate_result):
                task[:] = 'STOP: CALLBACK REQUESTED HALT'
            if n_iterations >= maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif sf.nfev > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
        else:
            break

    # Map the final Fortran task string onto the historical warnflag codes
    # (0 = converged, 1 = iteration/eval limit, 2 = other reason).
    task_str = task.tobytes().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior the current iteration;
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    task_str = task_str.decode()
    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)
| 449 |
+
|
| 450 |
+
class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    Computes products of a vector with the limited-memory BFGS
    approximation to the inverse Hessian of the objective function,
    built from the correction pairs accumulated during optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.

    """

    def __init__(self, sk, yk):
        """Construct the operator from the correction-pair arrays."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        # Curvature scalars rho_i = 1 / (s_i . y_i), one per correction pair.
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        Implements the two-loop recursion of Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        num = self.n_corrs
        coeffs = np.empty(num)

        # First recursion: sweep the corrections from newest to oldest.
        for idx in range(num - 1, -1, -1):
            coeffs[idx] = self.rho[idx] * np.dot(self.sk[idx], q)
            q = q - coeffs[idx] * self.yk[idx]

        # Second recursion: sweep back from oldest to newest.
        result = q
        for idx in range(num):
            correction = self.rho[idx] * np.dot(self.yk[idx], result)
            result = result + self.sk[idx] * (coeffs[idx] - correction)

        return result

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """
        eye = np.eye(*self.shape, dtype=self.dtype)
        dense = eye

        # Apply the BFGS update formula once per stored correction pair:
        # H <- A1 H A2 + rho s s^T, with A1/A2 the rank-one projectors.
        for idx in range(self.n_corrs):
            s_col = self.sk[idx][:, np.newaxis]
            y_row = self.yk[idx][np.newaxis, :]
            a1 = eye - s_col * y_row * self.rho[idx]
            a2 = eye - self.yk[idx][:, np.newaxis] * self.sk[idx][np.newaxis, :] * self.rho[idx]

            dense = np.dot(a1, np.dot(dense, a2)) + (self.rho[idx] * s_col *
                                                     self.sk[idx][np.newaxis, :])
        return dense
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py
ADDED
|
@@ -0,0 +1,896 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Functions
|
| 3 |
+
---------
|
| 4 |
+
.. autosummary::
|
| 5 |
+
:toctree: generated/
|
| 6 |
+
|
| 7 |
+
line_search_armijo
|
| 8 |
+
line_search_wolfe1
|
| 9 |
+
line_search_wolfe2
|
| 10 |
+
scalar_search_wolfe1
|
| 11 |
+
scalar_search_wolfe2
|
| 12 |
+
|
| 13 |
+
"""
|
| 14 |
+
from warnings import warn
|
| 15 |
+
|
| 16 |
+
from ._dcsrch import DCSRCH
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
|
| 20 |
+
'scalar_search_wolfe1', 'scalar_search_wolfe2',
|
| 21 |
+
'line_search_armijo']
|
| 22 |
+
|
| 23 |
+
class LineSearchWarning(RuntimeWarning):
    """Warning emitted when a line search fails to converge or hits a bound."""
    pass
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _check_c1_c2(c1, c2):
|
| 28 |
+
if not (0 < c1 < c2 < 1):
|
| 29 |
+
raise ValueError("'c1' and 'c2' do not satisfy"
|
| 30 |
+
"'0 < c1 < c2 < 1'.")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
#------------------------------------------------------------------------------
|
| 34 |
+
# Minpack's Wolfe line and scalar searches
|
| 35 |
+
#------------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """Line search along direction `pk` satisfying strong Wolfe conditions.

    Vector-valued front end for `scalar_search_wolfe1`: reduces the problem
    to the scalar function ``phi(s) = f(xk + s*pk)`` and its directional
    derivative, while counting evaluations of each.

    Parameters
    ----------
    f : callable
        Function `f(x)`
    fprime : callable
        Gradient of `f`
    xk : array_like
        Current point
    pk : array_like
        Search direction
    gfk : array_like, optional
        Gradient of `f` at point `xk`
    old_fval : float, optional
        Value of `f` at point `xk`
    old_old_fval : float, optional
        Value of `f` at point preceding `xk`

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp, f_count, g_count, fval, old_fval
        As in `line_search_wolfe1`
    gval : array
        Gradient of `f` at the final point

    Notes
    -----
    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
    """
    if gfk is None:
        gfk = fprime(xk, *args)

    # One-element lists act as mutable cells so the closures below can
    # update the call counters and remember the latest gradient.
    n_fev = [0]
    n_gev = [0]
    last_grad = [gfk]

    def phi(s):
        n_fev[0] += 1
        return f(xk + s * pk, *args)

    def derphi(s):
        last_grad[0] = fprime(xk + s * pk, *args)
        n_gev[0] += 1
        return np.dot(last_grad[0], pk)

    # Directional derivative at the starting point.
    slope0 = np.dot(gfk, pk)

    stp, fval, old_fval = scalar_search_wolfe1(
        phi, derphi, old_fval, old_old_fval, slope0,
        c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)

    return stp, n_fev[0], n_gev[0], fval, old_fval, last_grad[0]
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """Find a step satisfying the strong Wolfe conditions for a scalar function.

    A positive step is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0
    old_phi0 : float, optional
        Value of phi at previous point
    derphi0 : float, optional
        Value derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found
    phi : float
        Value of `phi` at the new point `alpha`
    phi0 : float
        Value of `phi` at `alpha=0`

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
    """
    _check_c1_c2(c1, c2)

    # Fill in missing values of phi/derphi at the origin.
    phi0 = phi(0.) if phi0 is None else phi0
    derphi0 = derphi(0.) if derphi0 is None else derphi0

    # Initial trial step: interpolation guess from the previous function
    # decrease, capped at 1; fall back to 1 when the guess is negative or
    # no history is available.
    alpha1 = 1.0
    if old_phi0 is not None and derphi0 != 0:
        guess = min(1.0, 1.01 * 2 * (phi0 - old_phi0) / derphi0)
        if guess >= 0:
            alpha1 = guess

    # Delegate the actual search to the MINPACK DCSRCH routine.
    searcher = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
    stp, phi1, phi0, task = searcher(
        alpha1, phi0=phi0, derphi0=derphi0, maxiter=100
    )

    return stp, phi1, phi0
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# Default alias: the MINPACK (DCSRCH) based Wolfe search is the module-level
# `line_search`.
line_search = line_search_wolfe1
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
#------------------------------------------------------------------------------
|
| 181 |
+
# Pure-Python Wolfe line and scalar searches
|
| 182 |
+
#------------------------------------------------------------------------------
|
| 183 |
+
|
| 184 |
+
# Note: `line_search_wolfe2` is the public `scipy.optimize.line_search`
|
| 185 |
+
|
| 186 |
+
def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction. The search direction must be a descent direction
        for the algorithm to converge.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line search
        accepts the value of ``alpha`` only if this
        callable returns ``True``. If the callable returns ``False``
        for the step length, the algorithm will continue with
        new iterates. The callable is only called for iterates
        satisfying the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.


    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    The search direction `pk` must be a descent direction (e.g.
    ``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
    conditions. If the search direction is not a descent direction (e.g.
    ``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2+(x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    # Evaluation counters and the most recent gradient/step, shared with the
    # closures below via one-element lists (mutable cells).
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        # Scalar restriction of f along the search direction.
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    fprime = myfprime

    def derphi(alpha):
        # Directional derivative; caches the full gradient for reuse.
        gc[0] += 1
        gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
        gval_alpha[0] = alpha
        return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Add the current gradient as argument, to avoid needless
        # re-evaluation
        def extra_condition2(alpha, phi):
            # Refresh the cached gradient only if it is stale for this alpha.
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)
    else:
        # derphi_star is a number (derphi) -- so use the most recently
        # calculated gradient used in computing it derphi = gfk*pk
        # this is the gradient at the next step no need to compute it
        # again in the outer loop.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    alpha > 0 is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    # Initial trial step from interpolation of the previous decrease,
    # falling back to 1.0 when no history is available or the guess is bad.
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)
    #derphi_a1 = derphi(alpha1) evaluated below

    phi_a0 = phi0
    derphi_a0 = derphi0

    if extra_condition is None:
        # Default: accept any step satisfying the strong Wolfe conditions.
        def extra_condition(alpha, phi):
            return True

    # Bracketing phase (Algorithm 3.5 in Nocedal & Wright): expand the step
    # until a Wolfe interval is bracketed, then hand off to _zoom.
    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 > amax):
            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
            # slipped below machine precision?
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      "less than or equal to amax: %s" % amax

            warn(msg, LineSearchWarning, stacklevel=2)
            break

        not_first_iteration = i > 0
        # Armijo condition violated (or no improvement after the first
        # iteration): the minimizer is bracketed in [alpha0, alpha1].
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and not_first_iteration):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        # Curvature condition satisfied: accept alpha1 (subject to the
        # caller's extra_condition).
        if (abs(derphi_a1) <= -c2*derphi0):
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        # Positive slope: the minimizer is bracketed with the roles of the
        # endpoints reversed.
        if (derphi_a1 >= 0):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # stopping test maxiter reached
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)

    return alpha_star, phi_star, phi0, derphi_star
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def _cubicmin(a, fa, fpa, b, fb, c, fc):
|
| 478 |
+
"""
|
| 479 |
+
Finds the minimizer for a cubic polynomial that goes through the
|
| 480 |
+
points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
|
| 481 |
+
|
| 482 |
+
If no minimizer can be found, return None.
|
| 483 |
+
|
| 484 |
+
"""
|
| 485 |
+
# f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
|
| 486 |
+
|
| 487 |
+
with np.errstate(divide='raise', over='raise', invalid='raise'):
|
| 488 |
+
try:
|
| 489 |
+
C = fpa
|
| 490 |
+
db = b - a
|
| 491 |
+
dc = c - a
|
| 492 |
+
denom = (db * dc) ** 2 * (db - dc)
|
| 493 |
+
d1 = np.empty((2, 2))
|
| 494 |
+
d1[0, 0] = dc ** 2
|
| 495 |
+
d1[0, 1] = -db ** 2
|
| 496 |
+
d1[1, 0] = -dc ** 3
|
| 497 |
+
d1[1, 1] = db ** 3
|
| 498 |
+
[A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
|
| 499 |
+
fc - fa - C * dc]).flatten())
|
| 500 |
+
A /= denom
|
| 501 |
+
B /= denom
|
| 502 |
+
radical = B * B - 3 * A * C
|
| 503 |
+
xmin = a + (-B + np.sqrt(radical)) / (3 * A)
|
| 504 |
+
except ArithmeticError:
|
| 505 |
+
return None
|
| 506 |
+
if not np.isfinite(xmin):
|
| 507 |
+
return None
|
| 508 |
+
return xmin
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
def _quadmin(a, fa, fpa, b, fb):
|
| 512 |
+
"""
|
| 513 |
+
Finds the minimizer for a quadratic polynomial that goes through
|
| 514 |
+
the points (a,fa), (b,fb) with derivative at a of fpa.
|
| 515 |
+
|
| 516 |
+
"""
|
| 517 |
+
# f(x) = B*(x-a)^2 + C*(x-a) + D
|
| 518 |
+
with np.errstate(divide='raise', over='raise', invalid='raise'):
|
| 519 |
+
try:
|
| 520 |
+
D = fa
|
| 521 |
+
C = fpa
|
| 522 |
+
db = b - a * 1.0
|
| 523 |
+
B = (fb - D - C * db) / (db * db)
|
| 524 |
+
xmin = a - C / (2.0 * B)
|
| 525 |
+
except ArithmeticError:
|
| 526 |
+
return None
|
| 527 |
+
if not np.isfinite(xmin):
|
| 528 |
+
return None
|
| 529 |
+
return xmin
|
| 530 |
+
|
| 531 |
+
|
| 532 |
+
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # interpolate to find a trial step length between a_lo and
        # a_hi Need to choose interpolation here. Use cubic
        # interpolation and then if the result is within delta *
        # dalpha or outside of the interval bounded by a_lo or a_hi
        # then use quadratic interpolation, if the result is still too
        # close, then use bisection

        # NOTE: a_lo/a_hi are not ordered; dalpha may be negative.
        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi if the result is still too close to the
        # end points (or out of the interval) then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        # On the first iteration there is no third point yet, so go straight
        # to quadratic interpolation (short-circuit keeps a_j/cchk unused).
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                # Last resort: bisect the interval.
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            # Armijo failed, or no improvement over the lo end: the trial
            # point becomes the new hi end of the interval.
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            # Strong Wolfe conditions satisfied at a_j: accept it (subject
            # to the caller's extra_condition).
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                # Slope points toward a_hi: the minimizer lies on the
                # a_lo side, so the old lo end becomes the new hi end.
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            # In either case the trial point becomes the new lo end.
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star
|
| 617 |
+
|
| 618 |
+
|
| 619 |
+
#------------------------------------------------------------------------------
|
| 620 |
+
# Armijo line and scalar searches
|
| 621 |
+
#------------------------------------------------------------------------------
|
| 622 |
+
|
| 623 |
+
def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize ``f(xk + alpha*pk)`` over alpha with Armijo backtracking.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha
    f_count
    f_val_at_alpha

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57
    """
    xk = np.atleast_1d(xk)
    # Mutable cell so the closure can count function evaluations.
    n_fev = [0]

    def phi(step):
        n_fev[0] += 1
        return f(xk + step * pk, *args)

    # Reuse the caller-supplied f(xk) when available to save an evaluation.
    phi0 = phi(0.) if old_fval is None else old_fval

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, n_fev[0], phi1
|
| 673 |
+
|
| 674 |
+
|
| 675 |
+
def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Compatibility wrapper for `line_search_armijo`."""
    alpha, n_fev, fval = line_search_armijo(f, xk, pk, gfk, old_fval,
                                            args=args, c1=c1, alpha0=alpha0)
    # The historical signature also reported a gradient-evaluation count;
    # the Armijo search never evaluates the gradient, hence the constant 0.
    return alpha, n_fev, 0, fval
|
| 682 |
+
|
| 683 |
+
|
| 684 |
+
def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize ``phi(alpha)`` over alpha using Armijo backtracking.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    alpha > 0 is assumed to be a descent direction.

    Returns
    -------
    alpha
    phi1
    """
    def sufficient_decrease(step, value):
        # First Wolfe (Armijo) condition.
        return value <= phi0 + c1 * step * derphi0

    phi_a0 = phi(alpha0)
    if sufficient_decrease(alpha0, phi_a0):
        return alpha0, phi_a0

    # Minimizer of the quadratic interpolant through phi0, derphi0, phi_a0.
    alpha1 = -derphi0 * alpha0 ** 2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    if sufficient_decrease(alpha1, phi_a1):
        return alpha1, phi_a1

    # Cubic backtracking: loop until an alpha satisfying the first Wolfe
    # condition is found (since we are backtracking, the step is assumed not
    # too small, so the second condition is not checked).
    while alpha1 > amin:
        denom = alpha0 ** 2 * alpha1 ** 2 * (alpha1 - alpha0)
        r1 = phi_a1 - phi0 - derphi0 * alpha1
        r0 = phi_a0 - phi0 - derphi0 * alpha0
        a = (alpha0 ** 2 * r1 - alpha1 ** 2 * r0) / denom
        b = (-alpha0 ** 3 * r1 + alpha1 ** 3 * r0) / denom

        # Minimizer of the cubic interpolant.
        alpha2 = (-b + np.sqrt(abs(b ** 2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)

        if sufficient_decrease(alpha2, phi_a2):
            return alpha2, phi_a2

        # Safeguard: force a meaningful reduction of the step.
        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2 / alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0, alpha1 = alpha1, alpha2
        phi_a0, phi_a1 = phi_a1, phi_a2

    # Failed to find a suitable step length
    return None, phi_a1
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
#------------------------------------------------------------------------------
|
| 743 |
+
# Non-monotone line search for DF-SANE
|
| 744 |
+
#------------------------------------------------------------------------------
|
| 745 |
+
|
| 746 |
+
def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
|
| 747 |
+
gamma=1e-4, tau_min=0.1, tau_max=0.5):
|
| 748 |
+
"""
|
| 749 |
+
Nonmonotone backtracking line search as described in [1]_
|
| 750 |
+
|
| 751 |
+
Parameters
|
| 752 |
+
----------
|
| 753 |
+
f : callable
|
| 754 |
+
Function returning a tuple ``(f, F)`` where ``f`` is the value
|
| 755 |
+
of a merit function and ``F`` the residual.
|
| 756 |
+
x_k : ndarray
|
| 757 |
+
Initial position.
|
| 758 |
+
d : ndarray
|
| 759 |
+
Search direction.
|
| 760 |
+
prev_fs : float
|
| 761 |
+
List of previous merit function values. Should have ``len(prev_fs) <= M``
|
| 762 |
+
where ``M`` is the nonmonotonicity window parameter.
|
| 763 |
+
eta : float
|
| 764 |
+
Allowed merit function increase, see [1]_
|
| 765 |
+
gamma, tau_min, tau_max : float, optional
|
| 766 |
+
Search parameters, see [1]_
|
| 767 |
+
|
| 768 |
+
Returns
|
| 769 |
+
-------
|
| 770 |
+
alpha : float
|
| 771 |
+
Step length
|
| 772 |
+
xp : ndarray
|
| 773 |
+
Next position
|
| 774 |
+
fp : float
|
| 775 |
+
Merit function value at next position
|
| 776 |
+
Fp : ndarray
|
| 777 |
+
Residual at next position
|
| 778 |
+
|
| 779 |
+
References
|
| 780 |
+
----------
|
| 781 |
+
[1] "Spectral residual method without gradient information for solving
|
| 782 |
+
large-scale nonlinear systems of equations." W. La Cruz,
|
| 783 |
+
J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
|
| 784 |
+
|
| 785 |
+
"""
|
| 786 |
+
f_k = prev_fs[-1]
|
| 787 |
+
f_bar = max(prev_fs)
|
| 788 |
+
|
| 789 |
+
alpha_p = 1
|
| 790 |
+
alpha_m = 1
|
| 791 |
+
alpha = 1
|
| 792 |
+
|
| 793 |
+
while True:
|
| 794 |
+
xp = x_k + alpha_p * d
|
| 795 |
+
fp, Fp = f(xp)
|
| 796 |
+
|
| 797 |
+
if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
|
| 798 |
+
alpha = alpha_p
|
| 799 |
+
break
|
| 800 |
+
|
| 801 |
+
alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
|
| 802 |
+
|
| 803 |
+
xp = x_k - alpha_m * d
|
| 804 |
+
fp, Fp = f(xp)
|
| 805 |
+
|
| 806 |
+
if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
|
| 807 |
+
alpha = -alpha_m
|
| 808 |
+
break
|
| 809 |
+
|
| 810 |
+
alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
|
| 811 |
+
|
| 812 |
+
alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
|
| 813 |
+
alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
|
| 814 |
+
|
| 815 |
+
return alpha, xp, fp, Fp
|
| 816 |
+
|
| 817 |
+
|
| 818 |
+
def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
|
| 819 |
+
gamma=1e-4, tau_min=0.1, tau_max=0.5,
|
| 820 |
+
nu=0.85):
|
| 821 |
+
"""
|
| 822 |
+
Nonmonotone line search from [1]
|
| 823 |
+
|
| 824 |
+
Parameters
|
| 825 |
+
----------
|
| 826 |
+
f : callable
|
| 827 |
+
Function returning a tuple ``(f, F)`` where ``f`` is the value
|
| 828 |
+
of a merit function and ``F`` the residual.
|
| 829 |
+
x_k : ndarray
|
| 830 |
+
Initial position.
|
| 831 |
+
d : ndarray
|
| 832 |
+
Search direction.
|
| 833 |
+
f_k : float
|
| 834 |
+
Initial merit function value.
|
| 835 |
+
C, Q : float
|
| 836 |
+
Control parameters. On the first iteration, give values
|
| 837 |
+
Q=1.0, C=f_k
|
| 838 |
+
eta : float
|
| 839 |
+
Allowed merit function increase, see [1]_
|
| 840 |
+
nu, gamma, tau_min, tau_max : float, optional
|
| 841 |
+
Search parameters, see [1]_
|
| 842 |
+
|
| 843 |
+
Returns
|
| 844 |
+
-------
|
| 845 |
+
alpha : float
|
| 846 |
+
Step length
|
| 847 |
+
xp : ndarray
|
| 848 |
+
Next position
|
| 849 |
+
fp : float
|
| 850 |
+
Merit function value at next position
|
| 851 |
+
Fp : ndarray
|
| 852 |
+
Residual at next position
|
| 853 |
+
C : float
|
| 854 |
+
New value for the control parameter C
|
| 855 |
+
Q : float
|
| 856 |
+
New value for the control parameter Q
|
| 857 |
+
|
| 858 |
+
References
|
| 859 |
+
----------
|
| 860 |
+
.. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
|
| 861 |
+
search and its application to the spectral residual
|
| 862 |
+
method'', IMA J. Numer. Anal. 29, 814 (2009).
|
| 863 |
+
|
| 864 |
+
"""
|
| 865 |
+
alpha_p = 1
|
| 866 |
+
alpha_m = 1
|
| 867 |
+
alpha = 1
|
| 868 |
+
|
| 869 |
+
while True:
|
| 870 |
+
xp = x_k + alpha_p * d
|
| 871 |
+
fp, Fp = f(xp)
|
| 872 |
+
|
| 873 |
+
if fp <= C + eta - gamma * alpha_p**2 * f_k:
|
| 874 |
+
alpha = alpha_p
|
| 875 |
+
break
|
| 876 |
+
|
| 877 |
+
alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
|
| 878 |
+
|
| 879 |
+
xp = x_k - alpha_m * d
|
| 880 |
+
fp, Fp = f(xp)
|
| 881 |
+
|
| 882 |
+
if fp <= C + eta - gamma * alpha_m**2 * f_k:
|
| 883 |
+
alpha = -alpha_m
|
| 884 |
+
break
|
| 885 |
+
|
| 886 |
+
alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
|
| 887 |
+
|
| 888 |
+
alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
|
| 889 |
+
alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
|
| 890 |
+
|
| 891 |
+
# Update C and Q
|
| 892 |
+
Q_next = nu * Q + 1
|
| 893 |
+
C = (nu * Q * (C + eta) + fp) / Q_next
|
| 894 |
+
Q = Q_next
|
| 895 |
+
|
| 896 |
+
return alpha, xp, fp, Fp, C, Q
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py
ADDED
|
@@ -0,0 +1,716 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
A top-level linear programming interface.
|
| 3 |
+
|
| 4 |
+
.. versionadded:: 0.15.0
|
| 5 |
+
|
| 6 |
+
Functions
|
| 7 |
+
---------
|
| 8 |
+
.. autosummary::
|
| 9 |
+
:toctree: generated/
|
| 10 |
+
|
| 11 |
+
linprog
|
| 12 |
+
linprog_verbose_callback
|
| 13 |
+
linprog_terse_callback
|
| 14 |
+
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
from ._optimize import OptimizeResult, OptimizeWarning
|
| 20 |
+
from warnings import warn
|
| 21 |
+
from ._linprog_highs import _linprog_highs
|
| 22 |
+
from ._linprog_ip import _linprog_ip
|
| 23 |
+
from ._linprog_simplex import _linprog_simplex
|
| 24 |
+
from ._linprog_rs import _linprog_rs
|
| 25 |
+
from ._linprog_doc import (_linprog_highs_doc, _linprog_ip_doc, # noqa: F401
|
| 26 |
+
_linprog_rs_doc, _linprog_simplex_doc,
|
| 27 |
+
_linprog_highs_ipm_doc, _linprog_highs_ds_doc)
|
| 28 |
+
from ._linprog_util import (
|
| 29 |
+
_parse_linprog, _presolve, _get_Abc, _LPProblem, _autoscale,
|
| 30 |
+
_postsolve, _check_result, _display_summary)
|
| 31 |
+
from copy import deepcopy
|
| 32 |
+
|
| 33 |
+
__all__ = ['linprog', 'linprog_verbose_callback', 'linprog_terse_callback']
|
| 34 |
+
|
| 35 |
+
__docformat__ = "restructuredtext en"
|
| 36 |
+
|
| 37 |
+
LINPROG_METHODS = [
|
| 38 |
+
'simplex', 'revised simplex', 'interior-point', 'highs', 'highs-ds', 'highs-ipm'
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def linprog_verbose_callback(res):
    """
    A sample callback demonstrating the linprog callback interface.

    Prints a detailed progress report to ``sys.stdout`` before each
    iteration and after the final iteration of the simplex algorithm.

    Parameters
    ----------
    res : A `scipy.optimize.OptimizeResult` consisting of the following fields:

        x : 1-D array
            The independent variable vector which optimizes the linear
            programming problem.
        fun : float
            Value of the objective function.
        success : bool
            True if the algorithm succeeded in finding an optimal solution.
        slack : 1-D array
            The values of the slack variables. Each slack variable
            corresponds to an inequality constraint; a zero slack means the
            corresponding constraint is active.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints,
            that is, ``b - A_eq @ x``.
        phase : int
            The phase of the optimization being executed. In phase 1 a
            basic feasible solution is sought and the tableau has an
            additional row representing an alternate objective function.
        status : int
            An integer representing the exit status of the optimization::

                0 : Optimization terminated successfully
                1 : Iteration limit reached
                2 : Problem appears to be infeasible
                3 : Problem appears to be unbounded
                4 : Serious numerical difficulties encountered

        nit : int
            The number of iterations performed.
        message : str
            A string descriptor of the exit status of the optimization.
    """
    # Pull the fields this report uses out of the result object.
    solution = res['x']
    objective = res['fun']
    phase = res['phase']
    status = res['status']
    iteration = res['nit']
    message = res['message']
    finished = res['complete']

    # Temporarily widen numpy's print settings so the solution vector
    # stays on one line with aligned fixed-point floats; the previous
    # settings are restored before returning.
    previous_options = np.get_printoptions()
    np.set_printoptions(
        linewidth=500,
        formatter={'float': lambda v: f"{v: 12.4f}"},
    )

    if status:
        # Nonzero status means the solver stopped before reaching optimality.
        print('--------- Simplex Early Exit -------\n')
        print(f'The simplex method exited early with status {status:d}')
        print(message)
    elif finished:
        print('--------- Simplex Complete --------\n')
        print(f'Iterations required: {iteration}')
    else:
        print(f'--------- Iteration {iteration:d} ---------\n')

    if iteration > 0:
        # Phase 1 optimizes an auxiliary (pseudo-)objective to find a
        # basic feasible solution; label the printed value accordingly.
        header = ('Current Pseudo-Objective Value:' if phase == 1
                  else 'Current Objective Value:')
        print(header)
        print('f = ', objective)
        print()
        print('Current Solution Vector:')
        print('x = ', solution)
        print()

    np.set_printoptions(**previous_options)
|
| 118 |
+
|
| 119 |
+
def linprog_terse_callback(res):
    """
    A sample callback demonstrating the linprog callback interface.

    Prints a one-line progress report (iteration number and current
    solution vector) to ``sys.stdout`` before each iteration and after
    the final iteration of the simplex algorithm.

    Parameters
    ----------
    res : A `scipy.optimize.OptimizeResult` consisting of the following fields:

        x : 1-D array
            The independent variable vector which optimizes the linear
            programming problem.
        fun : float
            Value of the objective function.
        success : bool
            True if the algorithm succeeded in finding an optimal solution.
        slack : 1-D array
            The values of the slack variables. Each slack variable
            corresponds to an inequality constraint; a zero slack means the
            corresponding constraint is active.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints,
            that is, ``b - A_eq @ x``.
        phase : int
            The phase of the optimization being executed. In phase 1 a
            basic feasible solution is sought and the tableau has an
            additional row representing an alternate objective function.
        status : int
            An integer representing the exit status of the optimization::

                0 : Optimization terminated successfully
                1 : Iteration limit reached
                2 : Problem appears to be infeasible
                3 : Problem appears to be unbounded
                4 : Serious numerical difficulties encountered

        nit : int
            The number of iterations performed.
        message : str
            A string descriptor of the exit status of the optimization.
    """
    iteration = res['nit']
    solution = res['x']

    # Print the column header exactly once, before the first iteration.
    if not iteration:
        print("Iter: X:")
    # Left-aligned iteration counter followed by the solution vector.
    print(f"{iteration: <5d} ", end="")
    print(solution)
|
| 169 |
+
|
| 170 |
+
def linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 171 |
+
bounds=(0, None), method='highs', callback=None,
|
| 172 |
+
options=None, x0=None, integrality=None):
|
| 173 |
+
r"""
|
| 174 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 175 |
+
equality and inequality constraints.
|
| 176 |
+
|
| 177 |
+
Linear programming solves problems of the following form:
|
| 178 |
+
|
| 179 |
+
.. math::
|
| 180 |
+
|
| 181 |
+
\min_x \ & c^T x \\
|
| 182 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 183 |
+
& A_{eq} x = b_{eq},\\
|
| 184 |
+
& l \leq x \leq u ,
|
| 185 |
+
|
| 186 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 187 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 188 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 189 |
+
|
| 190 |
+
Alternatively, that's:
|
| 191 |
+
|
| 192 |
+
- minimize ::
|
| 193 |
+
|
| 194 |
+
c @ x
|
| 195 |
+
|
| 196 |
+
- such that ::
|
| 197 |
+
|
| 198 |
+
A_ub @ x <= b_ub
|
| 199 |
+
A_eq @ x == b_eq
|
| 200 |
+
lb <= x <= ub
|
| 201 |
+
|
| 202 |
+
Note that by default ``lb = 0`` and ``ub = None``. Other bounds can be
|
| 203 |
+
specified with ``bounds``.
|
| 204 |
+
|
| 205 |
+
Parameters
|
| 206 |
+
----------
|
| 207 |
+
c : 1-D array
|
| 208 |
+
The coefficients of the linear objective function to be minimized.
|
| 209 |
+
A_ub : 2-D array, optional
|
| 210 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 211 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 212 |
+
b_ub : 1-D array, optional
|
| 213 |
+
The inequality constraint vector. Each element represents an
|
| 214 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 215 |
+
A_eq : 2-D array, optional
|
| 216 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 217 |
+
coefficients of a linear equality constraint on ``x``.
|
| 218 |
+
b_eq : 1-D array, optional
|
| 219 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 220 |
+
the corresponding element of ``b_eq``.
|
| 221 |
+
bounds : sequence, optional
|
| 222 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 223 |
+
the minimum and maximum values of that decision variable.
|
| 224 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and ``max``
|
| 225 |
+
will serve as bounds for all decision variables.
|
| 226 |
+
Use ``None`` to indicate that there is no bound. For instance, the
|
| 227 |
+
default bound ``(0, None)`` means that all decision variables are
|
| 228 |
+
non-negative, and the pair ``(None, None)`` means no bounds at all,
|
| 229 |
+
i.e. all variables are allowed to be any real.
|
| 230 |
+
method : str, optional
|
| 231 |
+
The algorithm used to solve the standard form problem.
|
| 232 |
+
:ref:`'highs' <optimize.linprog-highs>` (default),
|
| 233 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`,
|
| 234 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
|
| 235 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (legacy),
|
| 236 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>` (legacy),
|
| 237 |
+
and
|
| 238 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy) are supported.
|
| 239 |
+
The legacy methods are deprecated and will be removed in SciPy 1.11.0.
|
| 240 |
+
callback : callable, optional
|
| 241 |
+
If a callback function is provided, it will be called at least once per
|
| 242 |
+
iteration of the algorithm. The callback function must accept a single
|
| 243 |
+
`scipy.optimize.OptimizeResult` consisting of the following fields:
|
| 244 |
+
|
| 245 |
+
x : 1-D array
|
| 246 |
+
The current solution vector.
|
| 247 |
+
fun : float
|
| 248 |
+
The current value of the objective function ``c @ x``.
|
| 249 |
+
success : bool
|
| 250 |
+
``True`` when the algorithm has completed successfully.
|
| 251 |
+
slack : 1-D array
|
| 252 |
+
The (nominally positive) values of the slack,
|
| 253 |
+
``b_ub - A_ub @ x``.
|
| 254 |
+
con : 1-D array
|
| 255 |
+
The (nominally zero) residuals of the equality constraints,
|
| 256 |
+
``b_eq - A_eq @ x``.
|
| 257 |
+
phase : int
|
| 258 |
+
The phase of the algorithm being executed.
|
| 259 |
+
status : int
|
| 260 |
+
An integer representing the status of the algorithm.
|
| 261 |
+
|
| 262 |
+
``0`` : Optimization proceeding nominally.
|
| 263 |
+
|
| 264 |
+
``1`` : Iteration limit reached.
|
| 265 |
+
|
| 266 |
+
``2`` : Problem appears to be infeasible.
|
| 267 |
+
|
| 268 |
+
``3`` : Problem appears to be unbounded.
|
| 269 |
+
|
| 270 |
+
``4`` : Numerical difficulties encountered.
|
| 271 |
+
|
| 272 |
+
nit : int
|
| 273 |
+
The current iteration number.
|
| 274 |
+
message : str
|
| 275 |
+
A string descriptor of the algorithm status.
|
| 276 |
+
|
| 277 |
+
Callback functions are not currently supported by the HiGHS methods.
|
| 278 |
+
|
| 279 |
+
options : dict, optional
|
| 280 |
+
A dictionary of solver options. All methods accept the following
|
| 281 |
+
options:
|
| 282 |
+
|
| 283 |
+
maxiter : int
|
| 284 |
+
Maximum number of iterations to perform.
|
| 285 |
+
Default: see method-specific documentation.
|
| 286 |
+
disp : bool
|
| 287 |
+
Set to ``True`` to print convergence messages.
|
| 288 |
+
Default: ``False``.
|
| 289 |
+
presolve : bool
|
| 290 |
+
Set to ``False`` to disable automatic presolve.
|
| 291 |
+
Default: ``True``.
|
| 292 |
+
|
| 293 |
+
All methods except the HiGHS solvers also accept:
|
| 294 |
+
|
| 295 |
+
tol : float
|
| 296 |
+
A tolerance which determines when a residual is "close enough" to
|
| 297 |
+
zero to be considered exactly zero.
|
| 298 |
+
autoscale : bool
|
| 299 |
+
Set to ``True`` to automatically perform equilibration.
|
| 300 |
+
Consider using this option if the numerical values in the
|
| 301 |
+
constraints are separated by several orders of magnitude.
|
| 302 |
+
Default: ``False``.
|
| 303 |
+
rr : bool
|
| 304 |
+
Set to ``False`` to disable automatic redundancy removal.
|
| 305 |
+
Default: ``True``.
|
| 306 |
+
rr_method : string
|
| 307 |
+
Method used to identify and remove redundant rows from the
|
| 308 |
+
equality constraint matrix after presolve. For problems with
|
| 309 |
+
dense input, the available methods for redundancy removal are:
|
| 310 |
+
|
| 311 |
+
"SVD":
|
| 312 |
+
Repeatedly performs singular value decomposition on
|
| 313 |
+
the matrix, detecting redundant rows based on nonzeros
|
| 314 |
+
in the left singular vectors that correspond with
|
| 315 |
+
zero singular values. May be fast when the matrix is
|
| 316 |
+
nearly full rank.
|
| 317 |
+
"pivot":
|
| 318 |
+
Uses the algorithm presented in [5]_ to identify
|
| 319 |
+
redundant rows.
|
| 320 |
+
"ID":
|
| 321 |
+
Uses a randomized interpolative decomposition.
|
| 322 |
+
Identifies columns of the matrix transpose not used in
|
| 323 |
+
a full-rank interpolative decomposition of the matrix.
|
| 324 |
+
None:
|
| 325 |
+
Uses "svd" if the matrix is nearly full rank, that is,
|
| 326 |
+
the difference between the matrix rank and the number
|
| 327 |
+
of rows is less than five. If not, uses "pivot". The
|
| 328 |
+
behavior of this default is subject to change without
|
| 329 |
+
prior notice.
|
| 330 |
+
|
| 331 |
+
Default: None.
|
| 332 |
+
For problems with sparse input, this option is ignored, and the
|
| 333 |
+
pivot-based algorithm presented in [5]_ is used.
|
| 334 |
+
|
| 335 |
+
For method-specific options, see
|
| 336 |
+
:func:`show_options('linprog') <show_options>`.
|
| 337 |
+
|
| 338 |
+
x0 : 1-D array, optional
|
| 339 |
+
Guess values of the decision variables, which will be refined by
|
| 340 |
+
the optimization algorithm. This argument is currently used only by the
|
| 341 |
+
'revised simplex' method, and can only be used if `x0` represents a
|
| 342 |
+
basic feasible solution.
|
| 343 |
+
|
| 344 |
+
integrality : 1-D array or int, optional
|
| 345 |
+
Indicates the type of integrality constraint on each decision variable.
|
| 346 |
+
|
| 347 |
+
``0`` : Continuous variable; no integrality constraint.
|
| 348 |
+
|
| 349 |
+
``1`` : Integer variable; decision variable must be an integer
|
| 350 |
+
within `bounds`.
|
| 351 |
+
|
| 352 |
+
``2`` : Semi-continuous variable; decision variable must be within
|
| 353 |
+
`bounds` or take value ``0``.
|
| 354 |
+
|
| 355 |
+
``3`` : Semi-integer variable; decision variable must be an integer
|
| 356 |
+
within `bounds` or take value ``0``.
|
| 357 |
+
|
| 358 |
+
By default, all variables are continuous.
|
| 359 |
+
|
| 360 |
+
For mixed integrality constraints, supply an array of shape `c.shape`.
|
| 361 |
+
To infer a constraint on each decision variable from shorter inputs,
|
| 362 |
+
the argument will be broadcasted to `c.shape` using `np.broadcast_to`.
|
| 363 |
+
|
| 364 |
+
This argument is currently used only by the ``'highs'`` method and
|
| 365 |
+
ignored otherwise.
|
| 366 |
+
|
| 367 |
+
Returns
|
| 368 |
+
-------
|
| 369 |
+
res : OptimizeResult
|
| 370 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields
|
| 371 |
+
below. Note that the return types of the fields may depend on whether
|
| 372 |
+
the optimization was successful, therefore it is recommended to check
|
| 373 |
+
`OptimizeResult.status` before relying on the other fields:
|
| 374 |
+
|
| 375 |
+
x : 1-D array
|
| 376 |
+
The values of the decision variables that minimizes the
|
| 377 |
+
objective function while satisfying the constraints.
|
| 378 |
+
fun : float
|
| 379 |
+
The optimal value of the objective function ``c @ x``.
|
| 380 |
+
slack : 1-D array
|
| 381 |
+
The (nominally positive) values of the slack variables,
|
| 382 |
+
``b_ub - A_ub @ x``.
|
| 383 |
+
con : 1-D array
|
| 384 |
+
The (nominally zero) residuals of the equality constraints,
|
| 385 |
+
``b_eq - A_eq @ x``.
|
| 386 |
+
success : bool
|
| 387 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 388 |
+
solution.
|
| 389 |
+
status : int
|
| 390 |
+
An integer representing the exit status of the algorithm.
|
| 391 |
+
|
| 392 |
+
``0`` : Optimization terminated successfully.
|
| 393 |
+
|
| 394 |
+
``1`` : Iteration limit reached.
|
| 395 |
+
|
| 396 |
+
``2`` : Problem appears to be infeasible.
|
| 397 |
+
|
| 398 |
+
``3`` : Problem appears to be unbounded.
|
| 399 |
+
|
| 400 |
+
``4`` : Numerical difficulties encountered.
|
| 401 |
+
|
| 402 |
+
nit : int
|
| 403 |
+
The total number of iterations performed in all phases.
|
| 404 |
+
message : str
|
| 405 |
+
A string descriptor of the exit status of the algorithm.
|
| 406 |
+
|
| 407 |
+
See Also
|
| 408 |
+
--------
|
| 409 |
+
show_options : Additional options accepted by the solvers.
|
| 410 |
+
|
| 411 |
+
Notes
|
| 412 |
+
-----
|
| 413 |
+
This section describes the available solvers that can be selected by the
|
| 414 |
+
'method' parameter.
|
| 415 |
+
|
| 416 |
+
`'highs-ds'` and
|
| 417 |
+
`'highs-ipm'` are interfaces to the
|
| 418 |
+
HiGHS simplex and interior-point method solvers [13]_, respectively.
|
| 419 |
+
`'highs'` (default) chooses between
|
| 420 |
+
the two automatically. These are the fastest linear
|
| 421 |
+
programming solvers in SciPy, especially for large, sparse problems;
|
| 422 |
+
which of these two is faster is problem-dependent.
|
| 423 |
+
The other solvers (`'interior-point'`, `'revised simplex'`, and
|
| 424 |
+
`'simplex'`) are legacy methods and will be removed in SciPy 1.11.0.
|
| 425 |
+
|
| 426 |
+
Method *highs-ds* is a wrapper of the C++ high performance dual
|
| 427 |
+
revised simplex implementation (HSOL) [13]_, [14]_. Method *highs-ipm*
|
| 428 |
+
is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
|
| 429 |
+
**m**\ ethod [13]_; it features a crossover routine, so it is as accurate
|
| 430 |
+
as a simplex solver. Method *highs* chooses between the two automatically.
|
| 431 |
+
For new code involving `linprog`, we recommend explicitly choosing one of
|
| 432 |
+
these three method values.
|
| 433 |
+
|
| 434 |
+
.. versionadded:: 1.6.0
|
| 435 |
+
|
| 436 |
+
Method *interior-point* uses the primal-dual path following algorithm
|
| 437 |
+
as outlined in [4]_. This algorithm supports sparse constraint matrices and
|
| 438 |
+
is typically faster than the simplex methods, especially for large, sparse
|
| 439 |
+
problems. Note, however, that the solution returned may be slightly less
|
| 440 |
+
accurate than those of the simplex methods and will not, in general,
|
| 441 |
+
correspond with a vertex of the polytope defined by the constraints.
|
| 442 |
+
|
| 443 |
+
.. versionadded:: 1.0.0
|
| 444 |
+
|
| 445 |
+
Method *revised simplex* uses the revised simplex method as described in
|
| 446 |
+
[9]_, except that a factorization [11]_ of the basis matrix, rather than
|
| 447 |
+
its inverse, is efficiently maintained and used to solve the linear systems
|
| 448 |
+
at each iteration of the algorithm.
|
| 449 |
+
|
| 450 |
+
.. versionadded:: 1.3.0
|
| 451 |
+
|
| 452 |
+
Method *simplex* uses a traditional, full-tableau implementation of
|
| 453 |
+
Dantzig's simplex algorithm [1]_, [2]_ (*not* the
|
| 454 |
+
Nelder-Mead simplex). This algorithm is included for backwards
|
| 455 |
+
compatibility and educational purposes.
|
| 456 |
+
|
| 457 |
+
.. versionadded:: 0.15.0
|
| 458 |
+
|
| 459 |
+
Before applying *interior-point*, *revised simplex*, or *simplex*,
|
| 460 |
+
a presolve procedure based on [8]_ attempts
|
| 461 |
+
to identify trivial infeasibilities, trivial unboundedness, and potential
|
| 462 |
+
problem simplifications. Specifically, it checks for:
|
| 463 |
+
|
| 464 |
+
- rows of zeros in ``A_eq`` or ``A_ub``, representing trivial constraints;
|
| 465 |
+
- columns of zeros in ``A_eq`` `and` ``A_ub``, representing unconstrained
|
| 466 |
+
variables;
|
| 467 |
+
- column singletons in ``A_eq``, representing fixed variables; and
|
| 468 |
+
- column singletons in ``A_ub``, representing simple bounds.
|
| 469 |
+
|
| 470 |
+
If presolve reveals that the problem is unbounded (e.g. an unconstrained
|
| 471 |
+
and unbounded variable has negative cost) or infeasible (e.g., a row of
|
| 472 |
+
zeros in ``A_eq`` corresponds with a nonzero in ``b_eq``), the solver
|
| 473 |
+
terminates with the appropriate status code. Note that presolve terminates
|
| 474 |
+
as soon as any sign of unboundedness is detected; consequently, a problem
|
| 475 |
+
may be reported as unbounded when in reality the problem is infeasible
|
| 476 |
+
(but infeasibility has not been detected yet). Therefore, if it is
|
| 477 |
+
important to know whether the problem is actually infeasible, solve the
|
| 478 |
+
problem again with option ``presolve=False``.
|
| 479 |
+
|
| 480 |
+
If neither infeasibility nor unboundedness are detected in a single pass
|
| 481 |
+
of the presolve, bounds are tightened where possible and fixed
|
| 482 |
+
variables are removed from the problem. Then, linearly dependent rows
|
| 483 |
+
of the ``A_eq`` matrix are removed, (unless they represent an
|
| 484 |
+
infeasibility) to avoid numerical difficulties in the primary solve
|
| 485 |
+
routine. Note that rows that are nearly linearly dependent (within a
|
| 486 |
+
prescribed tolerance) may also be removed, which can change the optimal
|
| 487 |
+
solution in rare cases. If this is a concern, eliminate redundancy from
|
| 488 |
+
your problem formulation and run with option ``rr=False`` or
|
| 489 |
+
``presolve=False``.
|
| 490 |
+
|
| 491 |
+
Several potential improvements can be made here: additional presolve
|
| 492 |
+
checks outlined in [8]_ should be implemented, the presolve routine should
|
| 493 |
+
be run multiple times (until no further simplifications can be made), and
|
| 494 |
+
more of the efficiency improvements from [5]_ should be implemented in the
|
| 495 |
+
redundancy removal routines.
|
| 496 |
+
|
| 497 |
+
After presolve, the problem is transformed to standard form by converting
|
| 498 |
+
the (tightened) simple bounds to upper bound constraints, introducing
|
| 499 |
+
non-negative slack variables for inequality constraints, and expressing
|
| 500 |
+
unbounded variables as the difference between two non-negative variables.
|
| 501 |
+
Optionally, the problem is automatically scaled via equilibration [12]_.
|
| 502 |
+
The selected algorithm solves the standard form problem, and a
|
| 503 |
+
postprocessing routine converts the result to a solution to the original
|
| 504 |
+
problem.
|
| 505 |
+
|
| 506 |
+
References
|
| 507 |
+
----------
|
| 508 |
+
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
| 509 |
+
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
| 510 |
+
1963
|
| 511 |
+
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
| 512 |
+
Mathematical Programming", McGraw-Hill, Chapter 4.
|
| 513 |
+
.. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
|
| 514 |
+
Mathematics of Operations Research (2), 1977: pp. 103-107.
|
| 515 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 516 |
+
optimizer for linear programming: an implementation of the
|
| 517 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 518 |
+
2000. 197-232.
|
| 519 |
+
.. [5] Andersen, Erling D. "Finding all linearly dependent rows in
|
| 520 |
+
large-scale linear programming." Optimization Methods and Software
|
| 521 |
+
6.3 (1995): 219-227.
|
| 522 |
+
.. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
|
| 523 |
+
Programming based on Newton's Method." Unpublished Course Notes,
|
| 524 |
+
March 2004. Available 2/25/2017 at
|
| 525 |
+
https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
|
| 526 |
+
.. [7] Fourer, Robert. "Solving Linear Programs by Interior-Point Methods."
|
| 527 |
+
Unpublished Course Notes, August 26, 2005. Available 2/25/2017 at
|
| 528 |
+
http://www.4er.org/CourseNotes/Book%20B/B-III.pdf
|
| 529 |
+
.. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
|
| 530 |
+
programming." Mathematical Programming 71.2 (1995): 221-245.
|
| 531 |
+
.. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
| 532 |
+
programming." Athena Scientific 1 (1997): 997.
|
| 533 |
+
.. [10] Andersen, Erling D., et al. Implementation of interior point
|
| 534 |
+
methods for large scale linear programming. HEC/Universite de
|
| 535 |
+
Geneve, 1996.
|
| 536 |
+
.. [11] Bartels, Richard H. "A stabilization of the simplex method."
|
| 537 |
+
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
| 538 |
+
.. [12] Tomlin, J. A. "On scaling linear programming problems."
|
| 539 |
+
Mathematical Programming Study 4 (1975): 146-166.
|
| 540 |
+
.. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
|
| 541 |
+
"HiGHS - high performance software for linear optimization."
|
| 542 |
+
https://highs.dev/
|
| 543 |
+
.. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
|
| 544 |
+
simplex method." Mathematical Programming Computation, 10 (1),
|
| 545 |
+
119-142, 2018. DOI: 10.1007/s12532-017-0130-5
|
| 546 |
+
|
| 547 |
+
Examples
|
| 548 |
+
--------
|
| 549 |
+
Consider the following problem:
|
| 550 |
+
|
| 551 |
+
.. math::
|
| 552 |
+
|
| 553 |
+
\min_{x_0, x_1} \ -x_0 + 4x_1 & \\
|
| 554 |
+
\mbox{such that} \ -3x_0 + x_1 & \leq 6,\\
|
| 555 |
+
-x_0 - 2x_1 & \geq -4,\\
|
| 556 |
+
x_1 & \geq -3.
|
| 557 |
+
|
| 558 |
+
The problem is not presented in the form accepted by `linprog`. This is
|
| 559 |
+
easily remedied by converting the "greater than" inequality
|
| 560 |
+
constraint to a "less than" inequality constraint by
|
| 561 |
+
multiplying both sides by a factor of :math:`-1`. Note also that the last
|
| 562 |
+
constraint is really the simple bound :math:`-3 \leq x_1 \leq \infty`.
|
| 563 |
+
Finally, since there are no bounds on :math:`x_0`, we must explicitly
|
| 564 |
+
specify the bounds :math:`-\infty \leq x_0 \leq \infty`, as the
|
| 565 |
+
default is for variables to be non-negative. After collecting coeffecients
|
| 566 |
+
into arrays and tuples, the input for this problem is:
|
| 567 |
+
|
| 568 |
+
>>> from scipy.optimize import linprog
|
| 569 |
+
>>> c = [-1, 4]
|
| 570 |
+
>>> A = [[-3, 1], [1, 2]]
|
| 571 |
+
>>> b = [6, 4]
|
| 572 |
+
>>> x0_bounds = (None, None)
|
| 573 |
+
>>> x1_bounds = (-3, None)
|
| 574 |
+
>>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds])
|
| 575 |
+
>>> res.fun
|
| 576 |
+
-22.0
|
| 577 |
+
>>> res.x
|
| 578 |
+
array([10., -3.])
|
| 579 |
+
>>> res.message
|
| 580 |
+
'Optimization terminated successfully. (HiGHS Status 7: Optimal)'
|
| 581 |
+
|
| 582 |
+
The marginals (AKA dual values / shadow prices / Lagrange multipliers)
|
| 583 |
+
and residuals (slacks) are also available.
|
| 584 |
+
|
| 585 |
+
>>> res.ineqlin
|
| 586 |
+
residual: [ 3.900e+01 0.000e+00]
|
| 587 |
+
marginals: [-0.000e+00 -1.000e+00]
|
| 588 |
+
|
| 589 |
+
For example, because the marginal associated with the second inequality
|
| 590 |
+
constraint is -1, we expect the optimal value of the objective function
|
| 591 |
+
to decrease by ``eps`` if we add a small amount ``eps`` to the right hand
|
| 592 |
+
side of the second inequality constraint:
|
| 593 |
+
|
| 594 |
+
>>> eps = 0.05
|
| 595 |
+
>>> b[1] += eps
|
| 596 |
+
>>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
|
| 597 |
+
-22.05
|
| 598 |
+
|
| 599 |
+
Also, because the residual on the first inequality constraint is 39, we
|
| 600 |
+
can decrease the right hand side of the first constraint by 39 without
|
| 601 |
+
affecting the optimal solution.
|
| 602 |
+
|
| 603 |
+
>>> b = [6, 4] # reset to original values
|
| 604 |
+
>>> b[0] -= 39
|
| 605 |
+
>>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
|
| 606 |
+
-22.0
|
| 607 |
+
|
| 608 |
+
"""
|
| 609 |
+
|
| 610 |
+
meth = method.lower()
|
| 611 |
+
methods = {"highs", "highs-ds", "highs-ipm",
|
| 612 |
+
"simplex", "revised simplex", "interior-point"}
|
| 613 |
+
|
| 614 |
+
if meth not in methods:
|
| 615 |
+
raise ValueError(f"Unknown solver '{method}'")
|
| 616 |
+
|
| 617 |
+
if x0 is not None and meth != "revised simplex":
|
| 618 |
+
warning_message = "x0 is used only when method is 'revised simplex'. "
|
| 619 |
+
warn(warning_message, OptimizeWarning, stacklevel=2)
|
| 620 |
+
|
| 621 |
+
if np.any(integrality) and not meth == "highs":
|
| 622 |
+
integrality = None
|
| 623 |
+
warning_message = ("Only `method='highs'` supports integer "
|
| 624 |
+
"constraints. Ignoring `integrality`.")
|
| 625 |
+
warn(warning_message, OptimizeWarning, stacklevel=2)
|
| 626 |
+
elif np.any(integrality):
|
| 627 |
+
integrality = np.broadcast_to(integrality, np.shape(c))
|
| 628 |
+
else:
|
| 629 |
+
integrality = None
|
| 630 |
+
|
| 631 |
+
lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality)
|
| 632 |
+
lp, solver_options = _parse_linprog(lp, options, meth)
|
| 633 |
+
tol = solver_options.get('tol', 1e-9)
|
| 634 |
+
|
| 635 |
+
# Give unmodified problem to HiGHS
|
| 636 |
+
if meth.startswith('highs'):
|
| 637 |
+
if callback is not None:
|
| 638 |
+
raise NotImplementedError("HiGHS solvers do not support the "
|
| 639 |
+
"callback interface.")
|
| 640 |
+
highs_solvers = {'highs-ipm': 'ipm', 'highs-ds': 'simplex',
|
| 641 |
+
'highs': None}
|
| 642 |
+
|
| 643 |
+
sol = _linprog_highs(lp, solver=highs_solvers[meth],
|
| 644 |
+
**solver_options)
|
| 645 |
+
sol['status'], sol['message'] = (
|
| 646 |
+
_check_result(sol['x'], sol['fun'], sol['status'], sol['slack'],
|
| 647 |
+
sol['con'], lp.bounds, tol, sol['message'],
|
| 648 |
+
integrality))
|
| 649 |
+
sol['success'] = sol['status'] == 0
|
| 650 |
+
return OptimizeResult(sol)
|
| 651 |
+
|
| 652 |
+
warn(f"`method='{meth}'` is deprecated and will be removed in SciPy "
|
| 653 |
+
"1.11.0. Please use one of the HiGHS solvers (e.g. "
|
| 654 |
+
"`method='highs'`) in new code.", DeprecationWarning, stacklevel=2)
|
| 655 |
+
|
| 656 |
+
iteration = 0
|
| 657 |
+
complete = False # will become True if solved in presolve
|
| 658 |
+
undo = []
|
| 659 |
+
|
| 660 |
+
# Keep the original arrays to calculate slack/residuals for original
|
| 661 |
+
# problem.
|
| 662 |
+
lp_o = deepcopy(lp)
|
| 663 |
+
|
| 664 |
+
# Solve trivial problem, eliminate variables, tighten bounds, etc.
|
| 665 |
+
rr_method = solver_options.pop('rr_method', None) # need to pop these;
|
| 666 |
+
rr = solver_options.pop('rr', True) # they're not passed to methods
|
| 667 |
+
c0 = 0 # we might get a constant term in the objective
|
| 668 |
+
if solver_options.pop('presolve', True):
|
| 669 |
+
(lp, c0, x, undo, complete, status, message) = _presolve(lp, rr,
|
| 670 |
+
rr_method,
|
| 671 |
+
tol)
|
| 672 |
+
|
| 673 |
+
C, b_scale = 1, 1 # for trivial unscaling if autoscale is not used
|
| 674 |
+
postsolve_args = (lp_o._replace(bounds=lp.bounds), undo, C, b_scale)
|
| 675 |
+
|
| 676 |
+
if not complete:
|
| 677 |
+
A, b, c, c0, x0 = _get_Abc(lp, c0)
|
| 678 |
+
if solver_options.pop('autoscale', False):
|
| 679 |
+
A, b, c, x0, C, b_scale = _autoscale(A, b, c, x0)
|
| 680 |
+
postsolve_args = postsolve_args[:-2] + (C, b_scale)
|
| 681 |
+
|
| 682 |
+
if meth == 'simplex':
|
| 683 |
+
x, status, message, iteration = _linprog_simplex(
|
| 684 |
+
c, c0=c0, A=A, b=b, callback=callback,
|
| 685 |
+
postsolve_args=postsolve_args, **solver_options)
|
| 686 |
+
elif meth == 'interior-point':
|
| 687 |
+
x, status, message, iteration = _linprog_ip(
|
| 688 |
+
c, c0=c0, A=A, b=b, callback=callback,
|
| 689 |
+
postsolve_args=postsolve_args, **solver_options)
|
| 690 |
+
elif meth == 'revised simplex':
|
| 691 |
+
x, status, message, iteration = _linprog_rs(
|
| 692 |
+
c, c0=c0, A=A, b=b, x0=x0, callback=callback,
|
| 693 |
+
postsolve_args=postsolve_args, **solver_options)
|
| 694 |
+
|
| 695 |
+
# Eliminate artificial variables, re-introduce presolved variables, etc.
|
| 696 |
+
disp = solver_options.get('disp', False)
|
| 697 |
+
|
| 698 |
+
x, fun, slack, con = _postsolve(x, postsolve_args, complete)
|
| 699 |
+
|
| 700 |
+
status, message = _check_result(x, fun, status, slack, con, lp_o.bounds,
|
| 701 |
+
tol, message, integrality)
|
| 702 |
+
|
| 703 |
+
if disp:
|
| 704 |
+
_display_summary(message, status, fun, iteration)
|
| 705 |
+
|
| 706 |
+
sol = {
|
| 707 |
+
'x': x,
|
| 708 |
+
'fun': fun,
|
| 709 |
+
'slack': slack,
|
| 710 |
+
'con': con,
|
| 711 |
+
'status': status,
|
| 712 |
+
'message': message,
|
| 713 |
+
'nit': iteration,
|
| 714 |
+
'success': status == 0}
|
| 715 |
+
|
| 716 |
+
return OptimizeResult(sol)
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py
ADDED
|
@@ -0,0 +1,1434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Created on Sat Aug 22 19:49:17 2020
|
| 3 |
+
|
| 4 |
+
@author: matth
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _linprog_highs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 9 |
+
bounds=None, method='highs', callback=None,
|
| 10 |
+
maxiter=None, disp=False, presolve=True,
|
| 11 |
+
time_limit=None,
|
| 12 |
+
dual_feasibility_tolerance=None,
|
| 13 |
+
primal_feasibility_tolerance=None,
|
| 14 |
+
ipm_optimality_tolerance=None,
|
| 15 |
+
simplex_dual_edge_weight_strategy=None,
|
| 16 |
+
mip_rel_gap=None,
|
| 17 |
+
**unknown_options):
|
| 18 |
+
r"""
|
| 19 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 20 |
+
equality and inequality constraints using one of the HiGHS solvers.
|
| 21 |
+
|
| 22 |
+
Linear programming solves problems of the following form:
|
| 23 |
+
|
| 24 |
+
.. math::
|
| 25 |
+
|
| 26 |
+
\min_x \ & c^T x \\
|
| 27 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 28 |
+
& A_{eq} x = b_{eq},\\
|
| 29 |
+
& l \leq x \leq u ,
|
| 30 |
+
|
| 31 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 32 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 33 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 34 |
+
|
| 35 |
+
Alternatively, that's:
|
| 36 |
+
|
| 37 |
+
minimize::
|
| 38 |
+
|
| 39 |
+
c @ x
|
| 40 |
+
|
| 41 |
+
such that::
|
| 42 |
+
|
| 43 |
+
A_ub @ x <= b_ub
|
| 44 |
+
A_eq @ x == b_eq
|
| 45 |
+
lb <= x <= ub
|
| 46 |
+
|
| 47 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 48 |
+
``bounds``.
|
| 49 |
+
|
| 50 |
+
Parameters
|
| 51 |
+
----------
|
| 52 |
+
c : 1-D array
|
| 53 |
+
The coefficients of the linear objective function to be minimized.
|
| 54 |
+
A_ub : 2-D array, optional
|
| 55 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 56 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 57 |
+
b_ub : 1-D array, optional
|
| 58 |
+
The inequality constraint vector. Each element represents an
|
| 59 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 60 |
+
A_eq : 2-D array, optional
|
| 61 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 62 |
+
coefficients of a linear equality constraint on ``x``.
|
| 63 |
+
b_eq : 1-D array, optional
|
| 64 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 65 |
+
the corresponding element of ``b_eq``.
|
| 66 |
+
bounds : sequence, optional
|
| 67 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 68 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 69 |
+
to indicate that there is no bound. By default, bounds are
|
| 70 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 71 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 72 |
+
``max`` will serve as bounds for all decision variables.
|
| 73 |
+
method : str
|
| 74 |
+
|
| 75 |
+
This is the method-specific documentation for 'highs', which chooses
|
| 76 |
+
automatically between
|
| 77 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>` and
|
| 78 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 79 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 80 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 81 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
| 82 |
+
are also available.
|
| 83 |
+
integrality : 1-D array or int, optional
|
| 84 |
+
Indicates the type of integrality constraint on each decision variable.
|
| 85 |
+
|
| 86 |
+
``0`` : Continuous variable; no integrality constraint.
|
| 87 |
+
|
| 88 |
+
``1`` : Integer variable; decision variable must be an integer
|
| 89 |
+
within `bounds`.
|
| 90 |
+
|
| 91 |
+
``2`` : Semi-continuous variable; decision variable must be within
|
| 92 |
+
`bounds` or take value ``0``.
|
| 93 |
+
|
| 94 |
+
``3`` : Semi-integer variable; decision variable must be an integer
|
| 95 |
+
within `bounds` or take value ``0``.
|
| 96 |
+
|
| 97 |
+
By default, all variables are continuous.
|
| 98 |
+
|
| 99 |
+
For mixed integrality constraints, supply an array of shape `c.shape`.
|
| 100 |
+
To infer a constraint on each decision variable from shorter inputs,
|
| 101 |
+
the argument will be broadcasted to `c.shape` using `np.broadcast_to`.
|
| 102 |
+
|
| 103 |
+
This argument is currently used only by the ``'highs'`` method and
|
| 104 |
+
ignored otherwise.
|
| 105 |
+
|
| 106 |
+
Options
|
| 107 |
+
-------
|
| 108 |
+
maxiter : int
|
| 109 |
+
The maximum number of iterations to perform in either phase.
|
| 110 |
+
For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
|
| 111 |
+
include the number of crossover iterations. Default is the largest
|
| 112 |
+
possible value for an ``int`` on the platform.
|
| 113 |
+
disp : bool (default: ``False``)
|
| 114 |
+
Set to ``True`` if indicators of optimization status are to be
|
| 115 |
+
printed to the console during optimization.
|
| 116 |
+
presolve : bool (default: ``True``)
|
| 117 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 118 |
+
identify trivial unboundedness, and simplify the problem before
|
| 119 |
+
sending it to the main solver. It is generally recommended
|
| 120 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 121 |
+
presolve is to be disabled.
|
| 122 |
+
time_limit : float
|
| 123 |
+
The maximum time in seconds allotted to solve the problem;
|
| 124 |
+
default is the largest possible value for a ``double`` on the
|
| 125 |
+
platform.
|
| 126 |
+
dual_feasibility_tolerance : double (default: 1e-07)
|
| 127 |
+
Dual feasibility tolerance for
|
| 128 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`.
|
| 129 |
+
The minimum of this and ``primal_feasibility_tolerance``
|
| 130 |
+
is used for the feasibility tolerance of
|
| 131 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 132 |
+
primal_feasibility_tolerance : double (default: 1e-07)
|
| 133 |
+
Primal feasibility tolerance for
|
| 134 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`.
|
| 135 |
+
The minimum of this and ``dual_feasibility_tolerance``
|
| 136 |
+
is used for the feasibility tolerance of
|
| 137 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 138 |
+
ipm_optimality_tolerance : double (default: ``1e-08``)
|
| 139 |
+
Optimality tolerance for
|
| 140 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 141 |
+
Minimum allowable value is 1e-12.
|
| 142 |
+
simplex_dual_edge_weight_strategy : str (default: None)
|
| 143 |
+
Strategy for simplex dual edge weights. The default, ``None``,
|
| 144 |
+
automatically selects one of the following.
|
| 145 |
+
|
| 146 |
+
``'dantzig'`` uses Dantzig's original strategy of choosing the most
|
| 147 |
+
negative reduced cost.
|
| 148 |
+
|
| 149 |
+
``'devex'`` uses the strategy described in [15]_.
|
| 150 |
+
|
| 151 |
+
``steepest`` uses the exact steepest edge strategy as described in
|
| 152 |
+
[16]_.
|
| 153 |
+
|
| 154 |
+
``'steepest-devex'`` begins with the exact steepest edge strategy
|
| 155 |
+
until the computation is too costly or inexact and then switches to
|
| 156 |
+
the devex method.
|
| 157 |
+
|
| 158 |
+
Currently, ``None`` always selects ``'steepest-devex'``, but this
|
| 159 |
+
may change as new options become available.
|
| 160 |
+
mip_rel_gap : double (default: None)
|
| 161 |
+
Termination criterion for MIP solver: solver will terminate when the
|
| 162 |
+
gap between the primal objective value and the dual objective bound,
|
| 163 |
+
scaled by the primal objective value, is <= mip_rel_gap.
|
| 164 |
+
unknown_options : dict
|
| 165 |
+
Optional arguments not used by this particular solver. If
|
| 166 |
+
``unknown_options`` is non-empty, a warning is issued listing
|
| 167 |
+
all unused options.
|
| 168 |
+
|
| 169 |
+
Returns
|
| 170 |
+
-------
|
| 171 |
+
res : OptimizeResult
|
| 172 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 173 |
+
|
| 174 |
+
x : 1D array
|
| 175 |
+
The values of the decision variables that minimizes the
|
| 176 |
+
objective function while satisfying the constraints.
|
| 177 |
+
fun : float
|
| 178 |
+
The optimal value of the objective function ``c @ x``.
|
| 179 |
+
slack : 1D array
|
| 180 |
+
The (nominally positive) values of the slack,
|
| 181 |
+
``b_ub - A_ub @ x``.
|
| 182 |
+
con : 1D array
|
| 183 |
+
The (nominally zero) residuals of the equality constraints,
|
| 184 |
+
``b_eq - A_eq @ x``.
|
| 185 |
+
success : bool
|
| 186 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 187 |
+
solution.
|
| 188 |
+
status : int
|
| 189 |
+
An integer representing the exit status of the algorithm.
|
| 190 |
+
|
| 191 |
+
``0`` : Optimization terminated successfully.
|
| 192 |
+
|
| 193 |
+
``1`` : Iteration or time limit reached.
|
| 194 |
+
|
| 195 |
+
``2`` : Problem appears to be infeasible.
|
| 196 |
+
|
| 197 |
+
``3`` : Problem appears to be unbounded.
|
| 198 |
+
|
| 199 |
+
``4`` : The HiGHS solver ran into a problem.
|
| 200 |
+
|
| 201 |
+
message : str
|
| 202 |
+
A string descriptor of the exit status of the algorithm.
|
| 203 |
+
nit : int
|
| 204 |
+
The total number of iterations performed.
|
| 205 |
+
For the HiGHS simplex method, this includes iterations in all
|
| 206 |
+
phases. For the HiGHS interior-point method, this does not include
|
| 207 |
+
crossover iterations.
|
| 208 |
+
crossover_nit : int
|
| 209 |
+
The number of primal/dual pushes performed during the
|
| 210 |
+
crossover routine for the HiGHS interior-point method.
|
| 211 |
+
This is ``0`` for the HiGHS simplex method.
|
| 212 |
+
ineqlin : OptimizeResult
|
| 213 |
+
Solution and sensitivity information corresponding to the
|
| 214 |
+
inequality constraints, `b_ub`. A dictionary consisting of the
|
| 215 |
+
fields:
|
| 216 |
+
|
| 217 |
+
residual : np.ndnarray
|
| 218 |
+
The (nominally positive) values of the slack variables,
|
| 219 |
+
``b_ub - A_ub @ x``. This quantity is also commonly
|
| 220 |
+
referred to as "slack".
|
| 221 |
+
|
| 222 |
+
marginals : np.ndarray
|
| 223 |
+
The sensitivity (partial derivative) of the objective
|
| 224 |
+
function with respect to the right-hand side of the
|
| 225 |
+
inequality constraints, `b_ub`.
|
| 226 |
+
|
| 227 |
+
eqlin : OptimizeResult
|
| 228 |
+
Solution and sensitivity information corresponding to the
|
| 229 |
+
equality constraints, `b_eq`. A dictionary consisting of the
|
| 230 |
+
fields:
|
| 231 |
+
|
| 232 |
+
residual : np.ndarray
|
| 233 |
+
The (nominally zero) residuals of the equality constraints,
|
| 234 |
+
``b_eq - A_eq @ x``.
|
| 235 |
+
|
| 236 |
+
marginals : np.ndarray
|
| 237 |
+
The sensitivity (partial derivative) of the objective
|
| 238 |
+
function with respect to the right-hand side of the
|
| 239 |
+
equality constraints, `b_eq`.
|
| 240 |
+
|
| 241 |
+
lower, upper : OptimizeResult
|
| 242 |
+
Solution and sensitivity information corresponding to the
|
| 243 |
+
lower and upper bounds on decision variables, `bounds`.
|
| 244 |
+
|
| 245 |
+
residual : np.ndarray
|
| 246 |
+
The (nominally positive) values of the quantity
|
| 247 |
+
``x - lb`` (lower) or ``ub - x`` (upper).
|
| 248 |
+
|
| 249 |
+
marginals : np.ndarray
|
| 250 |
+
The sensitivity (partial derivative) of the objective
|
| 251 |
+
function with respect to the lower and upper
|
| 252 |
+
`bounds`.
|
| 253 |
+
|
| 254 |
+
Notes
|
| 255 |
+
-----
|
| 256 |
+
|
| 257 |
+
Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
|
| 258 |
+
of the C++ high performance dual revised simplex implementation (HSOL)
|
| 259 |
+
[13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
|
| 260 |
+
is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
|
| 261 |
+
**m**\ ethod [13]_; it features a crossover routine, so it is as accurate
|
| 262 |
+
as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>` chooses
|
| 263 |
+
between the two automatically. For new code involving `linprog`, we
|
| 264 |
+
recommend explicitly choosing one of these three method values instead of
|
| 265 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 266 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 267 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy).
|
| 268 |
+
|
| 269 |
+
The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
|
| 270 |
+
`marginals`, or partial derivatives of the objective function with respect
|
| 271 |
+
to the right-hand side of each constraint. These partial derivatives are
|
| 272 |
+
also referred to as "Lagrange multipliers", "dual values", and
|
| 273 |
+
"shadow prices". The sign convention of `marginals` is opposite that
|
| 274 |
+
of Lagrange multipliers produced by many nonlinear solvers.
|
| 275 |
+
|
| 276 |
+
References
|
| 277 |
+
----------
|
| 278 |
+
.. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
|
| 279 |
+
"HiGHS - high performance software for linear optimization."
|
| 280 |
+
https://highs.dev/
|
| 281 |
+
.. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
|
| 282 |
+
simplex method." Mathematical Programming Computation, 10 (1),
|
| 283 |
+
119-142, 2018. DOI: 10.1007/s12532-017-0130-5
|
| 284 |
+
.. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
|
| 285 |
+
Mathematical programming 5.1 (1973): 1-28.
|
| 286 |
+
.. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
|
| 287 |
+
simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
|
| 288 |
+
"""
|
| 289 |
+
pass
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _linprog_highs_ds_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 293 |
+
bounds=None, method='highs-ds', callback=None,
|
| 294 |
+
maxiter=None, disp=False, presolve=True,
|
| 295 |
+
time_limit=None,
|
| 296 |
+
dual_feasibility_tolerance=None,
|
| 297 |
+
primal_feasibility_tolerance=None,
|
| 298 |
+
simplex_dual_edge_weight_strategy=None,
|
| 299 |
+
**unknown_options):
|
| 300 |
+
r"""
|
| 301 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 302 |
+
equality and inequality constraints using the HiGHS dual simplex solver.
|
| 303 |
+
|
| 304 |
+
Linear programming solves problems of the following form:
|
| 305 |
+
|
| 306 |
+
.. math::
|
| 307 |
+
|
| 308 |
+
\min_x \ & c^T x \\
|
| 309 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 310 |
+
& A_{eq} x = b_{eq},\\
|
| 311 |
+
& l \leq x \leq u ,
|
| 312 |
+
|
| 313 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 314 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 315 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 316 |
+
|
| 317 |
+
Alternatively, that's:
|
| 318 |
+
|
| 319 |
+
minimize::
|
| 320 |
+
|
| 321 |
+
c @ x
|
| 322 |
+
|
| 323 |
+
such that::
|
| 324 |
+
|
| 325 |
+
A_ub @ x <= b_ub
|
| 326 |
+
A_eq @ x == b_eq
|
| 327 |
+
lb <= x <= ub
|
| 328 |
+
|
| 329 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 330 |
+
``bounds``.
|
| 331 |
+
|
| 332 |
+
Parameters
|
| 333 |
+
----------
|
| 334 |
+
c : 1-D array
|
| 335 |
+
The coefficients of the linear objective function to be minimized.
|
| 336 |
+
A_ub : 2-D array, optional
|
| 337 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 338 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 339 |
+
b_ub : 1-D array, optional
|
| 340 |
+
The inequality constraint vector. Each element represents an
|
| 341 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 342 |
+
A_eq : 2-D array, optional
|
| 343 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 344 |
+
coefficients of a linear equality constraint on ``x``.
|
| 345 |
+
b_eq : 1-D array, optional
|
| 346 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 347 |
+
the corresponding element of ``b_eq``.
|
| 348 |
+
bounds : sequence, optional
|
| 349 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 350 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 351 |
+
to indicate that there is no bound. By default, bounds are
|
| 352 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 353 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 354 |
+
``max`` will serve as bounds for all decision variables.
|
| 355 |
+
method : str
|
| 356 |
+
|
| 357 |
+
This is the method-specific documentation for 'highs-ds'.
|
| 358 |
+
:ref:`'highs' <optimize.linprog-highs>`,
|
| 359 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
|
| 360 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 361 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 362 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
| 363 |
+
are also available.
|
| 364 |
+
|
| 365 |
+
Options
|
| 366 |
+
-------
|
| 367 |
+
maxiter : int
|
| 368 |
+
The maximum number of iterations to perform in either phase.
|
| 369 |
+
Default is the largest possible value for an ``int`` on the platform.
|
| 370 |
+
disp : bool (default: ``False``)
|
| 371 |
+
Set to ``True`` if indicators of optimization status are to be
|
| 372 |
+
printed to the console during optimization.
|
| 373 |
+
presolve : bool (default: ``True``)
|
| 374 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 375 |
+
identify trivial unboundedness, and simplify the problem before
|
| 376 |
+
sending it to the main solver. It is generally recommended
|
| 377 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 378 |
+
presolve is to be disabled.
|
| 379 |
+
time_limit : float
|
| 380 |
+
The maximum time in seconds allotted to solve the problem;
|
| 381 |
+
default is the largest possible value for a ``double`` on the
|
| 382 |
+
platform.
|
| 383 |
+
dual_feasibility_tolerance : double (default: 1e-07)
|
| 384 |
+
Dual feasibility tolerance for
|
| 385 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`.
|
| 386 |
+
primal_feasibility_tolerance : double (default: 1e-07)
|
| 387 |
+
Primal feasibility tolerance for
|
| 388 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`.
|
| 389 |
+
simplex_dual_edge_weight_strategy : str (default: None)
|
| 390 |
+
Strategy for simplex dual edge weights. The default, ``None``,
|
| 391 |
+
automatically selects one of the following.
|
| 392 |
+
|
| 393 |
+
``'dantzig'`` uses Dantzig's original strategy of choosing the most
|
| 394 |
+
negative reduced cost.
|
| 395 |
+
|
| 396 |
+
``'devex'`` uses the strategy described in [15]_.
|
| 397 |
+
|
| 398 |
+
``steepest`` uses the exact steepest edge strategy as described in
|
| 399 |
+
[16]_.
|
| 400 |
+
|
| 401 |
+
``'steepest-devex'`` begins with the exact steepest edge strategy
|
| 402 |
+
until the computation is too costly or inexact and then switches to
|
| 403 |
+
the devex method.
|
| 404 |
+
|
| 405 |
+
Currently, ``None`` always selects ``'steepest-devex'``, but this
|
| 406 |
+
may change as new options become available.
|
| 407 |
+
unknown_options : dict
|
| 408 |
+
Optional arguments not used by this particular solver. If
|
| 409 |
+
``unknown_options`` is non-empty, a warning is issued listing
|
| 410 |
+
all unused options.
|
| 411 |
+
|
| 412 |
+
Returns
|
| 413 |
+
-------
|
| 414 |
+
res : OptimizeResult
|
| 415 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 416 |
+
|
| 417 |
+
x : 1D array
|
| 418 |
+
The values of the decision variables that minimizes the
|
| 419 |
+
objective function while satisfying the constraints.
|
| 420 |
+
fun : float
|
| 421 |
+
The optimal value of the objective function ``c @ x``.
|
| 422 |
+
slack : 1D array
|
| 423 |
+
The (nominally positive) values of the slack,
|
| 424 |
+
``b_ub - A_ub @ x``.
|
| 425 |
+
con : 1D array
|
| 426 |
+
The (nominally zero) residuals of the equality constraints,
|
| 427 |
+
``b_eq - A_eq @ x``.
|
| 428 |
+
success : bool
|
| 429 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 430 |
+
solution.
|
| 431 |
+
status : int
|
| 432 |
+
An integer representing the exit status of the algorithm.
|
| 433 |
+
|
| 434 |
+
``0`` : Optimization terminated successfully.
|
| 435 |
+
|
| 436 |
+
``1`` : Iteration or time limit reached.
|
| 437 |
+
|
| 438 |
+
``2`` : Problem appears to be infeasible.
|
| 439 |
+
|
| 440 |
+
``3`` : Problem appears to be unbounded.
|
| 441 |
+
|
| 442 |
+
``4`` : The HiGHS solver ran into a problem.
|
| 443 |
+
|
| 444 |
+
message : str
|
| 445 |
+
A string descriptor of the exit status of the algorithm.
|
| 446 |
+
nit : int
|
| 447 |
+
The total number of iterations performed. This includes iterations
|
| 448 |
+
in all phases.
|
| 449 |
+
crossover_nit : int
|
| 450 |
+
This is always ``0`` for the HiGHS simplex method.
|
| 451 |
+
For the HiGHS interior-point method, this is the number of
|
| 452 |
+
primal/dual pushes performed during the crossover routine.
|
| 453 |
+
ineqlin : OptimizeResult
|
| 454 |
+
Solution and sensitivity information corresponding to the
|
| 455 |
+
inequality constraints, `b_ub`. A dictionary consisting of the
|
| 456 |
+
fields:
|
| 457 |
+
|
| 458 |
+
residual : np.ndnarray
|
| 459 |
+
The (nominally positive) values of the slack variables,
|
| 460 |
+
``b_ub - A_ub @ x``. This quantity is also commonly
|
| 461 |
+
referred to as "slack".
|
| 462 |
+
|
| 463 |
+
marginals : np.ndarray
|
| 464 |
+
The sensitivity (partial derivative) of the objective
|
| 465 |
+
function with respect to the right-hand side of the
|
| 466 |
+
inequality constraints, `b_ub`.
|
| 467 |
+
|
| 468 |
+
eqlin : OptimizeResult
|
| 469 |
+
Solution and sensitivity information corresponding to the
|
| 470 |
+
equality constraints, `b_eq`. A dictionary consisting of the
|
| 471 |
+
fields:
|
| 472 |
+
|
| 473 |
+
residual : np.ndarray
|
| 474 |
+
The (nominally zero) residuals of the equality constraints,
|
| 475 |
+
``b_eq - A_eq @ x``.
|
| 476 |
+
|
| 477 |
+
marginals : np.ndarray
|
| 478 |
+
The sensitivity (partial derivative) of the objective
|
| 479 |
+
function with respect to the right-hand side of the
|
| 480 |
+
equality constraints, `b_eq`.
|
| 481 |
+
|
| 482 |
+
lower, upper : OptimizeResult
|
| 483 |
+
Solution and sensitivity information corresponding to the
|
| 484 |
+
lower and upper bounds on decision variables, `bounds`.
|
| 485 |
+
|
| 486 |
+
residual : np.ndarray
|
| 487 |
+
The (nominally positive) values of the quantity
|
| 488 |
+
``x - lb`` (lower) or ``ub - x`` (upper).
|
| 489 |
+
|
| 490 |
+
marginals : np.ndarray
|
| 491 |
+
The sensitivity (partial derivative) of the objective
|
| 492 |
+
function with respect to the lower and upper
|
| 493 |
+
`bounds`.
|
| 494 |
+
|
| 495 |
+
Notes
|
| 496 |
+
-----
|
| 497 |
+
|
| 498 |
+
Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
|
| 499 |
+
of the C++ high performance dual revised simplex implementation (HSOL)
|
| 500 |
+
[13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
|
| 501 |
+
is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
|
| 502 |
+
**m**\ ethod [13]_; it features a crossover routine, so it is as accurate
|
| 503 |
+
as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>` chooses
|
| 504 |
+
between the two automatically. For new code involving `linprog`, we
|
| 505 |
+
recommend explicitly choosing one of these three method values instead of
|
| 506 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 507 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 508 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy).
|
| 509 |
+
|
| 510 |
+
The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
|
| 511 |
+
`marginals`, or partial derivatives of the objective function with respect
|
| 512 |
+
to the right-hand side of each constraint. These partial derivatives are
|
| 513 |
+
also referred to as "Lagrange multipliers", "dual values", and
|
| 514 |
+
"shadow prices". The sign convention of `marginals` is opposite that
|
| 515 |
+
of Lagrange multipliers produced by many nonlinear solvers.
|
| 516 |
+
|
| 517 |
+
References
|
| 518 |
+
----------
|
| 519 |
+
.. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
|
| 520 |
+
"HiGHS - high performance software for linear optimization."
|
| 521 |
+
https://highs.dev/
|
| 522 |
+
.. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
|
| 523 |
+
simplex method." Mathematical Programming Computation, 10 (1),
|
| 524 |
+
119-142, 2018. DOI: 10.1007/s12532-017-0130-5
|
| 525 |
+
.. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
|
| 526 |
+
Mathematical programming 5.1 (1973): 1-28.
|
| 527 |
+
.. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
|
| 528 |
+
simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
|
| 529 |
+
"""
|
| 530 |
+
pass
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
def _linprog_highs_ipm_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 534 |
+
bounds=None, method='highs-ipm', callback=None,
|
| 535 |
+
maxiter=None, disp=False, presolve=True,
|
| 536 |
+
time_limit=None,
|
| 537 |
+
dual_feasibility_tolerance=None,
|
| 538 |
+
primal_feasibility_tolerance=None,
|
| 539 |
+
ipm_optimality_tolerance=None,
|
| 540 |
+
**unknown_options):
|
| 541 |
+
r"""
|
| 542 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 543 |
+
equality and inequality constraints using the HiGHS interior point solver.
|
| 544 |
+
|
| 545 |
+
Linear programming solves problems of the following form:
|
| 546 |
+
|
| 547 |
+
.. math::
|
| 548 |
+
|
| 549 |
+
\min_x \ & c^T x \\
|
| 550 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 551 |
+
& A_{eq} x = b_{eq},\\
|
| 552 |
+
& l \leq x \leq u ,
|
| 553 |
+
|
| 554 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 555 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 556 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 557 |
+
|
| 558 |
+
Alternatively, that's:
|
| 559 |
+
|
| 560 |
+
minimize::
|
| 561 |
+
|
| 562 |
+
c @ x
|
| 563 |
+
|
| 564 |
+
such that::
|
| 565 |
+
|
| 566 |
+
A_ub @ x <= b_ub
|
| 567 |
+
A_eq @ x == b_eq
|
| 568 |
+
lb <= x <= ub
|
| 569 |
+
|
| 570 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 571 |
+
``bounds``.
|
| 572 |
+
|
| 573 |
+
Parameters
|
| 574 |
+
----------
|
| 575 |
+
c : 1-D array
|
| 576 |
+
The coefficients of the linear objective function to be minimized.
|
| 577 |
+
A_ub : 2-D array, optional
|
| 578 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 579 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 580 |
+
b_ub : 1-D array, optional
|
| 581 |
+
The inequality constraint vector. Each element represents an
|
| 582 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 583 |
+
A_eq : 2-D array, optional
|
| 584 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 585 |
+
coefficients of a linear equality constraint on ``x``.
|
| 586 |
+
b_eq : 1-D array, optional
|
| 587 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 588 |
+
the corresponding element of ``b_eq``.
|
| 589 |
+
bounds : sequence, optional
|
| 590 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 591 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 592 |
+
to indicate that there is no bound. By default, bounds are
|
| 593 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 594 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 595 |
+
``max`` will serve as bounds for all decision variables.
|
| 596 |
+
method : str
|
| 597 |
+
|
| 598 |
+
This is the method-specific documentation for 'highs-ipm'.
|
| 599 |
+
:ref:`'highs-ipm' <optimize.linprog-highs>`,
|
| 600 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`,
|
| 601 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 602 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 603 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
| 604 |
+
are also available.
|
| 605 |
+
|
| 606 |
+
Options
|
| 607 |
+
-------
|
| 608 |
+
maxiter : int
|
| 609 |
+
The maximum number of iterations to perform in either phase.
|
| 610 |
+
For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
|
| 611 |
+
include the number of crossover iterations. Default is the largest
|
| 612 |
+
possible value for an ``int`` on the platform.
|
| 613 |
+
disp : bool (default: ``False``)
|
| 614 |
+
Set to ``True`` if indicators of optimization status are to be
|
| 615 |
+
printed to the console during optimization.
|
| 616 |
+
presolve : bool (default: ``True``)
|
| 617 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 618 |
+
identify trivial unboundedness, and simplify the problem before
|
| 619 |
+
sending it to the main solver. It is generally recommended
|
| 620 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 621 |
+
presolve is to be disabled.
|
| 622 |
+
time_limit : float
|
| 623 |
+
The maximum time in seconds allotted to solve the problem;
|
| 624 |
+
default is the largest possible value for a ``double`` on the
|
| 625 |
+
platform.
|
| 626 |
+
dual_feasibility_tolerance : double (default: 1e-07)
|
| 627 |
+
The minimum of this and ``primal_feasibility_tolerance``
|
| 628 |
+
is used for the feasibility tolerance of
|
| 629 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 630 |
+
primal_feasibility_tolerance : double (default: 1e-07)
|
| 631 |
+
The minimum of this and ``dual_feasibility_tolerance``
|
| 632 |
+
is used for the feasibility tolerance of
|
| 633 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 634 |
+
ipm_optimality_tolerance : double (default: ``1e-08``)
|
| 635 |
+
Optimality tolerance for
|
| 636 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
|
| 637 |
+
Minimum allowable value is 1e-12.
|
| 638 |
+
unknown_options : dict
|
| 639 |
+
Optional arguments not used by this particular solver. If
|
| 640 |
+
``unknown_options`` is non-empty, a warning is issued listing
|
| 641 |
+
all unused options.
|
| 642 |
+
|
| 643 |
+
Returns
|
| 644 |
+
-------
|
| 645 |
+
res : OptimizeResult
|
| 646 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 647 |
+
|
| 648 |
+
x : 1D array
|
| 649 |
+
The values of the decision variables that minimizes the
|
| 650 |
+
objective function while satisfying the constraints.
|
| 651 |
+
fun : float
|
| 652 |
+
The optimal value of the objective function ``c @ x``.
|
| 653 |
+
slack : 1D array
|
| 654 |
+
The (nominally positive) values of the slack,
|
| 655 |
+
``b_ub - A_ub @ x``.
|
| 656 |
+
con : 1D array
|
| 657 |
+
The (nominally zero) residuals of the equality constraints,
|
| 658 |
+
``b_eq - A_eq @ x``.
|
| 659 |
+
success : bool
|
| 660 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 661 |
+
solution.
|
| 662 |
+
status : int
|
| 663 |
+
An integer representing the exit status of the algorithm.
|
| 664 |
+
|
| 665 |
+
``0`` : Optimization terminated successfully.
|
| 666 |
+
|
| 667 |
+
``1`` : Iteration or time limit reached.
|
| 668 |
+
|
| 669 |
+
``2`` : Problem appears to be infeasible.
|
| 670 |
+
|
| 671 |
+
``3`` : Problem appears to be unbounded.
|
| 672 |
+
|
| 673 |
+
``4`` : The HiGHS solver ran into a problem.
|
| 674 |
+
|
| 675 |
+
message : str
|
| 676 |
+
A string descriptor of the exit status of the algorithm.
|
| 677 |
+
nit : int
|
| 678 |
+
The total number of iterations performed.
|
| 679 |
+
For the HiGHS interior-point method, this does not include
|
| 680 |
+
crossover iterations.
|
| 681 |
+
crossover_nit : int
|
| 682 |
+
The number of primal/dual pushes performed during the
|
| 683 |
+
crossover routine for the HiGHS interior-point method.
|
| 684 |
+
ineqlin : OptimizeResult
|
| 685 |
+
Solution and sensitivity information corresponding to the
|
| 686 |
+
inequality constraints, `b_ub`. A dictionary consisting of the
|
| 687 |
+
fields:
|
| 688 |
+
|
| 689 |
+
residual : np.ndnarray
|
| 690 |
+
The (nominally positive) values of the slack variables,
|
| 691 |
+
``b_ub - A_ub @ x``. This quantity is also commonly
|
| 692 |
+
referred to as "slack".
|
| 693 |
+
|
| 694 |
+
marginals : np.ndarray
|
| 695 |
+
The sensitivity (partial derivative) of the objective
|
| 696 |
+
function with respect to the right-hand side of the
|
| 697 |
+
inequality constraints, `b_ub`.
|
| 698 |
+
|
| 699 |
+
eqlin : OptimizeResult
|
| 700 |
+
Solution and sensitivity information corresponding to the
|
| 701 |
+
equality constraints, `b_eq`. A dictionary consisting of the
|
| 702 |
+
fields:
|
| 703 |
+
|
| 704 |
+
residual : np.ndarray
|
| 705 |
+
The (nominally zero) residuals of the equality constraints,
|
| 706 |
+
``b_eq - A_eq @ x``.
|
| 707 |
+
|
| 708 |
+
marginals : np.ndarray
|
| 709 |
+
The sensitivity (partial derivative) of the objective
|
| 710 |
+
function with respect to the right-hand side of the
|
| 711 |
+
equality constraints, `b_eq`.
|
| 712 |
+
|
| 713 |
+
lower, upper : OptimizeResult
|
| 714 |
+
Solution and sensitivity information corresponding to the
|
| 715 |
+
lower and upper bounds on decision variables, `bounds`.
|
| 716 |
+
|
| 717 |
+
residual : np.ndarray
|
| 718 |
+
The (nominally positive) values of the quantity
|
| 719 |
+
``x - lb`` (lower) or ``ub - x`` (upper).
|
| 720 |
+
|
| 721 |
+
marginals : np.ndarray
|
| 722 |
+
The sensitivity (partial derivative) of the objective
|
| 723 |
+
function with respect to the lower and upper
|
| 724 |
+
`bounds`.
|
| 725 |
+
|
| 726 |
+
Notes
|
| 727 |
+
-----
|
| 728 |
+
|
| 729 |
+
Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
|
| 730 |
+
is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
|
| 731 |
+
**m**\ ethod [13]_; it features a crossover routine, so it is as accurate
|
| 732 |
+
as a simplex solver.
|
| 733 |
+
Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
|
| 734 |
+
of the C++ high performance dual revised simplex implementation (HSOL)
|
| 735 |
+
[13]_, [14]_. Method :ref:`'highs' <optimize.linprog-highs>` chooses
|
| 736 |
+
between the two automatically. For new code involving `linprog`, we
|
| 737 |
+
recommend explicitly choosing one of these three method values instead of
|
| 738 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 739 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 740 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy).
|
| 741 |
+
|
| 742 |
+
The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
|
| 743 |
+
`marginals`, or partial derivatives of the objective function with respect
|
| 744 |
+
to the right-hand side of each constraint. These partial derivatives are
|
| 745 |
+
also referred to as "Lagrange multipliers", "dual values", and
|
| 746 |
+
"shadow prices". The sign convention of `marginals` is opposite that
|
| 747 |
+
of Lagrange multipliers produced by many nonlinear solvers.
|
| 748 |
+
|
| 749 |
+
References
|
| 750 |
+
----------
|
| 751 |
+
.. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
|
| 752 |
+
"HiGHS - high performance software for linear optimization."
|
| 753 |
+
https://highs.dev/
|
| 754 |
+
.. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
|
| 755 |
+
simplex method." Mathematical Programming Computation, 10 (1),
|
| 756 |
+
119-142, 2018. DOI: 10.1007/s12532-017-0130-5
|
| 757 |
+
"""
|
| 758 |
+
pass
|
| 759 |
+
|
| 760 |
+
|
| 761 |
+
def _linprog_ip_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 762 |
+
bounds=None, method='interior-point', callback=None,
|
| 763 |
+
maxiter=1000, disp=False, presolve=True,
|
| 764 |
+
tol=1e-8, autoscale=False, rr=True,
|
| 765 |
+
alpha0=.99995, beta=0.1, sparse=False,
|
| 766 |
+
lstsq=False, sym_pos=True, cholesky=True, pc=True,
|
| 767 |
+
ip=False, permc_spec='MMD_AT_PLUS_A', **unknown_options):
|
| 768 |
+
r"""
|
| 769 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 770 |
+
equality and inequality constraints using the interior-point method of
|
| 771 |
+
[4]_.
|
| 772 |
+
|
| 773 |
+
.. deprecated:: 1.9.0
|
| 774 |
+
`method='interior-point'` will be removed in SciPy 1.11.0.
|
| 775 |
+
It is replaced by `method='highs'` because the latter is
|
| 776 |
+
faster and more robust.
|
| 777 |
+
|
| 778 |
+
Linear programming solves problems of the following form:
|
| 779 |
+
|
| 780 |
+
.. math::
|
| 781 |
+
|
| 782 |
+
\min_x \ & c^T x \\
|
| 783 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 784 |
+
& A_{eq} x = b_{eq},\\
|
| 785 |
+
& l \leq x \leq u ,
|
| 786 |
+
|
| 787 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 788 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 789 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 790 |
+
|
| 791 |
+
Alternatively, that's:
|
| 792 |
+
|
| 793 |
+
minimize::
|
| 794 |
+
|
| 795 |
+
c @ x
|
| 796 |
+
|
| 797 |
+
such that::
|
| 798 |
+
|
| 799 |
+
A_ub @ x <= b_ub
|
| 800 |
+
A_eq @ x == b_eq
|
| 801 |
+
lb <= x <= ub
|
| 802 |
+
|
| 803 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 804 |
+
``bounds``.
|
| 805 |
+
|
| 806 |
+
Parameters
|
| 807 |
+
----------
|
| 808 |
+
c : 1-D array
|
| 809 |
+
The coefficients of the linear objective function to be minimized.
|
| 810 |
+
A_ub : 2-D array, optional
|
| 811 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 812 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 813 |
+
b_ub : 1-D array, optional
|
| 814 |
+
The inequality constraint vector. Each element represents an
|
| 815 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 816 |
+
A_eq : 2-D array, optional
|
| 817 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 818 |
+
coefficients of a linear equality constraint on ``x``.
|
| 819 |
+
b_eq : 1-D array, optional
|
| 820 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 821 |
+
the corresponding element of ``b_eq``.
|
| 822 |
+
bounds : sequence, optional
|
| 823 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 824 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 825 |
+
to indicate that there is no bound. By default, bounds are
|
| 826 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 827 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 828 |
+
``max`` will serve as bounds for all decision variables.
|
| 829 |
+
method : str
|
| 830 |
+
This is the method-specific documentation for 'interior-point'.
|
| 831 |
+
:ref:`'highs' <optimize.linprog-highs>`,
|
| 832 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`,
|
| 833 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
|
| 834 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
| 835 |
+
:ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
| 836 |
+
are also available.
|
| 837 |
+
callback : callable, optional
|
| 838 |
+
Callback function to be executed once per iteration.
|
| 839 |
+
|
| 840 |
+
Options
|
| 841 |
+
-------
|
| 842 |
+
maxiter : int (default: 1000)
|
| 843 |
+
The maximum number of iterations of the algorithm.
|
| 844 |
+
disp : bool (default: False)
|
| 845 |
+
Set to ``True`` if indicators of optimization status are to be printed
|
| 846 |
+
to the console each iteration.
|
| 847 |
+
presolve : bool (default: True)
|
| 848 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 849 |
+
identify trivial unboundedness, and simplify the problem before
|
| 850 |
+
sending it to the main solver. It is generally recommended
|
| 851 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 852 |
+
presolve is to be disabled.
|
| 853 |
+
tol : float (default: 1e-8)
|
| 854 |
+
Termination tolerance to be used for all termination criteria;
|
| 855 |
+
see [4]_ Section 4.5.
|
| 856 |
+
autoscale : bool (default: False)
|
| 857 |
+
Set to ``True`` to automatically perform equilibration.
|
| 858 |
+
Consider using this option if the numerical values in the
|
| 859 |
+
constraints are separated by several orders of magnitude.
|
| 860 |
+
rr : bool (default: True)
|
| 861 |
+
Set to ``False`` to disable automatic redundancy removal.
|
| 862 |
+
alpha0 : float (default: 0.99995)
|
| 863 |
+
The maximal step size for Mehrota's predictor-corrector search
|
| 864 |
+
direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
|
| 865 |
+
beta : float (default: 0.1)
|
| 866 |
+
The desired reduction of the path parameter :math:`\mu` (see [6]_)
|
| 867 |
+
when Mehrota's predictor-corrector is not in use (uncommon).
|
| 868 |
+
sparse : bool (default: False)
|
| 869 |
+
Set to ``True`` if the problem is to be treated as sparse after
|
| 870 |
+
presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
|
| 871 |
+
this option will automatically be set ``True``, and the problem
|
| 872 |
+
will be treated as sparse even during presolve. If your constraint
|
| 873 |
+
matrices contain mostly zeros and the problem is not very small (less
|
| 874 |
+
than about 100 constraints or variables), consider setting ``True``
|
| 875 |
+
or providing ``A_eq`` and ``A_ub`` as sparse matrices.
|
| 876 |
+
lstsq : bool (default: ``False``)
|
| 877 |
+
Set to ``True`` if the problem is expected to be very poorly
|
| 878 |
+
conditioned. This should always be left ``False`` unless severe
|
| 879 |
+
numerical difficulties are encountered. Leave this at the default
|
| 880 |
+
unless you receive a warning message suggesting otherwise.
|
| 881 |
+
sym_pos : bool (default: True)
|
| 882 |
+
Leave ``True`` if the problem is expected to yield a well conditioned
|
| 883 |
+
symmetric positive definite normal equation matrix
|
| 884 |
+
(almost always). Leave this at the default unless you receive
|
| 885 |
+
a warning message suggesting otherwise.
|
| 886 |
+
cholesky : bool (default: True)
|
| 887 |
+
Set to ``True`` if the normal equations are to be solved by explicit
|
| 888 |
+
Cholesky decomposition followed by explicit forward/backward
|
| 889 |
+
substitution. This is typically faster for problems
|
| 890 |
+
that are numerically well-behaved.
|
| 891 |
+
pc : bool (default: True)
|
| 892 |
+
Leave ``True`` if the predictor-corrector method of Mehrota is to be
|
| 893 |
+
used. This is almost always (if not always) beneficial.
|
| 894 |
+
ip : bool (default: False)
|
| 895 |
+
Set to ``True`` if the improved initial point suggestion due to [4]_
|
| 896 |
+
Section 4.3 is desired. Whether this is beneficial or not
|
| 897 |
+
depends on the problem.
|
| 898 |
+
permc_spec : str (default: 'MMD_AT_PLUS_A')
|
| 899 |
+
(Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
|
| 900 |
+
True``, and no SuiteSparse.)
|
| 901 |
+
A matrix is factorized in each iteration of the algorithm.
|
| 902 |
+
This option specifies how to permute the columns of the matrix for
|
| 903 |
+
sparsity preservation. Acceptable values are:
|
| 904 |
+
|
| 905 |
+
- ``NATURAL``: natural ordering.
|
| 906 |
+
- ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
|
| 907 |
+
- ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
|
| 908 |
+
- ``COLAMD``: approximate minimum degree column ordering.
|
| 909 |
+
|
| 910 |
+
This option can impact the convergence of the
|
| 911 |
+
interior point algorithm; test different values to determine which
|
| 912 |
+
performs best for your problem. For more information, refer to
|
| 913 |
+
``scipy.sparse.linalg.splu``.
|
| 914 |
+
unknown_options : dict
|
| 915 |
+
Optional arguments not used by this particular solver. If
|
| 916 |
+
`unknown_options` is non-empty a warning is issued listing all
|
| 917 |
+
unused options.
|
| 918 |
+
|
| 919 |
+
Returns
|
| 920 |
+
-------
|
| 921 |
+
res : OptimizeResult
|
| 922 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 923 |
+
|
| 924 |
+
x : 1-D array
|
| 925 |
+
The values of the decision variables that minimizes the
|
| 926 |
+
objective function while satisfying the constraints.
|
| 927 |
+
fun : float
|
| 928 |
+
The optimal value of the objective function ``c @ x``.
|
| 929 |
+
slack : 1-D array
|
| 930 |
+
The (nominally positive) values of the slack variables,
|
| 931 |
+
``b_ub - A_ub @ x``.
|
| 932 |
+
con : 1-D array
|
| 933 |
+
The (nominally zero) residuals of the equality constraints,
|
| 934 |
+
``b_eq - A_eq @ x``.
|
| 935 |
+
success : bool
|
| 936 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 937 |
+
solution.
|
| 938 |
+
status : int
|
| 939 |
+
An integer representing the exit status of the algorithm.
|
| 940 |
+
|
| 941 |
+
``0`` : Optimization terminated successfully.
|
| 942 |
+
|
| 943 |
+
``1`` : Iteration limit reached.
|
| 944 |
+
|
| 945 |
+
``2`` : Problem appears to be infeasible.
|
| 946 |
+
|
| 947 |
+
``3`` : Problem appears to be unbounded.
|
| 948 |
+
|
| 949 |
+
``4`` : Numerical difficulties encountered.
|
| 950 |
+
|
| 951 |
+
message : str
|
| 952 |
+
A string descriptor of the exit status of the algorithm.
|
| 953 |
+
nit : int
|
| 954 |
+
The total number of iterations performed in all phases.
|
| 955 |
+
|
| 956 |
+
|
| 957 |
+
Notes
|
| 958 |
+
-----
|
| 959 |
+
This method implements the algorithm outlined in [4]_ with ideas from [8]_
|
| 960 |
+
and a structure inspired by the simpler methods of [6]_.
|
| 961 |
+
|
| 962 |
+
The primal-dual path following method begins with initial 'guesses' of
|
| 963 |
+
the primal and dual variables of the standard form problem and iteratively
|
| 964 |
+
attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
|
| 965 |
+
problem with a gradually reduced logarithmic barrier term added to the
|
| 966 |
+
objective. This particular implementation uses a homogeneous self-dual
|
| 967 |
+
formulation, which provides certificates of infeasibility or unboundedness
|
| 968 |
+
where applicable.
|
| 969 |
+
|
| 970 |
+
The default initial point for the primal and dual variables is that
|
| 971 |
+
defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
|
| 972 |
+
point option ``ip=True``), an alternate (potentially improved) starting
|
| 973 |
+
point can be calculated according to the additional recommendations of
|
| 974 |
+
[4]_ Section 4.4.
|
| 975 |
+
|
| 976 |
+
A search direction is calculated using the predictor-corrector method
|
| 977 |
+
(single correction) proposed by Mehrota and detailed in [4]_ Section 4.1.
|
| 978 |
+
(A potential improvement would be to implement the method of multiple
|
| 979 |
+
corrections described in [4]_ Section 4.2.) In practice, this is
|
| 980 |
+
accomplished by solving the normal equations, [4]_ Section 5.1 Equations
|
| 981 |
+
8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
|
| 982 |
+
8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
|
| 983 |
+
solving the normal equations rather than 8.25 directly is that the
|
| 984 |
+
matrices involved are symmetric positive definite, so Cholesky
|
| 985 |
+
decomposition can be used rather than the more expensive LU factorization.
|
| 986 |
+
|
| 987 |
+
With default options, the solver used to perform the factorization depends
|
| 988 |
+
on third-party software availability and the conditioning of the problem.
|
| 989 |
+
|
| 990 |
+
For dense problems, solvers are tried in the following order:
|
| 991 |
+
|
| 992 |
+
1. ``scipy.linalg.cho_factor``
|
| 993 |
+
|
| 994 |
+
2. ``scipy.linalg.solve`` with option ``sym_pos=True``
|
| 995 |
+
|
| 996 |
+
3. ``scipy.linalg.solve`` with option ``sym_pos=False``
|
| 997 |
+
|
| 998 |
+
4. ``scipy.linalg.lstsq``
|
| 999 |
+
|
| 1000 |
+
For sparse problems:
|
| 1001 |
+
|
| 1002 |
+
1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are
|
| 1003 |
+
installed)
|
| 1004 |
+
|
| 1005 |
+
2. ``scipy.sparse.linalg.factorized`` (if scikit-umfpack and SuiteSparse
|
| 1006 |
+
are installed)
|
| 1007 |
+
|
| 1008 |
+
3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
|
| 1009 |
+
|
| 1010 |
+
4. ``scipy.sparse.linalg.lsqr``
|
| 1011 |
+
|
| 1012 |
+
If the solver fails for any reason, successively more robust (but slower)
|
| 1013 |
+
solvers are attempted in the order indicated. Attempting, failing, and
|
| 1014 |
+
re-starting factorization can be time consuming, so if the problem is
|
| 1015 |
+
numerically challenging, options can be set to bypass solvers that are
|
| 1016 |
+
failing. Setting ``cholesky=False`` skips to solver 2,
|
| 1017 |
+
``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
|
| 1018 |
+
to solver 4 for both sparse and dense problems.
|
| 1019 |
+
|
| 1020 |
+
Potential improvements for combatting issues associated with dense
|
| 1021 |
+
columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
|
| 1022 |
+
[10]_ Section 4.1-4.2; the latter also discusses the alleviation of
|
| 1023 |
+
accuracy issues associated with the substitution approach to free
|
| 1024 |
+
variables.
|
| 1025 |
+
|
| 1026 |
+
After calculating the search direction, the maximum possible step size
|
| 1027 |
+
that does not activate the non-negativity constraints is calculated, and
|
| 1028 |
+
the smaller of this step size and unity is applied (as in [4]_ Section
|
| 1029 |
+
4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
|
| 1030 |
+
|
| 1031 |
+
The new point is tested according to the termination conditions of [4]_
|
| 1032 |
+
Section 4.5. The same tolerance, which can be set using the ``tol`` option,
|
| 1033 |
+
is used for all checks. (A potential improvement would be to expose
|
| 1034 |
+
the different tolerances to be set independently.) If optimality,
|
| 1035 |
+
unboundedness, or infeasibility is detected, the solve procedure
|
| 1036 |
+
terminates; otherwise it repeats.
|
| 1037 |
+
|
| 1038 |
+
Whereas the top level ``linprog`` module expects a problem of form:
|
| 1039 |
+
|
| 1040 |
+
Minimize::
|
| 1041 |
+
|
| 1042 |
+
c @ x
|
| 1043 |
+
|
| 1044 |
+
Subject to::
|
| 1045 |
+
|
| 1046 |
+
A_ub @ x <= b_ub
|
| 1047 |
+
A_eq @ x == b_eq
|
| 1048 |
+
lb <= x <= ub
|
| 1049 |
+
|
| 1050 |
+
where ``lb = 0`` and ``ub = None`` unless set in ``bounds``. The problem
|
| 1051 |
+
is automatically converted to the form:
|
| 1052 |
+
|
| 1053 |
+
Minimize::
|
| 1054 |
+
|
| 1055 |
+
c @ x
|
| 1056 |
+
|
| 1057 |
+
Subject to::
|
| 1058 |
+
|
| 1059 |
+
A @ x == b
|
| 1060 |
+
x >= 0
|
| 1061 |
+
|
| 1062 |
+
for solution. That is, the original problem contains equality, upper-bound
|
| 1063 |
+
and variable constraints whereas the method specific solver requires
|
| 1064 |
+
equality constraints and variable non-negativity. ``linprog`` converts the
|
| 1065 |
+
original problem to standard form by converting the simple bounds to upper
|
| 1066 |
+
bound constraints, introducing non-negative slack variables for inequality
|
| 1067 |
+
constraints, and expressing unbounded variables as the difference between
|
| 1068 |
+
two non-negative variables. The problem is converted back to the original
|
| 1069 |
+
form before results are reported.
|
| 1070 |
+
|
| 1071 |
+
References
|
| 1072 |
+
----------
|
| 1073 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 1074 |
+
optimizer for linear programming: an implementation of the
|
| 1075 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 1076 |
+
2000. 197-232.
|
| 1077 |
+
.. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
|
| 1078 |
+
Programming based on Newton's Method." Unpublished Course Notes,
|
| 1079 |
+
March 2004. Available 2/25/2017 at
|
| 1080 |
+
https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
|
| 1081 |
+
.. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
|
| 1082 |
+
programming." Mathematical Programming 71.2 (1995): 221-245.
|
| 1083 |
+
.. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
| 1084 |
+
programming." Athena Scientific 1 (1997): 997.
|
| 1085 |
+
.. [10] Andersen, Erling D., et al. Implementation of interior point
|
| 1086 |
+
methods for large scale linear programming. HEC/Universite de
|
| 1087 |
+
Geneve, 1996.
|
| 1088 |
+
"""
|
| 1089 |
+
pass
|
| 1090 |
+
|
| 1091 |
+
|
| 1092 |
+
def _linprog_rs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 1093 |
+
bounds=None, method='interior-point', callback=None,
|
| 1094 |
+
x0=None, maxiter=5000, disp=False, presolve=True,
|
| 1095 |
+
tol=1e-12, autoscale=False, rr=True, maxupdate=10,
|
| 1096 |
+
mast=False, pivot="mrc", **unknown_options):
|
| 1097 |
+
r"""
|
| 1098 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 1099 |
+
equality and inequality constraints using the revised simplex method.
|
| 1100 |
+
|
| 1101 |
+
.. deprecated:: 1.9.0
|
| 1102 |
+
`method='revised simplex'` will be removed in SciPy 1.11.0.
|
| 1103 |
+
It is replaced by `method='highs'` because the latter is
|
| 1104 |
+
faster and more robust.
|
| 1105 |
+
|
| 1106 |
+
Linear programming solves problems of the following form:
|
| 1107 |
+
|
| 1108 |
+
.. math::
|
| 1109 |
+
|
| 1110 |
+
\min_x \ & c^T x \\
|
| 1111 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 1112 |
+
& A_{eq} x = b_{eq},\\
|
| 1113 |
+
& l \leq x \leq u ,
|
| 1114 |
+
|
| 1115 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 1116 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 1117 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 1118 |
+
|
| 1119 |
+
Alternatively, that's:
|
| 1120 |
+
|
| 1121 |
+
minimize::
|
| 1122 |
+
|
| 1123 |
+
c @ x
|
| 1124 |
+
|
| 1125 |
+
such that::
|
| 1126 |
+
|
| 1127 |
+
A_ub @ x <= b_ub
|
| 1128 |
+
A_eq @ x == b_eq
|
| 1129 |
+
lb <= x <= ub
|
| 1130 |
+
|
| 1131 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 1132 |
+
``bounds``.
|
| 1133 |
+
|
| 1134 |
+
Parameters
|
| 1135 |
+
----------
|
| 1136 |
+
c : 1-D array
|
| 1137 |
+
The coefficients of the linear objective function to be minimized.
|
| 1138 |
+
A_ub : 2-D array, optional
|
| 1139 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 1140 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 1141 |
+
b_ub : 1-D array, optional
|
| 1142 |
+
The inequality constraint vector. Each element represents an
|
| 1143 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 1144 |
+
A_eq : 2-D array, optional
|
| 1145 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 1146 |
+
coefficients of a linear equality constraint on ``x``.
|
| 1147 |
+
b_eq : 1-D array, optional
|
| 1148 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 1149 |
+
the corresponding element of ``b_eq``.
|
| 1150 |
+
bounds : sequence, optional
|
| 1151 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 1152 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 1153 |
+
to indicate that there is no bound. By default, bounds are
|
| 1154 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 1155 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 1156 |
+
``max`` will serve as bounds for all decision variables.
|
| 1157 |
+
method : str
|
| 1158 |
+
This is the method-specific documentation for 'revised simplex'.
|
| 1159 |
+
:ref:`'highs' <optimize.linprog-highs>`,
|
| 1160 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`,
|
| 1161 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
|
| 1162 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 1163 |
+
and :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
| 1164 |
+
are also available.
|
| 1165 |
+
callback : callable, optional
|
| 1166 |
+
Callback function to be executed once per iteration.
|
| 1167 |
+
x0 : 1-D array, optional
|
| 1168 |
+
Guess values of the decision variables, which will be refined by
|
| 1169 |
+
the optimization algorithm. This argument is currently used only by the
|
| 1170 |
+
'revised simplex' method, and can only be used if `x0` represents a
|
| 1171 |
+
basic feasible solution.
|
| 1172 |
+
|
| 1173 |
+
Options
|
| 1174 |
+
-------
|
| 1175 |
+
maxiter : int (default: 5000)
|
| 1176 |
+
The maximum number of iterations to perform in either phase.
|
| 1177 |
+
disp : bool (default: False)
|
| 1178 |
+
Set to ``True`` if indicators of optimization status are to be printed
|
| 1179 |
+
to the console each iteration.
|
| 1180 |
+
presolve : bool (default: True)
|
| 1181 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 1182 |
+
identify trivial unboundedness, and simplify the problem before
|
| 1183 |
+
sending it to the main solver. It is generally recommended
|
| 1184 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 1185 |
+
presolve is to be disabled.
|
| 1186 |
+
tol : float (default: 1e-12)
|
| 1187 |
+
The tolerance which determines when a solution is "close enough" to
|
| 1188 |
+
zero in Phase 1 to be considered a basic feasible solution or close
|
| 1189 |
+
enough to positive to serve as an optimal solution.
|
| 1190 |
+
autoscale : bool (default: False)
|
| 1191 |
+
Set to ``True`` to automatically perform equilibration.
|
| 1192 |
+
Consider using this option if the numerical values in the
|
| 1193 |
+
constraints are separated by several orders of magnitude.
|
| 1194 |
+
rr : bool (default: True)
|
| 1195 |
+
Set to ``False`` to disable automatic redundancy removal.
|
| 1196 |
+
maxupdate : int (default: 10)
|
| 1197 |
+
The maximum number of updates performed on the LU factorization.
|
| 1198 |
+
After this many updates is reached, the basis matrix is factorized
|
| 1199 |
+
from scratch.
|
| 1200 |
+
mast : bool (default: False)
|
| 1201 |
+
Minimize Amortized Solve Time. If enabled, the average time to solve
|
| 1202 |
+
a linear system using the basis factorization is measured. Typically,
|
| 1203 |
+
the average solve time will decrease with each successive solve after
|
| 1204 |
+
initial factorization, as factorization takes much more time than the
|
| 1205 |
+
solve operation (and updates). Eventually, however, the updated
|
| 1206 |
+
factorization becomes sufficiently complex that the average solve time
|
| 1207 |
+
begins to increase. When this is detected, the basis is refactorized
|
| 1208 |
+
from scratch. Enable this option to maximize speed at the risk of
|
| 1209 |
+
nondeterministic behavior. Ignored if ``maxupdate`` is 0.
|
| 1210 |
+
pivot : "mrc" or "bland" (default: "mrc")
|
| 1211 |
+
Pivot rule: Minimum Reduced Cost ("mrc") or Bland's rule ("bland").
|
| 1212 |
+
Choose Bland's rule if iteration limit is reached and cycling is
|
| 1213 |
+
suspected.
|
| 1214 |
+
unknown_options : dict
|
| 1215 |
+
Optional arguments not used by this particular solver. If
|
| 1216 |
+
`unknown_options` is non-empty a warning is issued listing all
|
| 1217 |
+
unused options.
|
| 1218 |
+
|
| 1219 |
+
Returns
|
| 1220 |
+
-------
|
| 1221 |
+
res : OptimizeResult
|
| 1222 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 1223 |
+
|
| 1224 |
+
x : 1-D array
|
| 1225 |
+
The values of the decision variables that minimizes the
|
| 1226 |
+
objective function while satisfying the constraints.
|
| 1227 |
+
fun : float
|
| 1228 |
+
The optimal value of the objective function ``c @ x``.
|
| 1229 |
+
slack : 1-D array
|
| 1230 |
+
The (nominally positive) values of the slack variables,
|
| 1231 |
+
``b_ub - A_ub @ x``.
|
| 1232 |
+
con : 1-D array
|
| 1233 |
+
The (nominally zero) residuals of the equality constraints,
|
| 1234 |
+
``b_eq - A_eq @ x``.
|
| 1235 |
+
success : bool
|
| 1236 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 1237 |
+
solution.
|
| 1238 |
+
status : int
|
| 1239 |
+
An integer representing the exit status of the algorithm.
|
| 1240 |
+
|
| 1241 |
+
``0`` : Optimization terminated successfully.
|
| 1242 |
+
|
| 1243 |
+
``1`` : Iteration limit reached.
|
| 1244 |
+
|
| 1245 |
+
``2`` : Problem appears to be infeasible.
|
| 1246 |
+
|
| 1247 |
+
``3`` : Problem appears to be unbounded.
|
| 1248 |
+
|
| 1249 |
+
``4`` : Numerical difficulties encountered.
|
| 1250 |
+
|
| 1251 |
+
``5`` : Problem has no constraints; turn presolve on.
|
| 1252 |
+
|
| 1253 |
+
``6`` : Invalid guess provided.
|
| 1254 |
+
|
| 1255 |
+
message : str
|
| 1256 |
+
A string descriptor of the exit status of the algorithm.
|
| 1257 |
+
nit : int
|
| 1258 |
+
The total number of iterations performed in all phases.
|
| 1259 |
+
|
| 1260 |
+
|
| 1261 |
+
Notes
|
| 1262 |
+
-----
|
| 1263 |
+
Method *revised simplex* uses the revised simplex method as described in
|
| 1264 |
+
[9]_, except that a factorization [11]_ of the basis matrix, rather than
|
| 1265 |
+
its inverse, is efficiently maintained and used to solve the linear systems
|
| 1266 |
+
at each iteration of the algorithm.
|
| 1267 |
+
|
| 1268 |
+
References
|
| 1269 |
+
----------
|
| 1270 |
+
.. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
| 1271 |
+
programming." Athena Scientific 1 (1997): 997.
|
| 1272 |
+
.. [11] Bartels, Richard H. "A stabilization of the simplex method."
|
| 1273 |
+
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
| 1274 |
+
"""
|
| 1275 |
+
pass
|
| 1276 |
+
|
| 1277 |
+
|
| 1278 |
+
def _linprog_simplex_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
| 1279 |
+
bounds=None, method='interior-point', callback=None,
|
| 1280 |
+
maxiter=5000, disp=False, presolve=True,
|
| 1281 |
+
tol=1e-12, autoscale=False, rr=True, bland=False,
|
| 1282 |
+
**unknown_options):
|
| 1283 |
+
r"""
|
| 1284 |
+
Linear programming: minimize a linear objective function subject to linear
|
| 1285 |
+
equality and inequality constraints using the tableau-based simplex method.
|
| 1286 |
+
|
| 1287 |
+
.. deprecated:: 1.9.0
|
| 1288 |
+
`method='simplex'` will be removed in SciPy 1.11.0.
|
| 1289 |
+
It is replaced by `method='highs'` because the latter is
|
| 1290 |
+
faster and more robust.
|
| 1291 |
+
|
| 1292 |
+
Linear programming solves problems of the following form:
|
| 1293 |
+
|
| 1294 |
+
.. math::
|
| 1295 |
+
|
| 1296 |
+
\min_x \ & c^T x \\
|
| 1297 |
+
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
| 1298 |
+
& A_{eq} x = b_{eq},\\
|
| 1299 |
+
& l \leq x \leq u ,
|
| 1300 |
+
|
| 1301 |
+
where :math:`x` is a vector of decision variables; :math:`c`,
|
| 1302 |
+
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
| 1303 |
+
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
| 1304 |
+
|
| 1305 |
+
Alternatively, that's:
|
| 1306 |
+
|
| 1307 |
+
minimize::
|
| 1308 |
+
|
| 1309 |
+
c @ x
|
| 1310 |
+
|
| 1311 |
+
such that::
|
| 1312 |
+
|
| 1313 |
+
A_ub @ x <= b_ub
|
| 1314 |
+
A_eq @ x == b_eq
|
| 1315 |
+
lb <= x <= ub
|
| 1316 |
+
|
| 1317 |
+
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
| 1318 |
+
``bounds``.
|
| 1319 |
+
|
| 1320 |
+
Parameters
|
| 1321 |
+
----------
|
| 1322 |
+
c : 1-D array
|
| 1323 |
+
The coefficients of the linear objective function to be minimized.
|
| 1324 |
+
A_ub : 2-D array, optional
|
| 1325 |
+
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
| 1326 |
+
coefficients of a linear inequality constraint on ``x``.
|
| 1327 |
+
b_ub : 1-D array, optional
|
| 1328 |
+
The inequality constraint vector. Each element represents an
|
| 1329 |
+
upper bound on the corresponding value of ``A_ub @ x``.
|
| 1330 |
+
A_eq : 2-D array, optional
|
| 1331 |
+
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
| 1332 |
+
coefficients of a linear equality constraint on ``x``.
|
| 1333 |
+
b_eq : 1-D array, optional
|
| 1334 |
+
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
| 1335 |
+
the corresponding element of ``b_eq``.
|
| 1336 |
+
bounds : sequence, optional
|
| 1337 |
+
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
| 1338 |
+
the minimum and maximum values of that decision variable. Use ``None``
|
| 1339 |
+
to indicate that there is no bound. By default, bounds are
|
| 1340 |
+
``(0, None)`` (all decision variables are non-negative).
|
| 1341 |
+
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
| 1342 |
+
``max`` will serve as bounds for all decision variables.
|
| 1343 |
+
method : str
|
| 1344 |
+
This is the method-specific documentation for 'simplex'.
|
| 1345 |
+
:ref:`'highs' <optimize.linprog-highs>`,
|
| 1346 |
+
:ref:`'highs-ds' <optimize.linprog-highs-ds>`,
|
| 1347 |
+
:ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
|
| 1348 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
| 1349 |
+
and :ref:`'revised simplex' <optimize.linprog-revised_simplex>`
|
| 1350 |
+
are also available.
|
| 1351 |
+
callback : callable, optional
|
| 1352 |
+
Callback function to be executed once per iteration.
|
| 1353 |
+
|
| 1354 |
+
Options
|
| 1355 |
+
-------
|
| 1356 |
+
maxiter : int (default: 5000)
|
| 1357 |
+
The maximum number of iterations to perform in either phase.
|
| 1358 |
+
disp : bool (default: False)
|
| 1359 |
+
Set to ``True`` if indicators of optimization status are to be printed
|
| 1360 |
+
to the console each iteration.
|
| 1361 |
+
presolve : bool (default: True)
|
| 1362 |
+
Presolve attempts to identify trivial infeasibilities,
|
| 1363 |
+
identify trivial unboundedness, and simplify the problem before
|
| 1364 |
+
sending it to the main solver. It is generally recommended
|
| 1365 |
+
to keep the default setting ``True``; set to ``False`` if
|
| 1366 |
+
presolve is to be disabled.
|
| 1367 |
+
tol : float (default: 1e-12)
|
| 1368 |
+
The tolerance which determines when a solution is "close enough" to
|
| 1369 |
+
zero in Phase 1 to be considered a basic feasible solution or close
|
| 1370 |
+
enough to positive to serve as an optimal solution.
|
| 1371 |
+
autoscale : bool (default: False)
|
| 1372 |
+
Set to ``True`` to automatically perform equilibration.
|
| 1373 |
+
Consider using this option if the numerical values in the
|
| 1374 |
+
constraints are separated by several orders of magnitude.
|
| 1375 |
+
rr : bool (default: True)
|
| 1376 |
+
Set to ``False`` to disable automatic redundancy removal.
|
| 1377 |
+
bland : bool
|
| 1378 |
+
If True, use Bland's anti-cycling rule [3]_ to choose pivots to
|
| 1379 |
+
prevent cycling. If False, choose pivots which should lead to a
|
| 1380 |
+
converged solution more quickly. The latter method is subject to
|
| 1381 |
+
cycling (non-convergence) in rare instances.
|
| 1382 |
+
unknown_options : dict
|
| 1383 |
+
Optional arguments not used by this particular solver. If
|
| 1384 |
+
`unknown_options` is non-empty a warning is issued listing all
|
| 1385 |
+
unused options.
|
| 1386 |
+
|
| 1387 |
+
Returns
|
| 1388 |
+
-------
|
| 1389 |
+
res : OptimizeResult
|
| 1390 |
+
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
| 1391 |
+
|
| 1392 |
+
x : 1-D array
|
| 1393 |
+
The values of the decision variables that minimizes the
|
| 1394 |
+
objective function while satisfying the constraints.
|
| 1395 |
+
fun : float
|
| 1396 |
+
The optimal value of the objective function ``c @ x``.
|
| 1397 |
+
slack : 1-D array
|
| 1398 |
+
The (nominally positive) values of the slack variables,
|
| 1399 |
+
``b_ub - A_ub @ x``.
|
| 1400 |
+
con : 1-D array
|
| 1401 |
+
The (nominally zero) residuals of the equality constraints,
|
| 1402 |
+
``b_eq - A_eq @ x``.
|
| 1403 |
+
success : bool
|
| 1404 |
+
``True`` when the algorithm succeeds in finding an optimal
|
| 1405 |
+
solution.
|
| 1406 |
+
status : int
|
| 1407 |
+
An integer representing the exit status of the algorithm.
|
| 1408 |
+
|
| 1409 |
+
``0`` : Optimization terminated successfully.
|
| 1410 |
+
|
| 1411 |
+
``1`` : Iteration limit reached.
|
| 1412 |
+
|
| 1413 |
+
``2`` : Problem appears to be infeasible.
|
| 1414 |
+
|
| 1415 |
+
``3`` : Problem appears to be unbounded.
|
| 1416 |
+
|
| 1417 |
+
``4`` : Numerical difficulties encountered.
|
| 1418 |
+
|
| 1419 |
+
message : str
|
| 1420 |
+
A string descriptor of the exit status of the algorithm.
|
| 1421 |
+
nit : int
|
| 1422 |
+
The total number of iterations performed in all phases.
|
| 1423 |
+
|
| 1424 |
+
References
|
| 1425 |
+
----------
|
| 1426 |
+
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
| 1427 |
+
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
| 1428 |
+
1963
|
| 1429 |
+
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
| 1430 |
+
Mathematical Programming", McGraw-Hill, Chapter 4.
|
| 1431 |
+
.. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
|
| 1432 |
+
Mathematics of Operations Research (2), 1977: pp. 103-107.
|
| 1433 |
+
"""
|
| 1434 |
+
pass
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HiGHS Linear Optimization Methods
|
| 2 |
+
|
| 3 |
+
Interface to HiGHS linear optimization software.
|
| 4 |
+
https://highs.dev/
|
| 5 |
+
|
| 6 |
+
.. versionadded:: 1.5.0
|
| 7 |
+
|
| 8 |
+
References
|
| 9 |
+
----------
|
| 10 |
+
.. [1] Q. Huangfu and J.A.J. Hall. "Parallelizing the dual revised simplex
|
| 11 |
+
method." Mathematical Programming Computation, 10 (1), 119-142,
|
| 12 |
+
2018. DOI: 10.1007/s12532-017-0130-5
|
| 13 |
+
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import inspect
|
| 17 |
+
import numpy as np
|
| 18 |
+
from ._optimize import OptimizeWarning, OptimizeResult
|
| 19 |
+
from warnings import warn
|
| 20 |
+
from ._highs._highs_wrapper import _highs_wrapper
|
| 21 |
+
from ._highs._highs_constants import (
|
| 22 |
+
CONST_INF,
|
| 23 |
+
MESSAGE_LEVEL_NONE,
|
| 24 |
+
HIGHS_OBJECTIVE_SENSE_MINIMIZE,
|
| 25 |
+
|
| 26 |
+
MODEL_STATUS_NOTSET,
|
| 27 |
+
MODEL_STATUS_LOAD_ERROR,
|
| 28 |
+
MODEL_STATUS_MODEL_ERROR,
|
| 29 |
+
MODEL_STATUS_PRESOLVE_ERROR,
|
| 30 |
+
MODEL_STATUS_SOLVE_ERROR,
|
| 31 |
+
MODEL_STATUS_POSTSOLVE_ERROR,
|
| 32 |
+
MODEL_STATUS_MODEL_EMPTY,
|
| 33 |
+
MODEL_STATUS_OPTIMAL,
|
| 34 |
+
MODEL_STATUS_INFEASIBLE,
|
| 35 |
+
MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE,
|
| 36 |
+
MODEL_STATUS_UNBOUNDED,
|
| 37 |
+
MODEL_STATUS_REACHED_DUAL_OBJECTIVE_VALUE_UPPER_BOUND
|
| 38 |
+
as MODEL_STATUS_RDOVUB,
|
| 39 |
+
MODEL_STATUS_REACHED_OBJECTIVE_TARGET,
|
| 40 |
+
MODEL_STATUS_REACHED_TIME_LIMIT,
|
| 41 |
+
MODEL_STATUS_REACHED_ITERATION_LIMIT,
|
| 42 |
+
|
| 43 |
+
HIGHS_SIMPLEX_STRATEGY_DUAL,
|
| 44 |
+
|
| 45 |
+
HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
|
| 46 |
+
|
| 47 |
+
HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
|
| 48 |
+
HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
|
| 49 |
+
HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
|
| 50 |
+
HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
|
| 51 |
+
)
|
| 52 |
+
from scipy.sparse import csc_matrix, vstack, issparse
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _highs_to_scipy_status_message(highs_status, highs_message):
|
| 56 |
+
"""Converts HiGHS status number/message to SciPy status number/message"""
|
| 57 |
+
|
| 58 |
+
scipy_statuses_messages = {
|
| 59 |
+
None: (4, "HiGHS did not provide a status code. "),
|
| 60 |
+
MODEL_STATUS_NOTSET: (4, ""),
|
| 61 |
+
MODEL_STATUS_LOAD_ERROR: (4, ""),
|
| 62 |
+
MODEL_STATUS_MODEL_ERROR: (2, ""),
|
| 63 |
+
MODEL_STATUS_PRESOLVE_ERROR: (4, ""),
|
| 64 |
+
MODEL_STATUS_SOLVE_ERROR: (4, ""),
|
| 65 |
+
MODEL_STATUS_POSTSOLVE_ERROR: (4, ""),
|
| 66 |
+
MODEL_STATUS_MODEL_EMPTY: (4, ""),
|
| 67 |
+
MODEL_STATUS_RDOVUB: (4, ""),
|
| 68 |
+
MODEL_STATUS_REACHED_OBJECTIVE_TARGET: (4, ""),
|
| 69 |
+
MODEL_STATUS_OPTIMAL: (0, "Optimization terminated successfully. "),
|
| 70 |
+
MODEL_STATUS_REACHED_TIME_LIMIT: (1, "Time limit reached. "),
|
| 71 |
+
MODEL_STATUS_REACHED_ITERATION_LIMIT: (1, "Iteration limit reached. "),
|
| 72 |
+
MODEL_STATUS_INFEASIBLE: (2, "The problem is infeasible. "),
|
| 73 |
+
MODEL_STATUS_UNBOUNDED: (3, "The problem is unbounded. "),
|
| 74 |
+
MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE: (4, "The problem is unbounded "
|
| 75 |
+
"or infeasible. ")}
|
| 76 |
+
unrecognized = (4, "The HiGHS status code was not recognized. ")
|
| 77 |
+
scipy_status, scipy_message = (
|
| 78 |
+
scipy_statuses_messages.get(highs_status, unrecognized))
|
| 79 |
+
scipy_message = (f"{scipy_message}"
|
| 80 |
+
f"(HiGHS Status {highs_status}: {highs_message})")
|
| 81 |
+
return scipy_status, scipy_message
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _replace_inf(x):
|
| 85 |
+
# Replace `np.inf` with CONST_INF
|
| 86 |
+
infs = np.isinf(x)
|
| 87 |
+
with np.errstate(invalid="ignore"):
|
| 88 |
+
x[infs] = np.sign(x[infs])*CONST_INF
|
| 89 |
+
return x
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _convert_to_highs_enum(option, option_str, choices):
|
| 93 |
+
# If option is in the choices we can look it up, if not use
|
| 94 |
+
# the default value taken from function signature and warn:
|
| 95 |
+
try:
|
| 96 |
+
return choices[option.lower()]
|
| 97 |
+
except AttributeError:
|
| 98 |
+
return choices[option]
|
| 99 |
+
except KeyError:
|
| 100 |
+
sig = inspect.signature(_linprog_highs)
|
| 101 |
+
default_str = sig.parameters[option_str].default
|
| 102 |
+
warn(f"Option {option_str} is {option}, but only values in "
|
| 103 |
+
f"{set(choices.keys())} are allowed. Using default: "
|
| 104 |
+
f"{default_str}.",
|
| 105 |
+
OptimizeWarning, stacklevel=3)
|
| 106 |
+
return choices[default_str]
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _linprog_highs(lp, solver, time_limit=None, presolve=True,
                   disp=False, maxiter=None,
                   dual_feasibility_tolerance=None,
                   primal_feasibility_tolerance=None,
                   ipm_optimality_tolerance=None,
                   simplex_dual_edge_weight_strategy=None,
                   mip_rel_gap=None,
                   mip_max_nodes=None,
                   **unknown_options):
    r"""
    Solve the following linear programming problem using one of the HiGHS
    solvers:

    User-facing documentation is in _linprog_doc.py.

    Parameters
    ----------
    lp : _LPProblem
        A ``scipy.optimize._linprog_util._LPProblem`` ``namedtuple``.
    solver : "ipm" or "simplex" or None
        Which HiGHS solver to use.  If ``None``, "simplex" will be used.

    Options
    -------
    maxiter : int
        The maximum number of iterations to perform in either phase. For
        ``solver='ipm'``, this does not include the number of crossover
        iterations. Default is the largest possible value for an ``int``
        on the platform.
    disp : bool
        Set to ``True`` if indicators of optimization status are to be printed
        to the console each iteration; default ``False``.
    time_limit : float
        The maximum time in seconds allotted to solve the problem; default is
        the largest possible value for a ``double`` on the platform.
    presolve : bool
        Presolve attempts to identify trivial infeasibilities,
        identify trivial unboundedness, and simplify the problem before
        sending it to the main solver. It is generally recommended
        to keep the default setting ``True``; set to ``False`` if presolve is
        to be disabled.
    dual_feasibility_tolerance : double
        Dual feasibility tolerance. Default is 1e-07.
        The minimum of this and ``primal_feasibility_tolerance``
        is used for the feasibility tolerance when ``solver='ipm'``.
    primal_feasibility_tolerance : double
        Primal feasibility tolerance. Default is 1e-07.
        The minimum of this and ``dual_feasibility_tolerance``
        is used for the feasibility tolerance when ``solver='ipm'``.
    ipm_optimality_tolerance : double
        Optimality tolerance for ``solver='ipm'``. Default is 1e-08.
        Minimum possible value is 1e-12 and must be smaller than the largest
        possible value for a ``double`` on the platform.
    simplex_dual_edge_weight_strategy : str (default: None)
        Strategy for simplex dual edge weights. The default, ``None``,
        automatically selects one of the following.

        ``'dantzig'`` uses Dantzig's original strategy of choosing the most
        negative reduced cost.

        ``'devex'`` uses the strategy described in [15]_.

        ``'steepest'`` uses the exact steepest edge strategy as described in
        [16]_.

        ``'steepest-devex'`` begins with the exact steepest edge strategy
        until the computation is too costly or inexact and then switches to
        the devex method.

        Currently, using ``None`` always selects ``'steepest-devex'``, but this
        may change as new options become available.

    mip_max_nodes : int
        The maximum number of nodes allotted to solve the problem; default is
        the largest possible value for a ``HighsInt`` on the platform.
        Ignored if not using the MIP solver.
    unknown_options : dict
        Optional arguments not used by this particular solver. If
        ``unknown_options`` is non-empty, a warning is issued listing all
        unused options.

    Returns
    -------
    sol : dict
        A dictionary consisting of the fields:

            x : 1D array
                The values of the decision variables that minimizes the
                objective function while satisfying the constraints.
            fun : float
                The optimal value of the objective function ``c @ x``.
            slack : 1D array
                The (nominally positive) values of the slack,
                ``b_ub - A_ub @ x``.
            con : 1D array
                The (nominally zero) residuals of the equality constraints,
                ``b_eq - A_eq @ x``.
            success : bool
                ``True`` when the algorithm succeeds in finding an optimal
                solution.
            status : int
                An integer representing the exit status of the algorithm.

                ``0`` : Optimization terminated successfully.

                ``1`` : Iteration or time limit reached.

                ``2`` : Problem appears to be infeasible.

                ``3`` : Problem appears to be unbounded.

                ``4`` : The HiGHS solver ran into a problem.

            message : str
                A string descriptor of the exit status of the algorithm.
            nit : int
                The total number of iterations performed.
                For ``solver='simplex'``, this includes iterations in all
                phases. For ``solver='ipm'``, this does not include
                crossover iterations.
            crossover_nit : int
                The number of primal/dual pushes performed during the
                crossover routine for ``solver='ipm'``. This is ``0``
                for ``solver='simplex'``.
            ineqlin : OptimizeResult
                Solution and sensitivity information corresponding to the
                inequality constraints, `b_ub`. A dictionary consisting of the
                fields:

                residual : np.ndarray
                    The (nominally positive) values of the slack variables,
                    ``b_ub - A_ub @ x``. This quantity is also commonly
                    referred to as "slack".

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the right-hand side of the
                    inequality constraints, `b_ub`.

            eqlin : OptimizeResult
                Solution and sensitivity information corresponding to the
                equality constraints, `b_eq`. A dictionary consisting of the
                fields:

                residual : np.ndarray
                    The (nominally zero) residuals of the equality constraints,
                    ``b_eq - A_eq @ x``.

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the right-hand side of the
                    equality constraints, `b_eq`.

            lower, upper : OptimizeResult
                Solution and sensitivity information corresponding to the
                lower and upper bounds on decision variables, `bounds`.

                residual : np.ndarray
                    The (nominally positive) values of the quantity
                    ``x - lb`` (lower) or ``ub - x`` (upper).

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the lower and upper
                    `bounds`.

            mip_node_count : int
                The number of subproblems or "nodes" solved by the MILP
                solver. Only present when `integrality` is not `None`.

            mip_dual_bound : float
                The MILP solver's final estimate of the lower bound on the
                optimal solution. Only present when `integrality` is not
                `None`.

            mip_gap : float
                The difference between the final objective function value
                and the final dual bound, scaled by the final objective
                function value. Only present when `integrality` is not
                `None`.

    Notes
    -----
    The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
    `marginals`, or partial derivatives of the objective function with respect
    to the right-hand side of each constraint. These partial derivatives are
    also referred to as "Lagrange multipliers", "dual values", and
    "shadow prices". The sign convention of `marginals` is opposite that
    of Lagrange multipliers produced by many nonlinear solvers.

    References
    ----------
    .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
            Mathematical programming 5.1 (1973): 1-28.
    .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
            simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
    """
    if unknown_options:
        message = (f"Unrecognized options detected: {unknown_options}. "
                   "These will be passed to HiGHS verbatim.")
        warn(message, OptimizeWarning, stacklevel=3)

    # Map options to HiGHS enum values
    simplex_dual_edge_weight_strategy_enum = _convert_to_highs_enum(
        simplex_dual_edge_weight_strategy,
        'simplex_dual_edge_weight_strategy',
        choices={'dantzig': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
                 'devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
                 'steepest-devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
                 'steepest':
                 HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
                 None: None})

    c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality = lp

    lb, ub = bounds.T.copy()  # separate bounds, copy->C-cntgs
    # highs_wrapper solves LHS <= A*x <= RHS, not equality constraints
    with np.errstate(invalid="ignore"):
        lhs_ub = -np.ones_like(b_ub)*np.inf  # LHS of UB constraints is -inf
    rhs_ub = b_ub  # RHS of UB constraints is b_ub
    lhs_eq = b_eq  # Equality constraint is inequality
    rhs_eq = b_eq  # constraint with LHS=RHS
    lhs = np.concatenate((lhs_ub, lhs_eq))
    rhs = np.concatenate((rhs_ub, rhs_eq))

    # HiGHS requires a CSC matrix of the stacked constraints.
    if issparse(A_ub) or issparse(A_eq):
        A = vstack((A_ub, A_eq))
    else:
        A = np.vstack((A_ub, A_eq))
        A = csc_matrix(A)

    options = {
        'presolve': presolve,
        'sense': HIGHS_OBJECTIVE_SENSE_MINIMIZE,
        'solver': solver,
        'time_limit': time_limit,
        'highs_debug_level': MESSAGE_LEVEL_NONE,
        'dual_feasibility_tolerance': dual_feasibility_tolerance,
        'ipm_optimality_tolerance': ipm_optimality_tolerance,
        'log_to_console': disp,
        'mip_max_nodes': mip_max_nodes,
        'output_flag': disp,
        'primal_feasibility_tolerance': primal_feasibility_tolerance,
        'simplex_dual_edge_weight_strategy':
            simplex_dual_edge_weight_strategy_enum,
        'simplex_strategy': HIGHS_SIMPLEX_STRATEGY_DUAL,
        'simplex_crash_strategy': HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
        'ipm_iteration_limit': maxiter,
        'simplex_iteration_limit': maxiter,
        'mip_rel_gap': mip_rel_gap,
    }
    options.update(unknown_options)

    # np.inf doesn't work; use very large constant
    rhs = _replace_inf(rhs)
    lhs = _replace_inf(lhs)
    lb = _replace_inf(lb)
    ub = _replace_inf(ub)

    # An empty integrality array signals a pure LP to the wrapper.
    if integrality is None or np.sum(integrality) == 0:
        integrality = np.empty(0)
    else:
        integrality = np.array(integrality)

    res = _highs_wrapper(c, A.indptr, A.indices, A.data, lhs, rhs,
                         lb, ub, integrality.astype(np.uint8), options)

    # HiGHS represents constraints as lhs/rhs, so
    # Ax + s = b => Ax = b - s
    # and we need to split up s by A_ub and A_eq
    if 'slack' in res:
        slack = res['slack']
        con = np.array(slack[len(b_ub):])
        slack = np.array(slack[:len(b_ub)])
    else:
        slack, con = None, None

    # lagrange multipliers for equalities/inequalities and upper/lower bounds
    if 'lambda' in res:
        lamda = res['lambda']
        marg_ineqlin = np.array(lamda[:len(b_ub)])
        marg_eqlin = np.array(lamda[len(b_ub):])
        marg_upper = np.array(res['marg_bnds'][1, :])
        marg_lower = np.array(res['marg_bnds'][0, :])
    else:
        marg_ineqlin, marg_eqlin = None, None
        marg_upper, marg_lower = None, None

    # this needs to be updated if we start choosing the solver intelligently

    # Convert to scipy-style status and message
    highs_status = res.get('status', None)
    highs_message = res.get('message', None)
    status, message = _highs_to_scipy_status_message(highs_status,
                                                     highs_message)

    x = np.array(res['x']) if 'x' in res else None
    sol = {'x': x,
           'slack': slack,
           'con': con,
           'ineqlin': OptimizeResult({
               'residual': slack,
               'marginals': marg_ineqlin,
           }),
           'eqlin': OptimizeResult({
               'residual': con,
               'marginals': marg_eqlin,
           }),
           'lower': OptimizeResult({
               'residual': None if x is None else x - lb,
               'marginals': marg_lower,
           }),
           'upper': OptimizeResult({
               'residual': None if x is None else ub - x,
               'marginals': marg_upper
           }),
           'fun': res.get('fun'),
           'status': status,
           # Use the defensively-extracted `highs_status` here: indexing
           # res['status'] directly raised KeyError when the wrapper
           # returned no status, contradicting the res.get(...) above.
           'success': highs_status == MODEL_STATUS_OPTIMAL,
           'message': message,
           'nit': res.get('simplex_nit', 0) or res.get('ipm_nit', 0),
           'crossover_nit': res.get('crossover_nit'),
           }

    # MIP-only diagnostics; only attached when the MILP solver ran.
    if np.any(x) and integrality is not None:
        sol.update({
            'mip_node_count': res.get('mip_node_count', 0),
            'mip_dual_bound': res.get('mip_dual_bound', 0.0),
            'mip_gap': res.get('mip_gap', 0.0),
        })

    return sol
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py
ADDED
|
@@ -0,0 +1,1126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Interior-point method for linear programming
|
| 2 |
+
|
| 3 |
+
The *interior-point* method uses the primal-dual path following algorithm
|
| 4 |
+
outlined in [1]_. This algorithm supports sparse constraint matrices and
|
| 5 |
+
is typically faster than the simplex methods, especially for large, sparse
|
| 6 |
+
problems. Note, however, that the solution returned may be slightly less
|
| 7 |
+
accurate than those of the simplex methods and will not, in general,
|
| 8 |
+
correspond with a vertex of the polytope defined by the constraints.
|
| 9 |
+
|
| 10 |
+
.. versionadded:: 1.0.0
|
| 11 |
+
|
| 12 |
+
References
|
| 13 |
+
----------
|
| 14 |
+
.. [1] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 15 |
+
optimizer for linear programming: an implementation of the
|
| 16 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 17 |
+
2000. 197-232.
|
| 18 |
+
"""
|
| 19 |
+
# Author: Matt Haberland
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
import scipy as sp
|
| 23 |
+
import scipy.sparse as sps
|
| 24 |
+
from warnings import warn
|
| 25 |
+
from scipy.linalg import LinAlgError
|
| 26 |
+
from ._optimize import OptimizeWarning, OptimizeResult, _check_unknown_options
|
| 27 |
+
from ._linprog_util import _postsolve
|
| 28 |
+
has_umfpack = True
|
| 29 |
+
has_cholmod = True
|
| 30 |
+
try:
|
| 31 |
+
import sksparse # noqa: F401
|
| 32 |
+
from sksparse.cholmod import cholesky as cholmod # noqa: F401
|
| 33 |
+
from sksparse.cholmod import analyze as cholmod_analyze
|
| 34 |
+
except ImportError:
|
| 35 |
+
has_cholmod = False
|
| 36 |
+
try:
|
| 37 |
+
import scikits.umfpack # test whether to use factorized # noqa: F401
|
| 38 |
+
except ImportError:
|
| 39 |
+
has_umfpack = False
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _get_solver(M, sparse=False, lstsq=False, sym_pos=True,
|
| 43 |
+
cholesky=True, permc_spec='MMD_AT_PLUS_A'):
|
| 44 |
+
"""
|
| 45 |
+
Given solver options, return a handle to the appropriate linear system
|
| 46 |
+
solver.
|
| 47 |
+
|
| 48 |
+
Parameters
|
| 49 |
+
----------
|
| 50 |
+
M : 2-D array
|
| 51 |
+
As defined in [4] Equation 8.31
|
| 52 |
+
sparse : bool (default = False)
|
| 53 |
+
True if the system to be solved is sparse. This is typically set
|
| 54 |
+
True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
|
| 55 |
+
lstsq : bool (default = False)
|
| 56 |
+
True if the system is ill-conditioned and/or (nearly) singular and
|
| 57 |
+
thus a more robust least-squares solver is desired. This is sometimes
|
| 58 |
+
needed as the solution is approached.
|
| 59 |
+
sym_pos : bool (default = True)
|
| 60 |
+
True if the system matrix is symmetric positive definite
|
| 61 |
+
Sometimes this needs to be set false as the solution is approached,
|
| 62 |
+
even when the system should be symmetric positive definite, due to
|
| 63 |
+
numerical difficulties.
|
| 64 |
+
cholesky : bool (default = True)
|
| 65 |
+
True if the system is to be solved by Cholesky, rather than LU,
|
| 66 |
+
decomposition. This is typically faster unless the problem is very
|
| 67 |
+
small or prone to numerical difficulties.
|
| 68 |
+
permc_spec : str (default = 'MMD_AT_PLUS_A')
|
| 69 |
+
Sparsity preservation strategy used by SuperLU. Acceptable values are:
|
| 70 |
+
|
| 71 |
+
- ``NATURAL``: natural ordering.
|
| 72 |
+
- ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
|
| 73 |
+
- ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
|
| 74 |
+
- ``COLAMD``: approximate minimum degree column ordering.
|
| 75 |
+
|
| 76 |
+
See SuperLU documentation.
|
| 77 |
+
|
| 78 |
+
Returns
|
| 79 |
+
-------
|
| 80 |
+
solve : function
|
| 81 |
+
Handle to the appropriate solver function
|
| 82 |
+
|
| 83 |
+
"""
|
| 84 |
+
try:
|
| 85 |
+
if sparse:
|
| 86 |
+
if lstsq:
|
| 87 |
+
def solve(r, sym_pos=False):
|
| 88 |
+
return sps.linalg.lsqr(M, r)[0]
|
| 89 |
+
elif cholesky:
|
| 90 |
+
try:
|
| 91 |
+
# Will raise an exception in the first call,
|
| 92 |
+
# or when the matrix changes due to a new problem
|
| 93 |
+
_get_solver.cholmod_factor.cholesky_inplace(M)
|
| 94 |
+
except Exception:
|
| 95 |
+
_get_solver.cholmod_factor = cholmod_analyze(M)
|
| 96 |
+
_get_solver.cholmod_factor.cholesky_inplace(M)
|
| 97 |
+
solve = _get_solver.cholmod_factor
|
| 98 |
+
else:
|
| 99 |
+
if has_umfpack and sym_pos:
|
| 100 |
+
solve = sps.linalg.factorized(M)
|
| 101 |
+
else: # factorized doesn't pass permc_spec
|
| 102 |
+
solve = sps.linalg.splu(M, permc_spec=permc_spec).solve
|
| 103 |
+
|
| 104 |
+
else:
|
| 105 |
+
if lstsq: # sometimes necessary as solution is approached
|
| 106 |
+
def solve(r):
|
| 107 |
+
return sp.linalg.lstsq(M, r)[0]
|
| 108 |
+
elif cholesky:
|
| 109 |
+
L = sp.linalg.cho_factor(M)
|
| 110 |
+
|
| 111 |
+
def solve(r):
|
| 112 |
+
return sp.linalg.cho_solve(L, r)
|
| 113 |
+
else:
|
| 114 |
+
# this seems to cache the matrix factorization, so solving
|
| 115 |
+
# with multiple right hand sides is much faster
|
| 116 |
+
def solve(r, sym_pos=sym_pos):
|
| 117 |
+
if sym_pos:
|
| 118 |
+
return sp.linalg.solve(M, r, assume_a="pos")
|
| 119 |
+
else:
|
| 120 |
+
return sp.linalg.solve(M, r)
|
| 121 |
+
# There are many things that can go wrong here, and it's hard to say
|
| 122 |
+
# what all of them are. It doesn't really matter: if the matrix can't be
|
| 123 |
+
# factorized, return None. get_solver will be called again with different
|
| 124 |
+
# inputs, and a new routine will try to factorize the matrix.
|
| 125 |
+
except KeyboardInterrupt:
|
| 126 |
+
raise
|
| 127 |
+
except Exception:
|
| 128 |
+
return None
|
| 129 |
+
return solve
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _get_delta(A, b, c, x, y, z, tau, kappa, gamma, eta, sparse=False,
               lstsq=False, sym_pos=True, cholesky=True, pc=True, ip=False,
               permc_spec='MMD_AT_PLUS_A'):
    """
    Given standard form problem defined by ``A``, ``b``, and ``c``;
    current variable estimates ``x``, ``y``, ``z``, ``tau``, and ``kappa``;
    algorithmic parameters ``gamma`` and ``eta``;
    and options ``sparse``, ``lstsq``, ``sym_pos``, ``cholesky``, ``pc``
    (predictor-corrector), and ``ip`` (initial point improvement),
    get the search direction for increments to the variable estimates.

    Parameters
    ----------
    As defined in [4], except:
    sparse : bool
        True if the system to be solved is sparse. This is typically set
        True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
    lstsq : bool
        True if the system is ill-conditioned and/or (nearly) singular and
        thus a more robust least-squares solver is desired. This is sometimes
        needed as the solution is approached.
    sym_pos : bool
        True if the system matrix is symmetric positive definite
        Sometimes this needs to be set false as the solution is approached,
        even when the system should be symmetric positive definite, due to
        numerical difficulties.
    cholesky : bool
        True if the system is to be solved by Cholesky, rather than LU,
        decomposition. This is typically faster unless the problem is very
        small or prone to numerical difficulties.
    pc : bool
        True if the predictor-corrector method of Mehrota is to be used. This
        is almost always (if not always) beneficial. Even though it requires
        the solution of an additional linear system, the factorization
        is typically (implicitly) reused so solution is efficient, and the
        number of algorithm iterations is typically reduced.
    ip : bool
        True if the improved initial point suggestion due to [4] section 4.3
        is desired. It's unclear whether this is beneficial.
    permc_spec : str (default = 'MMD_AT_PLUS_A')
        (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
        True``.) A matrix is factorized in each iteration of the algorithm.
        This option specifies how to permute the columns of the matrix for
        sparsity preservation. Acceptable values are:

        - ``NATURAL``: natural ordering.
        - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
        - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
        - ``COLAMD``: approximate minimum degree column ordering.

        This option can impact the convergence of the
        interior point algorithm; test different values to determine which
        performs best for your problem. For more information, refer to
        ``scipy.sparse.linalg.splu``.

    Returns
    -------
    Search directions as defined in [4]

    References
    ----------
    .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
           optimizer for linear programming: an implementation of the
           homogeneous algorithm." High performance optimization. Springer US,
           2000. 197-232.

    """
    if A.shape[0] == 0:
        # If there are no constraints, some solvers fail (understandably)
        # rather than returning empty solution. This gets the job done.
        sparse, lstsq, sym_pos, cholesky = False, False, True, False
    n_x = len(x)

    # [4] Equation 8.8
    r_P = b * tau - A.dot(x)
    r_D = c * tau - A.T.dot(y) - z
    r_G = c.dot(x) - b.transpose().dot(y) + kappa
    mu = (x.dot(z) + tau * kappa) / (n_x + 1)

    # Assemble M from [4] Equation 8.31
    # Dinv is the diagonal of X Z^-1 (elementwise x / z)
    Dinv = x / z

    if sparse:
        M = A.dot(sps.diags(Dinv, 0, format="csc").dot(A.T))
    else:
        M = A.dot(Dinv.reshape(-1, 1) * A.T)
    solve = _get_solver(M, sparse, lstsq, sym_pos, cholesky, permc_spec)

    # pc: "predictor-corrector" [4] Section 4.1
    # In development this option could be turned off
    # but it always seems to improve performance substantially
    n_corrections = 1 if pc else 0

    i = 0
    alpha, d_x, d_z, d_tau, d_kappa = 0, 0, 0, 0, 0
    while i <= n_corrections:
        # Reference [4] Eq. 8.6
        rhatp = eta(gamma) * r_P
        rhatd = eta(gamma) * r_D
        rhatg = eta(gamma) * r_G

        # Reference [4] Eq. 8.7
        rhatxs = gamma * mu - x * z
        rhattk = gamma * mu - tau * kappa

        if i == 1:
            # Second pass: modify the right-hand sides using the directions
            # computed in the first (predictor) pass.
            if ip:  # if the correction is to get "initial point"
                # Reference [4] Eq. 8.23
                rhatxs = ((1 - alpha) * gamma * mu -
                          x * z - alpha**2 * d_x * d_z)
                rhattk = ((1 - alpha) * gamma * mu -
                          tau * kappa -
                          alpha**2 * d_tau * d_kappa)
            else:  # if the correction is for "predictor-corrector"
                # Reference [4] Eq. 8.13
                rhatxs -= d_x * d_z
                rhattk -= d_tau * d_kappa

        # sometimes numerical difficulties arise as the solution is approached
        # this loop tries to solve the equations using a sequence of functions
        # for solve. For dense systems, the order is:
        # 1. scipy.linalg.cho_factor/scipy.linalg.cho_solve,
        # 2. scipy.linalg.solve w/ sym_pos = True,
        # 3. scipy.linalg.solve w/ sym_pos = False, and if all else fails
        # 4. scipy.linalg.lstsq
        # For sparse systems, the order is:
        # 1. sksparse.cholmod.cholesky (if available)
        # 2. scipy.sparse.linalg.factorized (if umfpack available)
        # 3. scipy.sparse.linalg.splu
        # 4. scipy.sparse.linalg.lsqr
        solved = False
        while not solved:
            try:
                # [4] Equation 8.28
                p, q = _sym_solve(Dinv, A, c, b, solve)
                # [4] Equation 8.29
                u, v = _sym_solve(Dinv, A, rhatd -
                                  (1 / x) * rhatxs, rhatp, solve)
                if np.any(np.isnan(p)) or np.any(np.isnan(q)):
                    raise LinAlgError
                solved = True
            except (LinAlgError, ValueError, TypeError) as e:
                # Usually this doesn't happen. If it does, it happens when
                # there are redundant constraints or when approaching the
                # solution. If so, change solver.
                if cholesky:
                    cholesky = False
                    warn(
                        "Solving system with option 'cholesky':True "
                        "failed. It is normal for this to happen "
                        "occasionally, especially as the solution is "
                        "approached. However, if you see this frequently, "
                        "consider setting option 'cholesky' to False.",
                        OptimizeWarning, stacklevel=5)
                elif sym_pos:
                    sym_pos = False
                    warn(
                        "Solving system with option 'sym_pos':True "
                        "failed. It is normal for this to happen "
                        "occasionally, especially as the solution is "
                        "approached. However, if you see this frequently, "
                        "consider setting option 'sym_pos' to False.",
                        OptimizeWarning, stacklevel=5)
                elif not lstsq:
                    lstsq = True
                    warn(
                        "Solving system with option 'sym_pos':False "
                        "failed. This may happen occasionally, "
                        "especially as the solution is "
                        "approached. However, if you see this frequently, "
                        "your problem may be numerically challenging. "
                        "If you cannot improve the formulation, consider "
                        "setting 'lstsq' to True. Consider also setting "
                        "`presolve` to True, if it is not already.",
                        OptimizeWarning, stacklevel=5)
                else:
                    # All fallbacks exhausted; re-raise for the caller.
                    raise e
                solve = _get_solver(M, sparse, lstsq, sym_pos,
                                    cholesky, permc_spec)
        # [4] Results after 8.29
        d_tau = ((rhatg + 1 / tau * rhattk - (-c.dot(u) + b.dot(v))) /
                 (1 / tau * kappa + (-c.dot(p) + b.dot(q))))
        d_x = u + p * d_tau
        d_y = v + q * d_tau

        # [4] Relations between after 8.25 and 8.26
        d_z = (1 / x) * (rhatxs - z * d_x)
        d_kappa = 1 / tau * (rhattk - kappa * d_tau)

        # [4] 8.12 and "Let alpha be the maximal possible step..." before 8.23
        alpha = _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, 1)
        if ip:  # initial point - see [4] 4.4
            gamma = 10
        else:  # predictor-corrector, [4] definition after 8.12
            beta1 = 0.1  # [4] pg. 220 (Table 8.1)
            gamma = (1 - alpha)**2 * min(beta1, (1 - alpha))
        i += 1

    return d_x, d_y, d_z, d_tau, d_kappa
| 332 |
+
|
| 333 |
+
def _sym_solve(Dinv, A, r1, r2, solve):
|
| 334 |
+
"""
|
| 335 |
+
An implementation of [4] equation 8.31 and 8.32
|
| 336 |
+
|
| 337 |
+
References
|
| 338 |
+
----------
|
| 339 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 340 |
+
optimizer for linear programming: an implementation of the
|
| 341 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 342 |
+
2000. 197-232.
|
| 343 |
+
|
| 344 |
+
"""
|
| 345 |
+
# [4] 8.31
|
| 346 |
+
r = r2 + A.dot(Dinv * r1)
|
| 347 |
+
v = solve(r)
|
| 348 |
+
# [4] 8.32
|
| 349 |
+
u = Dinv * (A.T.dot(v) - r1)
|
| 350 |
+
return u, v
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
def _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, alpha0):
|
| 354 |
+
"""
|
| 355 |
+
An implementation of [4] equation 8.21
|
| 356 |
+
|
| 357 |
+
References
|
| 358 |
+
----------
|
| 359 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 360 |
+
optimizer for linear programming: an implementation of the
|
| 361 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 362 |
+
2000. 197-232.
|
| 363 |
+
|
| 364 |
+
"""
|
| 365 |
+
# [4] 4.3 Equation 8.21, ignoring 8.20 requirement
|
| 366 |
+
# same step is taken in primal and dual spaces
|
| 367 |
+
# alpha0 is basically beta3 from [4] Table 8.1, but instead of beta3
|
| 368 |
+
# the value 1 is used in Mehrota corrector and initial point correction
|
| 369 |
+
i_x = d_x < 0
|
| 370 |
+
i_z = d_z < 0
|
| 371 |
+
alpha_x = alpha0 * np.min(x[i_x] / -d_x[i_x]) if np.any(i_x) else 1
|
| 372 |
+
alpha_tau = alpha0 * tau / -d_tau if d_tau < 0 else 1
|
| 373 |
+
alpha_z = alpha0 * np.min(z[i_z] / -d_z[i_z]) if np.any(i_z) else 1
|
| 374 |
+
alpha_kappa = alpha0 * kappa / -d_kappa if d_kappa < 0 else 1
|
| 375 |
+
alpha = np.min([1, alpha_x, alpha_tau, alpha_z, alpha_kappa])
|
| 376 |
+
return alpha
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def _get_message(status):
|
| 380 |
+
"""
|
| 381 |
+
Given problem status code, return a more detailed message.
|
| 382 |
+
|
| 383 |
+
Parameters
|
| 384 |
+
----------
|
| 385 |
+
status : int
|
| 386 |
+
An integer representing the exit status of the optimization::
|
| 387 |
+
|
| 388 |
+
0 : Optimization terminated successfully
|
| 389 |
+
1 : Iteration limit reached
|
| 390 |
+
2 : Problem appears to be infeasible
|
| 391 |
+
3 : Problem appears to be unbounded
|
| 392 |
+
4 : Serious numerical difficulties encountered
|
| 393 |
+
|
| 394 |
+
Returns
|
| 395 |
+
-------
|
| 396 |
+
message : str
|
| 397 |
+
A string descriptor of the exit status of the optimization.
|
| 398 |
+
|
| 399 |
+
"""
|
| 400 |
+
messages = (
|
| 401 |
+
["Optimization terminated successfully.",
|
| 402 |
+
"The iteration limit was reached before the algorithm converged.",
|
| 403 |
+
"The algorithm terminated successfully and determined that the "
|
| 404 |
+
"problem is infeasible.",
|
| 405 |
+
"The algorithm terminated successfully and determined that the "
|
| 406 |
+
"problem is unbounded.",
|
| 407 |
+
"Numerical difficulties were encountered before the problem "
|
| 408 |
+
"converged. Please check your problem formulation for errors, "
|
| 409 |
+
"independence of linear equality constraints, and reasonable "
|
| 410 |
+
"scaling and matrix condition numbers. If you continue to "
|
| 411 |
+
"encounter this error, please submit a bug report."
|
| 412 |
+
])
|
| 413 |
+
return messages[status]
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def _do_step(x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha):
|
| 417 |
+
"""
|
| 418 |
+
An implementation of [4] Equation 8.9
|
| 419 |
+
|
| 420 |
+
References
|
| 421 |
+
----------
|
| 422 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 423 |
+
optimizer for linear programming: an implementation of the
|
| 424 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 425 |
+
2000. 197-232.
|
| 426 |
+
|
| 427 |
+
"""
|
| 428 |
+
x = x + alpha * d_x
|
| 429 |
+
tau = tau + alpha * d_tau
|
| 430 |
+
z = z + alpha * d_z
|
| 431 |
+
kappa = kappa + alpha * d_kappa
|
| 432 |
+
y = y + alpha * d_y
|
| 433 |
+
return x, y, z, tau, kappa
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def _get_blind_start(shape):
|
| 437 |
+
"""
|
| 438 |
+
Return the starting point from [4] 4.4
|
| 439 |
+
|
| 440 |
+
References
|
| 441 |
+
----------
|
| 442 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 443 |
+
optimizer for linear programming: an implementation of the
|
| 444 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 445 |
+
2000. 197-232.
|
| 446 |
+
|
| 447 |
+
"""
|
| 448 |
+
m, n = shape
|
| 449 |
+
x0 = np.ones(n)
|
| 450 |
+
y0 = np.zeros(m)
|
| 451 |
+
z0 = np.ones(n)
|
| 452 |
+
tau0 = 1
|
| 453 |
+
kappa0 = 1
|
| 454 |
+
return x0, y0, z0, tau0, kappa0
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def _indicators(A, b, c, c0, x, y, z, tau, kappa):
|
| 458 |
+
"""
|
| 459 |
+
Implementation of several equations from [4] used as indicators of
|
| 460 |
+
the status of optimization.
|
| 461 |
+
|
| 462 |
+
References
|
| 463 |
+
----------
|
| 464 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 465 |
+
optimizer for linear programming: an implementation of the
|
| 466 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 467 |
+
2000. 197-232.
|
| 468 |
+
|
| 469 |
+
"""
|
| 470 |
+
|
| 471 |
+
# residuals for termination are relative to initial values
|
| 472 |
+
x0, y0, z0, tau0, kappa0 = _get_blind_start(A.shape)
|
| 473 |
+
|
| 474 |
+
# See [4], Section 4 - The Homogeneous Algorithm, Equation 8.8
|
| 475 |
+
def r_p(x, tau):
|
| 476 |
+
return b * tau - A.dot(x)
|
| 477 |
+
|
| 478 |
+
def r_d(y, z, tau):
|
| 479 |
+
return c * tau - A.T.dot(y) - z
|
| 480 |
+
|
| 481 |
+
def r_g(x, y, kappa):
|
| 482 |
+
return kappa + c.dot(x) - b.dot(y)
|
| 483 |
+
|
| 484 |
+
# np.dot unpacks if they are arrays of size one
|
| 485 |
+
def mu(x, tau, z, kappa):
|
| 486 |
+
return (x.dot(z) + np.dot(tau, kappa)) / (len(x) + 1)
|
| 487 |
+
|
| 488 |
+
obj = c.dot(x / tau) + c0
|
| 489 |
+
|
| 490 |
+
def norm(a):
|
| 491 |
+
return np.linalg.norm(a)
|
| 492 |
+
|
| 493 |
+
# See [4], Section 4.5 - The Stopping Criteria
|
| 494 |
+
r_p0 = r_p(x0, tau0)
|
| 495 |
+
r_d0 = r_d(y0, z0, tau0)
|
| 496 |
+
r_g0 = r_g(x0, y0, kappa0)
|
| 497 |
+
mu_0 = mu(x0, tau0, z0, kappa0)
|
| 498 |
+
rho_A = norm(c.T.dot(x) - b.T.dot(y)) / (tau + norm(b.T.dot(y)))
|
| 499 |
+
rho_p = norm(r_p(x, tau)) / max(1, norm(r_p0))
|
| 500 |
+
rho_d = norm(r_d(y, z, tau)) / max(1, norm(r_d0))
|
| 501 |
+
rho_g = norm(r_g(x, y, kappa)) / max(1, norm(r_g0))
|
| 502 |
+
rho_mu = mu(x, tau, z, kappa) / mu_0
|
| 503 |
+
return rho_p, rho_d, rho_A, rho_g, rho_mu, obj
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
def _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj, header=False):
    """
    Print indicators of optimization status to the console.

    Parameters
    ----------
    rho_p : float
        The (normalized) primal feasibility, see [4] 4.5
    rho_d : float
        The (normalized) dual feasibility, see [4] 4.5
    rho_g : float
        The (normalized) duality gap, see [4] 4.5
    alpha : float
        The step size, see [4] 4.3
    rho_mu : float
        The (normalized) path parameter, see [4] 4.5
    obj : float
        The objective function value of the current iterate
    header : bool
        True if a header is to be printed

    References
    ----------
    .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
           optimizer for linear programming: an implementation of the
           homogeneous algorithm." High performance optimization. Springer US,
           2000. 197-232.

    """
    if header:
        print("Primal Feasibility ",
              "Dual Feasibility ",
              "Duality Gap ",
              "Step ",
              "Path Parameter ",
              "Objective ")

    # alpha may be a placeholder string (e.g. "-") before the first step
    step = alpha if isinstance(alpha, str) else float(alpha)
    # Fixed-width, 13-significant-digit columns aligned with the header
    fmt = '{0:<20.13}{1:<20.13}{2:<20.13}{3:<17.13}{4:<20.13}{5:<20.13}'
    row = fmt.format(float(rho_p), float(rho_d), float(rho_g),
                     step, float(rho_mu), float(obj))
    print(row)
| 553 |
+
|
| 554 |
+
def _ip_hsd(A, b, c, c0, alpha0, beta, maxiter, disp, tol, sparse, lstsq,
            sym_pos, cholesky, pc, ip, permc_spec, callback, postsolve_args):
    r"""
    Solve a linear programming problem in standard form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
            x >= 0

    using the interior point method of [4].

    Parameters
    ----------
    A : 2-D array
        2-D array such that ``A @ x``, gives the values of the equality
        constraints at ``x``.
    b : 1-D array
        1-D array of values representing the RHS of each equality constraint
        (row) in ``A`` (for standard form problem).
    c : 1-D array
        Coefficients of the linear objective function to be minimized (for
        standard form problem).
    c0 : float
        Constant term in objective function due to fixed (and eliminated)
        variables. (Purely for display.)
    alpha0 : float
        The maximal step size for Mehrota's predictor-corrector search
        direction; see :math:`\beta_3` of [4] Table 8.1
    beta : float
        The desired reduction of the path parameter :math:`\mu` (see [6]_)
    maxiter : int
        The maximum number of iterations of the algorithm.
    disp : bool
        Set to ``True`` if indicators of optimization status are to be printed
        to the console each iteration.
    tol : float
        Termination tolerance; see [4]_ Section 4.5.
    sparse : bool
        Set to ``True`` if the problem is to be treated as sparse. However,
        the inputs ``A_eq`` and ``A_ub`` should nonetheless be provided as
        (dense) arrays rather than sparse matrices.
    lstsq : bool
        Set to ``True`` if the problem is expected to be very poorly
        conditioned. This should always be left as ``False`` unless severe
        numerical difficulties are frequently encountered, and a better option
        would be to improve the formulation of the problem.
    sym_pos : bool
        Leave ``True`` if the problem is expected to yield a well conditioned
        symmetric positive definite normal equation matrix (almost always).
    cholesky : bool
        Set to ``True`` if the normal equations are to be solved by explicit
        Cholesky decomposition followed by explicit forward/backward
        substitution. This is typically faster for moderate, dense problems
        that are numerically well-behaved.
    pc : bool
        Leave ``True`` if the predictor-corrector method of Mehrota is to be
        used. This is almost always (if not always) beneficial.
    ip : bool
        Set to ``True`` if the improved initial point suggestion due to [4]_
        Section 4.3 is desired. It's unclear whether this is beneficial.
    permc_spec : str (default = 'MMD_AT_PLUS_A')
        (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
        True``.) A matrix is factorized in each iteration of the algorithm.
        This option specifies how to permute the columns of the matrix for
        sparsity preservation. Acceptable values are:

        - ``NATURAL``: natural ordering.
        - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
        - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
        - ``COLAMD``: approximate minimum degree column ordering.

        This option can impact the convergence of the
        interior point algorithm; test different values to determine which
        performs best for your problem. For more information, refer to
        ``scipy.sparse.linalg.splu``.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback function must accept a single
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector
            fun : float
                Current value of the objective function
            success : bool
                True only when an algorithm has completed successfully,
                so this is always False as the callback function is called
                only while the algorithm is still iterating.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is zero,
                the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``
            phase : int
                The phase of the algorithm being executed. This is always
                1 for the interior-point method because it has only one phase.
            status : int
                For revised simplex, this is always 0 because if a different
                status is detected, the algorithm terminates.
            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.

    Returns
    -------
    x_hat : float
        Solution vector (for standard form problem).
    status : int
        An integer representing the exit status of the optimization::

         0 : Optimization terminated successfully
         1 : Iteration limit reached
         2 : Problem appears to be infeasible
         3 : Problem appears to be unbounded
         4 : Serious numerical difficulties encountered

    message : str
        A string descriptor of the exit status of the optimization.
    iteration : int
        The number of iterations taken to solve the problem

    References
    ----------
    .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
           optimizer for linear programming: an implementation of the
           homogeneous algorithm." High performance optimization. Springer US,
           2000. 197-232.
    .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
           Programming based on Newton's Method." Unpublished Course Notes,
           March 2004. Available 2/25/2017 at:
           https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf

    """

    iteration = 0

    # default initial point
    x, y, z, tau, kappa = _get_blind_start(A.shape)

    # first iteration is special improvement of initial point
    # (only used together with predictor-corrector)
    ip = ip if pc else False

    # [4] 4.5
    rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
        A, b, c, c0, x, y, z, tau, kappa)
    go = rho_p > tol or rho_d > tol or rho_A > tol  # we might get lucky : )

    if disp:
        _display_iter(rho_p, rho_d, rho_g, "-", rho_mu, obj, header=True)
    if callback is not None:
        x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
        res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
                              'con': con, 'nit': iteration, 'phase': 1,
                              'complete': False, 'status': 0,
                              'message': "", 'success': False})
        callback(res)

    status = 0
    message = "Optimization terminated successfully."

    if sparse:
        A = sps.csc_matrix(A)

    while go:

        iteration += 1

        if ip:  # initial point
            # [4] Section 4.4
            gamma = 1

            def eta(g):
                return 1
        else:
            # gamma = 0 in predictor step according to [4] 4.1
            # if predictor/corrector is off, use mean of complementarity [6]
            # 5.1 / [4] Below Figure 10-4
            gamma = 0 if pc else beta * np.mean(z * x)
            # [4] Section 4.1

            def eta(g=gamma):
                return 1 - g

        try:
            # Solve [4] 8.6 and 8.7/8.13/8.23
            d_x, d_y, d_z, d_tau, d_kappa = _get_delta(
                A, b, c, x, y, z, tau, kappa, gamma, eta,
                sparse, lstsq, sym_pos, cholesky, pc, ip, permc_spec)

            if ip:  # initial point
                # [4] 4.4
                # Formula after 8.23 takes a full step regardless if this will
                # take it negative
                alpha = 1.0
                x, y, z, tau, kappa = _do_step(
                    x, y, z, tau, kappa, d_x, d_y,
                    d_z, d_tau, d_kappa, alpha)
                # clamp the improved starting point back into the cone
                x[x < 1] = 1
                z[z < 1] = 1
                tau = max(1, tau)
                kappa = max(1, kappa)
                ip = False  # done with initial point
            else:
                # [4] Section 4.3
                alpha = _get_step(x, d_x, z, d_z, tau,
                                  d_tau, kappa, d_kappa, alpha0)
                # [4] Equation 8.9
                x, y, z, tau, kappa = _do_step(
                    x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha)

        except (LinAlgError, FloatingPointError,
                ValueError, ZeroDivisionError):
            # this can happen when sparse solver is used and presolve
            # is turned off. Also observed ValueError in AppVeyor Python 3.6
            # Win32 build (PR #8676). I've never seen it otherwise.
            status = 4
            message = _get_message(status)
            break

        # [4] 4.5
        rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
            A, b, c, c0, x, y, z, tau, kappa)
        go = rho_p > tol or rho_d > tol or rho_A > tol

        if disp:
            _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj)
        if callback is not None:
            x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
            res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
                                  'con': con, 'nit': iteration, 'phase': 1,
                                  'complete': False, 'status': 0,
                                  'message': "", 'success': False})
            callback(res)

        # [4] 4.5
        # infeasibility/unboundedness detection: tau -> 0 relative to kappa
        inf1 = (rho_p < tol and rho_d < tol and rho_g < tol and tau < tol *
                max(1, kappa))
        inf2 = rho_mu < tol and tau < tol * min(1, kappa)
        if inf1 or inf2:
            # [4] Lemma 8.4 / Theorem 8.3
            if b.transpose().dot(y) > tol:
                status = 2
            else:  # elif c.T.dot(x) < tol: ? Probably not necessary.
                status = 3
            message = _get_message(status)
            break
        elif iteration >= maxiter:
            status = 1
            message = _get_message(status)
            break

    x_hat = x / tau
    # [4] Statement after Theorem 8.2
    return x_hat, status, message, iteration
| 819 |
+
|
| 820 |
+
|
| 821 |
+
def _linprog_ip(c, c0, A, b, callback, postsolve_args, maxiter=1000, tol=1e-8,
|
| 822 |
+
disp=False, alpha0=.99995, beta=0.1, sparse=False, lstsq=False,
|
| 823 |
+
sym_pos=True, cholesky=None, pc=True, ip=False,
|
| 824 |
+
permc_spec='MMD_AT_PLUS_A', **unknown_options):
|
| 825 |
+
r"""
|
| 826 |
+
Minimize a linear objective function subject to linear
|
| 827 |
+
equality and non-negativity constraints using the interior point method
|
| 828 |
+
of [4]_. Linear programming is intended to solve problems
|
| 829 |
+
of the following form:
|
| 830 |
+
|
| 831 |
+
Minimize::
|
| 832 |
+
|
| 833 |
+
c @ x
|
| 834 |
+
|
| 835 |
+
Subject to::
|
| 836 |
+
|
| 837 |
+
A @ x == b
|
| 838 |
+
x >= 0
|
| 839 |
+
|
| 840 |
+
User-facing documentation is in _linprog_doc.py.
|
| 841 |
+
|
| 842 |
+
Parameters
|
| 843 |
+
----------
|
| 844 |
+
c : 1-D array
|
| 845 |
+
Coefficients of the linear objective function to be minimized.
|
| 846 |
+
c0 : float
|
| 847 |
+
Constant term in objective function due to fixed (and eliminated)
|
| 848 |
+
variables. (Purely for display.)
|
| 849 |
+
A : 2-D array
|
| 850 |
+
2-D array such that ``A @ x``, gives the values of the equality
|
| 851 |
+
constraints at ``x``.
|
| 852 |
+
b : 1-D array
|
| 853 |
+
1-D array of values representing the right hand side of each equality
|
| 854 |
+
constraint (row) in ``A``.
|
| 855 |
+
callback : callable, optional
|
| 856 |
+
Callback function to be executed once per iteration.
|
| 857 |
+
postsolve_args : tuple
|
| 858 |
+
Data needed by _postsolve to convert the solution to the standard-form
|
| 859 |
+
problem into the solution to the original problem.
|
| 860 |
+
|
| 861 |
+
Options
|
| 862 |
+
-------
|
| 863 |
+
maxiter : int (default = 1000)
|
| 864 |
+
The maximum number of iterations of the algorithm.
|
| 865 |
+
tol : float (default = 1e-8)
|
| 866 |
+
Termination tolerance to be used for all termination criteria;
|
| 867 |
+
see [4]_ Section 4.5.
|
| 868 |
+
disp : bool (default = False)
|
| 869 |
+
Set to ``True`` if indicators of optimization status are to be printed
|
| 870 |
+
to the console each iteration.
|
| 871 |
+
alpha0 : float (default = 0.99995)
|
| 872 |
+
The maximal step size for Mehrota's predictor-corrector search
|
| 873 |
+
direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
|
| 874 |
+
beta : float (default = 0.1)
|
| 875 |
+
The desired reduction of the path parameter :math:`\mu` (see [6]_)
|
| 876 |
+
when Mehrota's predictor-corrector is not in use (uncommon).
|
| 877 |
+
sparse : bool (default = False)
|
| 878 |
+
Set to ``True`` if the problem is to be treated as sparse after
|
| 879 |
+
presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
|
| 880 |
+
this option will automatically be set ``True``, and the problem
|
| 881 |
+
will be treated as sparse even during presolve. If your constraint
|
| 882 |
+
matrices contain mostly zeros and the problem is not very small (less
|
| 883 |
+
than about 100 constraints or variables), consider setting ``True``
|
| 884 |
+
or providing ``A_eq`` and ``A_ub`` as sparse matrices.
|
| 885 |
+
lstsq : bool (default = False)
|
| 886 |
+
Set to ``True`` if the problem is expected to be very poorly
|
| 887 |
+
conditioned. This should always be left ``False`` unless severe
|
| 888 |
+
numerical difficulties are encountered. Leave this at the default
|
| 889 |
+
unless you receive a warning message suggesting otherwise.
|
| 890 |
+
sym_pos : bool (default = True)
|
| 891 |
+
Leave ``True`` if the problem is expected to yield a well conditioned
|
| 892 |
+
symmetric positive definite normal equation matrix
|
| 893 |
+
(almost always). Leave this at the default unless you receive
|
| 894 |
+
a warning message suggesting otherwise.
|
| 895 |
+
cholesky : bool (default = True)
|
| 896 |
+
Set to ``True`` if the normal equations are to be solved by explicit
|
| 897 |
+
Cholesky decomposition followed by explicit forward/backward
|
| 898 |
+
substitution. This is typically faster for problems
|
| 899 |
+
that are numerically well-behaved.
|
| 900 |
+
pc : bool (default = True)
|
| 901 |
+
Leave ``True`` if the predictor-corrector method of Mehrota is to be
|
| 902 |
+
used. This is almost always (if not always) beneficial.
|
| 903 |
+
ip : bool (default = False)
|
| 904 |
+
Set to ``True`` if the improved initial point suggestion due to [4]_
|
| 905 |
+
Section 4.3 is desired. Whether this is beneficial or not
|
| 906 |
+
depends on the problem.
|
| 907 |
+
permc_spec : str (default = 'MMD_AT_PLUS_A')
|
| 908 |
+
(Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
|
| 909 |
+
True``, and no SuiteSparse.)
|
| 910 |
+
A matrix is factorized in each iteration of the algorithm.
|
| 911 |
+
This option specifies how to permute the columns of the matrix for
|
| 912 |
+
sparsity preservation. Acceptable values are:
|
| 913 |
+
|
| 914 |
+
- ``NATURAL``: natural ordering.
|
| 915 |
+
- ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
|
| 916 |
+
- ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
|
| 917 |
+
- ``COLAMD``: approximate minimum degree column ordering.
|
| 918 |
+
|
| 919 |
+
This option can impact the convergence of the
|
| 920 |
+
interior point algorithm; test different values to determine which
|
| 921 |
+
performs best for your problem. For more information, refer to
|
| 922 |
+
``scipy.sparse.linalg.splu``.
|
| 923 |
+
unknown_options : dict
|
| 924 |
+
Optional arguments not used by this particular solver. If
|
| 925 |
+
`unknown_options` is non-empty a warning is issued listing all
|
| 926 |
+
unused options.
|
| 927 |
+
|
| 928 |
+
Returns
|
| 929 |
+
-------
|
| 930 |
+
x : 1-D array
|
| 931 |
+
Solution vector.
|
| 932 |
+
status : int
|
| 933 |
+
An integer representing the exit status of the optimization::
|
| 934 |
+
|
| 935 |
+
0 : Optimization terminated successfully
|
| 936 |
+
1 : Iteration limit reached
|
| 937 |
+
2 : Problem appears to be infeasible
|
| 938 |
+
3 : Problem appears to be unbounded
|
| 939 |
+
4 : Serious numerical difficulties encountered
|
| 940 |
+
|
| 941 |
+
message : str
|
| 942 |
+
A string descriptor of the exit status of the optimization.
|
| 943 |
+
iteration : int
|
| 944 |
+
The number of iterations taken to solve the problem.
|
| 945 |
+
|
| 946 |
+
Notes
|
| 947 |
+
-----
|
| 948 |
+
This method implements the algorithm outlined in [4]_ with ideas from [8]_
|
| 949 |
+
and a structure inspired by the simpler methods of [6]_.
|
| 950 |
+
|
| 951 |
+
The primal-dual path following method begins with initial 'guesses' of
|
| 952 |
+
the primal and dual variables of the standard form problem and iteratively
|
| 953 |
+
attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
|
| 954 |
+
problem with a gradually reduced logarithmic barrier term added to the
|
| 955 |
+
objective. This particular implementation uses a homogeneous self-dual
|
| 956 |
+
formulation, which provides certificates of infeasibility or unboundedness
|
| 957 |
+
where applicable.
|
| 958 |
+
|
| 959 |
+
The default initial point for the primal and dual variables is that
|
| 960 |
+
defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
|
| 961 |
+
point option ``ip=True``), an alternate (potentially improved) starting
|
| 962 |
+
point can be calculated according to the additional recommendations of
|
| 963 |
+
[4]_ Section 4.4.
|
| 964 |
+
|
| 965 |
+
A search direction is calculated using the predictor-corrector method
|
| 966 |
+
(single correction) proposed by Mehrota and detailed in [4]_ Section 4.1.
|
| 967 |
+
(A potential improvement would be to implement the method of multiple
|
| 968 |
+
corrections described in [4]_ Section 4.2.) In practice, this is
|
| 969 |
+
accomplished by solving the normal equations, [4]_ Section 5.1 Equations
|
| 970 |
+
8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
|
| 971 |
+
8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
|
| 972 |
+
solving the normal equations rather than 8.25 directly is that the
|
| 973 |
+
matrices involved are symmetric positive definite, so Cholesky
|
| 974 |
+
decomposition can be used rather than the more expensive LU factorization.
|
| 975 |
+
|
| 976 |
+
With default options, the solver used to perform the factorization depends
|
| 977 |
+
on third-party software availability and the conditioning of the problem.
|
| 978 |
+
|
| 979 |
+
For dense problems, solvers are tried in the following order:
|
| 980 |
+
|
| 981 |
+
1. ``scipy.linalg.cho_factor``
|
| 982 |
+
|
| 983 |
+
2. ``scipy.linalg.solve`` with option ``sym_pos=True``
|
| 984 |
+
|
| 985 |
+
3. ``scipy.linalg.solve`` with option ``sym_pos=False``
|
| 986 |
+
|
| 987 |
+
4. ``scipy.linalg.lstsq``
|
| 988 |
+
|
| 989 |
+
For sparse problems:
|
| 990 |
+
|
| 991 |
+
1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are installed)
|
| 992 |
+
|
| 993 |
+
2. ``scipy.sparse.linalg.factorized``
|
| 994 |
+
(if scikit-umfpack and SuiteSparse are installed)
|
| 995 |
+
|
| 996 |
+
3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
|
| 997 |
+
|
| 998 |
+
4. ``scipy.sparse.linalg.lsqr``
|
| 999 |
+
|
| 1000 |
+
If the solver fails for any reason, successively more robust (but slower)
|
| 1001 |
+
solvers are attempted in the order indicated. Attempting, failing, and
|
| 1002 |
+
re-starting factorization can be time consuming, so if the problem is
|
| 1003 |
+
numerically challenging, options can be set to bypass solvers that are
|
| 1004 |
+
failing. Setting ``cholesky=False`` skips to solver 2,
|
| 1005 |
+
``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
|
| 1006 |
+
to solver 4 for both sparse and dense problems.
|
| 1007 |
+
|
| 1008 |
+
Potential improvements for combatting issues associated with dense
|
| 1009 |
+
columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
|
| 1010 |
+
[10]_ Section 4.1-4.2; the latter also discusses the alleviation of
|
| 1011 |
+
accuracy issues associated with the substitution approach to free
|
| 1012 |
+
variables.
|
| 1013 |
+
|
| 1014 |
+
After calculating the search direction, the maximum possible step size
|
| 1015 |
+
that does not activate the non-negativity constraints is calculated, and
|
| 1016 |
+
the smaller of this step size and unity is applied (as in [4]_ Section
|
| 1017 |
+
4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
|
| 1018 |
+
|
| 1019 |
+
The new point is tested according to the termination conditions of [4]_
|
| 1020 |
+
Section 4.5. The same tolerance, which can be set using the ``tol`` option,
|
| 1021 |
+
is used for all checks. (A potential improvement would be to expose
|
| 1022 |
+
the different tolerances to be set independently.) If optimality,
|
| 1023 |
+
unboundedness, or infeasibility is detected, the solve procedure
|
| 1024 |
+
terminates; otherwise it repeats.
|
| 1025 |
+
|
| 1026 |
+
The expected problem formulation differs between the top level ``linprog``
|
| 1027 |
+
module and the method specific solvers. The method specific solvers expect a
|
| 1028 |
+
problem in standard form:
|
| 1029 |
+
|
| 1030 |
+
Minimize::
|
| 1031 |
+
|
| 1032 |
+
c @ x
|
| 1033 |
+
|
| 1034 |
+
Subject to::
|
| 1035 |
+
|
| 1036 |
+
A @ x == b
|
| 1037 |
+
x >= 0
|
| 1038 |
+
|
| 1039 |
+
Whereas the top level ``linprog`` module expects a problem of form:
|
| 1040 |
+
|
| 1041 |
+
Minimize::
|
| 1042 |
+
|
| 1043 |
+
c @ x
|
| 1044 |
+
|
| 1045 |
+
Subject to::
|
| 1046 |
+
|
| 1047 |
+
A_ub @ x <= b_ub
|
| 1048 |
+
A_eq @ x == b_eq
|
| 1049 |
+
lb <= x <= ub
|
| 1050 |
+
|
| 1051 |
+
where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
|
| 1052 |
+
|
| 1053 |
+
The original problem contains equality, upper-bound and variable constraints
|
| 1054 |
+
whereas the method specific solver requires equality constraints and
|
| 1055 |
+
variable non-negativity.
|
| 1056 |
+
|
| 1057 |
+
``linprog`` module converts the original problem to standard form by
|
| 1058 |
+
converting the simple bounds to upper bound constraints, introducing
|
| 1059 |
+
non-negative slack variables for inequality constraints, and expressing
|
| 1060 |
+
unbounded variables as the difference between two non-negative variables.
|
| 1061 |
+
|
| 1062 |
+
|
| 1063 |
+
References
|
| 1064 |
+
----------
|
| 1065 |
+
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
| 1066 |
+
optimizer for linear programming: an implementation of the
|
| 1067 |
+
homogeneous algorithm." High performance optimization. Springer US,
|
| 1068 |
+
2000. 197-232.
|
| 1069 |
+
.. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
|
| 1070 |
+
Programming based on Newton's Method." Unpublished Course Notes,
|
| 1071 |
+
March 2004. Available 2/25/2017 at
|
| 1072 |
+
https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
|
| 1073 |
+
.. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
|
| 1074 |
+
programming." Mathematical Programming 71.2 (1995): 221-245.
|
| 1075 |
+
.. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
| 1076 |
+
programming." Athena Scientific 1 (1997): 997.
|
| 1077 |
+
.. [10] Andersen, Erling D., et al. Implementation of interior point methods
|
| 1078 |
+
for large scale linear programming. HEC/Universite de Geneve, 1996.
|
| 1079 |
+
|
| 1080 |
+
"""
|
| 1081 |
+
|
| 1082 |
+
_check_unknown_options(unknown_options)
|
| 1083 |
+
|
| 1084 |
+
# These should be warnings, not errors
|
| 1085 |
+
if (cholesky or cholesky is None) and sparse and not has_cholmod:
|
| 1086 |
+
if cholesky:
|
| 1087 |
+
warn("Sparse cholesky is only available with scikit-sparse. "
|
| 1088 |
+
"Setting `cholesky = False`",
|
| 1089 |
+
OptimizeWarning, stacklevel=3)
|
| 1090 |
+
cholesky = False
|
| 1091 |
+
|
| 1092 |
+
if sparse and lstsq:
|
| 1093 |
+
warn("Option combination 'sparse':True and 'lstsq':True "
|
| 1094 |
+
"is not recommended.",
|
| 1095 |
+
OptimizeWarning, stacklevel=3)
|
| 1096 |
+
|
| 1097 |
+
if lstsq and cholesky:
|
| 1098 |
+
warn("Invalid option combination 'lstsq':True "
|
| 1099 |
+
"and 'cholesky':True; option 'cholesky' has no effect when "
|
| 1100 |
+
"'lstsq' is set True.",
|
| 1101 |
+
OptimizeWarning, stacklevel=3)
|
| 1102 |
+
|
| 1103 |
+
valid_permc_spec = ('NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', 'COLAMD')
|
| 1104 |
+
if permc_spec.upper() not in valid_permc_spec:
|
| 1105 |
+
warn("Invalid permc_spec option: '" + str(permc_spec) + "'. "
|
| 1106 |
+
"Acceptable values are 'NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', "
|
| 1107 |
+
"and 'COLAMD'. Reverting to default.",
|
| 1108 |
+
OptimizeWarning, stacklevel=3)
|
| 1109 |
+
permc_spec = 'MMD_AT_PLUS_A'
|
| 1110 |
+
|
| 1111 |
+
# This can be an error
|
| 1112 |
+
if not sym_pos and cholesky:
|
| 1113 |
+
raise ValueError(
|
| 1114 |
+
"Invalid option combination 'sym_pos':False "
|
| 1115 |
+
"and 'cholesky':True: Cholesky decomposition is only possible "
|
| 1116 |
+
"for symmetric positive definite matrices.")
|
| 1117 |
+
|
| 1118 |
+
cholesky = cholesky or (cholesky is None and sym_pos and not lstsq)
|
| 1119 |
+
|
| 1120 |
+
x, status, message, iteration = _ip_hsd(A, b, c, c0, alpha0, beta,
|
| 1121 |
+
maxiter, disp, tol, sparse,
|
| 1122 |
+
lstsq, sym_pos, cholesky,
|
| 1123 |
+
pc, ip, permc_spec, callback,
|
| 1124 |
+
postsolve_args)
|
| 1125 |
+
|
| 1126 |
+
return x, status, message, iteration
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py
ADDED
|
@@ -0,0 +1,572 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Revised simplex method for linear programming
|
| 2 |
+
|
| 3 |
+
The *revised simplex* method uses the method described in [1]_, except
|
| 4 |
+
that a factorization [2]_ of the basis matrix, rather than its inverse,
|
| 5 |
+
is efficiently maintained and used to solve the linear systems at each
|
| 6 |
+
iteration of the algorithm.
|
| 7 |
+
|
| 8 |
+
.. versionadded:: 1.3.0
|
| 9 |
+
|
| 10 |
+
References
|
| 11 |
+
----------
|
| 12 |
+
.. [1] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
| 13 |
+
programming." Athena Scientific 1 (1997): 997.
|
| 14 |
+
.. [2] Bartels, Richard H. "A stabilization of the simplex method."
|
| 15 |
+
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
| 16 |
+
|
| 17 |
+
"""
|
| 18 |
+
# Author: Matt Haberland
|
| 19 |
+
|
| 20 |
+
import numpy as np
|
| 21 |
+
from numpy.linalg import LinAlgError
|
| 22 |
+
|
| 23 |
+
from scipy.linalg import solve
|
| 24 |
+
from ._optimize import _check_unknown_options
|
| 25 |
+
from ._bglu_dense import LU
|
| 26 |
+
from ._bglu_dense import BGLU as BGLU
|
| 27 |
+
from ._linprog_util import _postsolve
|
| 28 |
+
from ._optimize import OptimizeResult
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _phase_one(A, b, x0, callback, postsolve_args, maxiter, tol, disp,
|
| 32 |
+
maxupdate, mast, pivot):
|
| 33 |
+
"""
|
| 34 |
+
The purpose of phase one is to find an initial basic feasible solution
|
| 35 |
+
(BFS) to the original problem.
|
| 36 |
+
|
| 37 |
+
Generates an auxiliary problem with a trivial BFS and an objective that
|
| 38 |
+
minimizes infeasibility of the original problem. Solves the auxiliary
|
| 39 |
+
problem using the main simplex routine (phase two). This either yields
|
| 40 |
+
a BFS to the original problem or determines that the original problem is
|
| 41 |
+
infeasible. If feasible, phase one detects redundant rows in the original
|
| 42 |
+
constraint matrix and removes them, then chooses additional indices as
|
| 43 |
+
necessary to complete a basis/BFS for the original problem.
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
+
m, n = A.shape
|
| 47 |
+
status = 0
|
| 48 |
+
|
| 49 |
+
# generate auxiliary problem to get initial BFS
|
| 50 |
+
A, b, c, basis, x, status = _generate_auxiliary_problem(A, b, x0, tol)
|
| 51 |
+
|
| 52 |
+
if status == 6:
|
| 53 |
+
residual = c.dot(x)
|
| 54 |
+
iter_k = 0
|
| 55 |
+
return x, basis, A, b, residual, status, iter_k
|
| 56 |
+
|
| 57 |
+
# solve auxiliary problem
|
| 58 |
+
phase_one_n = n
|
| 59 |
+
iter_k = 0
|
| 60 |
+
x, basis, status, iter_k = _phase_two(c, A, x, basis, callback,
|
| 61 |
+
postsolve_args,
|
| 62 |
+
maxiter, tol, disp,
|
| 63 |
+
maxupdate, mast, pivot,
|
| 64 |
+
iter_k, phase_one_n)
|
| 65 |
+
|
| 66 |
+
# check for infeasibility
|
| 67 |
+
residual = c.dot(x)
|
| 68 |
+
if status == 0 and residual > tol:
|
| 69 |
+
status = 2
|
| 70 |
+
|
| 71 |
+
# drive artificial variables out of basis
|
| 72 |
+
# TODO: test redundant row removal better
|
| 73 |
+
# TODO: make solve more efficient with BGLU? This could take a while.
|
| 74 |
+
keep_rows = np.ones(m, dtype=bool)
|
| 75 |
+
for basis_column in basis[basis >= n]:
|
| 76 |
+
B = A[:, basis]
|
| 77 |
+
try:
|
| 78 |
+
basis_finder = np.abs(solve(B, A)) # inefficient
|
| 79 |
+
pertinent_row = np.argmax(basis_finder[:, basis_column])
|
| 80 |
+
eligible_columns = np.ones(n, dtype=bool)
|
| 81 |
+
eligible_columns[basis[basis < n]] = 0
|
| 82 |
+
eligible_column_indices = np.where(eligible_columns)[0]
|
| 83 |
+
index = np.argmax(basis_finder[:, :n]
|
| 84 |
+
[pertinent_row, eligible_columns])
|
| 85 |
+
new_basis_column = eligible_column_indices[index]
|
| 86 |
+
if basis_finder[pertinent_row, new_basis_column] < tol:
|
| 87 |
+
keep_rows[pertinent_row] = False
|
| 88 |
+
else:
|
| 89 |
+
basis[basis == basis_column] = new_basis_column
|
| 90 |
+
except LinAlgError:
|
| 91 |
+
status = 4
|
| 92 |
+
|
| 93 |
+
# form solution to original problem
|
| 94 |
+
A = A[keep_rows, :n]
|
| 95 |
+
basis = basis[keep_rows]
|
| 96 |
+
x = x[:n]
|
| 97 |
+
m = A.shape[0]
|
| 98 |
+
return x, basis, A, b, residual, status, iter_k
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _get_more_basis_columns(A, basis):
|
| 102 |
+
"""
|
| 103 |
+
Called when the auxiliary problem terminates with artificial columns in
|
| 104 |
+
the basis, which must be removed and replaced with non-artificial
|
| 105 |
+
columns. Finds additional columns that do not make the matrix singular.
|
| 106 |
+
"""
|
| 107 |
+
m, n = A.shape
|
| 108 |
+
|
| 109 |
+
# options for inclusion are those that aren't already in the basis
|
| 110 |
+
a = np.arange(m+n)
|
| 111 |
+
bl = np.zeros(len(a), dtype=bool)
|
| 112 |
+
bl[basis] = 1
|
| 113 |
+
options = a[~bl]
|
| 114 |
+
options = options[options < n] # and they have to be non-artificial
|
| 115 |
+
|
| 116 |
+
# form basis matrix
|
| 117 |
+
B = np.zeros((m, m))
|
| 118 |
+
B[:, 0:len(basis)] = A[:, basis]
|
| 119 |
+
|
| 120 |
+
if (basis.size > 0 and
|
| 121 |
+
np.linalg.matrix_rank(B[:, :len(basis)]) < len(basis)):
|
| 122 |
+
raise Exception("Basis has dependent columns")
|
| 123 |
+
|
| 124 |
+
rank = 0 # just enter the loop
|
| 125 |
+
for i in range(n): # somewhat arbitrary, but we need another way out
|
| 126 |
+
# permute the options, and take as many as needed
|
| 127 |
+
new_basis = np.random.permutation(options)[:m-len(basis)]
|
| 128 |
+
B[:, len(basis):] = A[:, new_basis] # update the basis matrix
|
| 129 |
+
rank = np.linalg.matrix_rank(B) # check the rank
|
| 130 |
+
if rank == m:
|
| 131 |
+
break
|
| 132 |
+
|
| 133 |
+
return np.concatenate((basis, new_basis))
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _generate_auxiliary_problem(A, b, x0, tol):
|
| 137 |
+
"""
|
| 138 |
+
Modifies original problem to create an auxiliary problem with a trivial
|
| 139 |
+
initial basic feasible solution and an objective that minimizes
|
| 140 |
+
infeasibility in the original problem.
|
| 141 |
+
|
| 142 |
+
Conceptually, this is done by stacking an identity matrix on the right of
|
| 143 |
+
the original constraint matrix, adding artificial variables to correspond
|
| 144 |
+
with each of these new columns, and generating a cost vector that is all
|
| 145 |
+
zeros except for ones corresponding with each of the new variables.
|
| 146 |
+
|
| 147 |
+
A initial basic feasible solution is trivial: all variables are zero
|
| 148 |
+
except for the artificial variables, which are set equal to the
|
| 149 |
+
corresponding element of the right hand side `b`.
|
| 150 |
+
|
| 151 |
+
Running the simplex method on this auxiliary problem drives all of the
|
| 152 |
+
artificial variables - and thus the cost - to zero if the original problem
|
| 153 |
+
is feasible. The original problem is declared infeasible otherwise.
|
| 154 |
+
|
| 155 |
+
Much of the complexity below is to improve efficiency by using singleton
|
| 156 |
+
columns in the original problem where possible, thus generating artificial
|
| 157 |
+
variables only as necessary, and using an initial 'guess' basic feasible
|
| 158 |
+
solution.
|
| 159 |
+
"""
|
| 160 |
+
status = 0
|
| 161 |
+
m, n = A.shape
|
| 162 |
+
|
| 163 |
+
if x0 is not None:
|
| 164 |
+
x = x0
|
| 165 |
+
else:
|
| 166 |
+
x = np.zeros(n)
|
| 167 |
+
|
| 168 |
+
r = b - A@x # residual; this must be all zeros for feasibility
|
| 169 |
+
|
| 170 |
+
A[r < 0] = -A[r < 0] # express problem with RHS positive for trivial BFS
|
| 171 |
+
b[r < 0] = -b[r < 0] # to the auxiliary problem
|
| 172 |
+
r[r < 0] *= -1
|
| 173 |
+
|
| 174 |
+
# Rows which we will need to find a trivial way to zero.
|
| 175 |
+
# This should just be the rows where there is a nonzero residual.
|
| 176 |
+
# But then we would not necessarily have a column singleton in every row.
|
| 177 |
+
# This makes it difficult to find an initial basis.
|
| 178 |
+
if x0 is None:
|
| 179 |
+
nonzero_constraints = np.arange(m)
|
| 180 |
+
else:
|
| 181 |
+
nonzero_constraints = np.where(r > tol)[0]
|
| 182 |
+
|
| 183 |
+
# these are (at least some of) the initial basis columns
|
| 184 |
+
basis = np.where(np.abs(x) > tol)[0]
|
| 185 |
+
|
| 186 |
+
if len(nonzero_constraints) == 0 and len(basis) <= m: # already a BFS
|
| 187 |
+
c = np.zeros(n)
|
| 188 |
+
basis = _get_more_basis_columns(A, basis)
|
| 189 |
+
return A, b, c, basis, x, status
|
| 190 |
+
elif (len(nonzero_constraints) > m - len(basis) or
|
| 191 |
+
np.any(x < 0)): # can't get trivial BFS
|
| 192 |
+
c = np.zeros(n)
|
| 193 |
+
status = 6
|
| 194 |
+
return A, b, c, basis, x, status
|
| 195 |
+
|
| 196 |
+
# chooses existing columns appropriate for inclusion in initial basis
|
| 197 |
+
cols, rows = _select_singleton_columns(A, r)
|
| 198 |
+
|
| 199 |
+
# find the rows we need to zero that we _can_ zero with column singletons
|
| 200 |
+
i_tofix = np.isin(rows, nonzero_constraints)
|
| 201 |
+
# these columns can't already be in the basis, though
|
| 202 |
+
# we are going to add them to the basis and change the corresponding x val
|
| 203 |
+
i_notinbasis = np.logical_not(np.isin(cols, basis))
|
| 204 |
+
i_fix_without_aux = np.logical_and(i_tofix, i_notinbasis)
|
| 205 |
+
rows = rows[i_fix_without_aux]
|
| 206 |
+
cols = cols[i_fix_without_aux]
|
| 207 |
+
|
| 208 |
+
# indices of the rows we can only zero with auxiliary variable
|
| 209 |
+
# these rows will get a one in each auxiliary column
|
| 210 |
+
arows = nonzero_constraints[np.logical_not(
|
| 211 |
+
np.isin(nonzero_constraints, rows))]
|
| 212 |
+
n_aux = len(arows)
|
| 213 |
+
acols = n + np.arange(n_aux) # indices of auxiliary columns
|
| 214 |
+
|
| 215 |
+
basis_ng = np.concatenate((cols, acols)) # basis columns not from guess
|
| 216 |
+
basis_ng_rows = np.concatenate((rows, arows)) # rows we need to zero
|
| 217 |
+
|
| 218 |
+
# add auxiliary singleton columns
|
| 219 |
+
A = np.hstack((A, np.zeros((m, n_aux))))
|
| 220 |
+
A[arows, acols] = 1
|
| 221 |
+
|
| 222 |
+
# generate initial BFS
|
| 223 |
+
x = np.concatenate((x, np.zeros(n_aux)))
|
| 224 |
+
x[basis_ng] = r[basis_ng_rows]/A[basis_ng_rows, basis_ng]
|
| 225 |
+
|
| 226 |
+
# generate costs to minimize infeasibility
|
| 227 |
+
c = np.zeros(n_aux + n)
|
| 228 |
+
c[acols] = 1
|
| 229 |
+
|
| 230 |
+
# basis columns correspond with nonzeros in guess, those with column
|
| 231 |
+
# singletons we used to zero remaining constraints, and any additional
|
| 232 |
+
# columns to get a full set (m columns)
|
| 233 |
+
basis = np.concatenate((basis, basis_ng))
|
| 234 |
+
basis = _get_more_basis_columns(A, basis) # add columns as needed
|
| 235 |
+
|
| 236 |
+
return A, b, c, basis, x, status
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _select_singleton_columns(A, b):
|
| 240 |
+
"""
|
| 241 |
+
Finds singleton columns for which the singleton entry is of the same sign
|
| 242 |
+
as the right-hand side; these columns are eligible for inclusion in an
|
| 243 |
+
initial basis. Determines the rows in which the singleton entries are
|
| 244 |
+
located. For each of these rows, returns the indices of the one singleton
|
| 245 |
+
column and its corresponding row.
|
| 246 |
+
"""
|
| 247 |
+
# find indices of all singleton columns and corresponding row indices
|
| 248 |
+
column_indices = np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
|
| 249 |
+
columns = A[:, column_indices] # array of singleton columns
|
| 250 |
+
row_indices = np.zeros(len(column_indices), dtype=int)
|
| 251 |
+
nonzero_rows, nonzero_columns = np.nonzero(columns)
|
| 252 |
+
row_indices[nonzero_columns] = nonzero_rows # corresponding row indices
|
| 253 |
+
|
| 254 |
+
# keep only singletons with entries that have same sign as RHS
|
| 255 |
+
# this is necessary because all elements of BFS must be non-negative
|
| 256 |
+
same_sign = A[row_indices, column_indices]*b[row_indices] >= 0
|
| 257 |
+
column_indices = column_indices[same_sign][::-1]
|
| 258 |
+
row_indices = row_indices[same_sign][::-1]
|
| 259 |
+
# Reversing the order so that steps below select rightmost columns
|
| 260 |
+
# for initial basis, which will tend to be slack variables. (If the
|
| 261 |
+
# guess corresponds with a basic feasible solution but a constraint
|
| 262 |
+
# is not satisfied with the corresponding slack variable zero, the slack
|
| 263 |
+
# variable must be basic.)
|
| 264 |
+
|
| 265 |
+
# for each row, keep rightmost singleton column with an entry in that row
|
| 266 |
+
unique_row_indices, first_columns = np.unique(row_indices,
|
| 267 |
+
return_index=True)
|
| 268 |
+
return column_indices[first_columns], unique_row_indices
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def _find_nonzero_rows(A, tol):
|
| 272 |
+
"""
|
| 273 |
+
Returns logical array indicating the locations of rows with at least
|
| 274 |
+
one nonzero element.
|
| 275 |
+
"""
|
| 276 |
+
return np.any(np.abs(A) > tol, axis=1)
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def _select_enter_pivot(c_hat, bl, a, rule="bland", tol=1e-12):
|
| 280 |
+
"""
|
| 281 |
+
Selects a pivot to enter the basis. Currently Bland's rule - the smallest
|
| 282 |
+
index that has a negative reduced cost - is the default.
|
| 283 |
+
"""
|
| 284 |
+
if rule.lower() == "mrc": # index with minimum reduced cost
|
| 285 |
+
return a[~bl][np.argmin(c_hat)]
|
| 286 |
+
else: # smallest index w/ negative reduced cost
|
| 287 |
+
return a[~bl][c_hat < -tol][0]
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _display_iter(phase, iteration, slack, con, fun):
|
| 291 |
+
"""
|
| 292 |
+
Print indicators of optimization status to the console.
|
| 293 |
+
"""
|
| 294 |
+
header = True if not iteration % 20 else False
|
| 295 |
+
|
| 296 |
+
if header:
|
| 297 |
+
print("Phase",
|
| 298 |
+
"Iteration",
|
| 299 |
+
"Minimum Slack ",
|
| 300 |
+
"Constraint Residual",
|
| 301 |
+
"Objective ")
|
| 302 |
+
|
| 303 |
+
# :<X.Y left aligns Y digits in X digit spaces
|
| 304 |
+
fmt = '{0:<6}{1:<10}{2:<20.13}{3:<20.13}{4:<20.13}'
|
| 305 |
+
try:
|
| 306 |
+
slack = np.min(slack)
|
| 307 |
+
except ValueError:
|
| 308 |
+
slack = "NA"
|
| 309 |
+
print(fmt.format(phase, iteration, slack, np.linalg.norm(con), fun))
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _display_and_callback(phase_one_n, x, postsolve_args, status,
                          iteration, disp, callback):
    """Report progress via ``callback`` and/or console output.

    ``phase_one_n`` is the number of phase-one variables to keep when the
    solver is still in phase one (the solution vector then carries extra
    artificial variables that must be stripped before postsolving);
    ``None`` indicates phase two.
    """
    in_phase_one = phase_one_n is not None
    phase = 1 if in_phase_one else 2
    x_postsolve = x[:phase_one_n] if in_phase_one else x
    x_o, fun, slack, con = _postsolve(x_postsolve, postsolve_args)

    if callback is not None:
        callback(OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
                                 'con': con, 'nit': iteration,
                                 'phase': phase, 'complete': False,
                                 'status': status, 'message': "",
                                 'success': False}))
    if disp:
        _display_iter(phase, iteration, slack, con, fun)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def _phase_two(c, A, x, b, callback, postsolve_args, maxiter, tol, disp,
               maxupdate, mast, pivot, iteration=0, phase_one_n=None):
    """
    The heart of the simplex method. Beginning with a basic feasible solution,
    moves to adjacent basic feasible solutions successively lower reduced cost.
    Terminates when there are no basic feasible solutions with lower reduced
    cost or if the problem is determined to be unbounded.

    This implementation follows the revised simplex method based on LU
    decomposition. Rather than maintaining a tableau or an inverse of the
    basis matrix, we keep a factorization of the basis matrix that allows
    efficient solution of linear systems while avoiding stability issues
    associated with inverted matrices.

    Parameters
    ----------
    c : 1-D array
        Objective coefficients of the standard-form problem.
    A : 2-D array
        Equality-constraint matrix of the standard-form problem.
    x : 1-D array
        A basic feasible solution; updated in place as steps are taken.
    b : 1-D array
        Column indices of ``A`` forming the current basis.
    callback : callable or None
        User callback invoked each iteration (via ``_display_and_callback``).
    postsolve_args : tuple
        Data needed by ``_postsolve`` to report in original-problem terms.
    maxiter : int
        Iteration limit (shared across both phases).
    tol : float
        Tolerance for the reduced-cost and ratio tests.
    disp : bool
        If True, print iteration status to the console.
    maxupdate : int
        Maximum LU updates before refactorization; 0 selects plain LU.
    mast : bool
        "Minimize amortized solve time" heuristic flag (see ``_linprog_rs``).
    pivot : {"mrc", "bland"}
        Rule passed to ``_select_enter_pivot``.
    iteration : int
        Starting iteration count (continues phase one's count).
    phase_one_n : int or None
        Number of phase-one variables when called from phase one; ``None``
        in phase two proper.

    Returns
    -------
    x : 1-D array
        Final solution of the standard-form problem.
    b : 1-D array
        Final basis column indices.
    status : int
        0 success, 1 iteration limit, 3 unbounded, 4 numerical difficulty.
    iteration : int
        Total number of iterations performed.
    """
    m, n = A.shape
    status = 0
    a = np.arange(n)            # indices of columns of A
    ab = np.arange(m)           # indices of columns of B
    if maxupdate:
        # basis matrix factorization object; similar to B = A[:, b]
        B = BGLU(A, b, maxupdate, mast)
    else:
        B = LU(A, b)

    for iteration in range(iteration, maxiter):

        if disp or callback is not None:
            _display_and_callback(phase_one_n, x, postsolve_args, status,
                                  iteration, disp, callback)

        # boolean mask marking which columns are currently basic
        bl = np.zeros(len(a), dtype=bool)
        bl[b] = 1

        xb = x[b]       # basic variables
        cb = c[b]       # basic costs

        try:
            v = B.solve(cb, transposed=True)    # similar to v = solve(B.T, cb)
        except LinAlgError:
            status = 4
            break

        # TODO: cythonize?
        c_hat = c - v.dot(A)    # reduced cost
        c_hat = c_hat[~bl]
        # Above is much faster than:
        # N = A[:, ~bl]                 # slow!
        # c_hat = c[~bl] - v.T.dot(N)
        # Can we perform the multiplication only on the nonbasic columns?

        if np.all(c_hat >= -tol):  # all reduced costs positive -> terminate
            break

        j = _select_enter_pivot(c_hat, bl, a, rule=pivot, tol=tol)
        u = B.solve(A[:, j])        # similar to u = solve(B, A[:, j])

        i = u > tol                 # if none of the u are positive, unbounded
        if not np.any(i):
            status = 3
            break

        # ratio test: largest step that keeps every basic variable >= 0
        th = xb[i]/u[i]
        l = np.argmin(th)           # implicitly selects smallest subscript
        th_star = th[l]             # step size

        x[b] = x[b] - th_star*u     # take step
        x[j] = th_star
        B.update(ab[i][l], j)       # modify basis
        b = B.b                     # similar to b[ab[i][l]] = j

    else:
        # If the end of the for loop is reached (without a break statement),
        # then another step has been taken, so the iteration counter should
        # increment, info should be displayed, and callback should be called.
        iteration += 1
        status = 1
        if disp or callback is not None:
            _display_and_callback(phase_one_n, x, postsolve_args, status,
                                  iteration, disp, callback)

    return x, b, status, iteration
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
def _linprog_rs(c, c0, A, b, x0, callback, postsolve_args,
                maxiter=5000, tol=1e-12, disp=False,
                maxupdate=10, mast=False, pivot="mrc",
                **unknown_options):
    """
    Solve the following linear programming problem via a two-phase
    revised simplex algorithm.::

        minimize:     c @ x

        subject to:  A @ x == b
                     0 <= x < oo

    User-facing documentation is in _linprog_doc.py.

    Parameters
    ----------
    c : 1-D array
        Coefficients of the linear objective function to be minimized.
    c0 : float
        Constant term in objective function due to fixed (and eliminated)
        variables. (Currently unused.)
    A : 2-D array
        2-D array which, when matrix-multiplied by ``x``, gives the values of
        the equality constraints at ``x``.
    b : 1-D array
        1-D array of values representing the RHS of each equality constraint
        (row) in ``A_eq``.
    x0 : 1-D array, optional
        Starting values of the independent variables, which will be refined by
        the optimization algorithm. For the revised simplex method, these must
        correspond with a basic feasible solution.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback function must accept a single
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector.
            fun : float
                Current value of the objective function ``c @ x``.
            success : bool
                True only when an algorithm has completed successfully,
                so this is always False as the callback function is called
                only while the algorithm is still iterating.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is zero,
                the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``.
            phase : int
                The phase of the algorithm being executed.
            status : int
                For revised simplex, this is always 0 because if a different
                status is detected, the algorithm terminates.
            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.

    Options
    -------
    maxiter : int
        The maximum number of iterations to perform in either phase.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    disp : bool
        Set to ``True`` if indicators of optimization status are to be printed
        to the console each iteration.
    maxupdate : int
        The maximum number of updates performed on the LU factorization.
        After this many updates is reached, the basis matrix is factorized
        from scratch.
    mast : bool
        Minimize Amortized Solve Time. If enabled, the average time to solve
        a linear system using the basis factorization is measured. Typically,
        the average solve time will decrease with each successive solve after
        initial factorization, as factorization takes much more time than the
        solve operation (and updates). Eventually, however, the updated
        factorization becomes sufficiently complex that the average solve time
        begins to increase. When this is detected, the basis is refactorized
        from scratch. Enable this option to maximize speed at the risk of
        nondeterministic behavior. Ignored if ``maxupdate`` is 0.
    pivot : "mrc" or "bland"
        Pivot rule: Minimum Reduced Cost (default) or Bland's rule. Choose
        Bland's rule if iteration limit is reached and cycling is suspected.
    unknown_options : dict
        Optional arguments not used by this particular solver. If
        `unknown_options` is non-empty a warning is issued listing all
        unused options.

    Returns
    -------
    x : 1-D array
        Solution vector.
    status : int
        An integer representing the exit status of the optimization::

         0 : Optimization terminated successfully
         1 : Iteration limit reached
         2 : Problem appears to be infeasible
         3 : Problem appears to be unbounded
         4 : Numerical difficulties encountered
         5 : No constraints; turn presolve on
         6 : Guess x0 cannot be converted to a basic feasible solution

    message : str
        A string descriptor of the exit status of the optimization.
    iteration : int
        The number of iterations taken to solve the problem.
    """

    _check_unknown_options(unknown_options)

    # Exit messages indexed by status code; entry 2 is a template filled
    # with the phase-one residual and the tolerance.
    # BUGFIX: corrected typo "unnaceptably" -> "unacceptably" in message 2.
    messages = ["Optimization terminated successfully.",
                "Iteration limit reached.",
                "The problem appears infeasible, as the phase one auxiliary "
                "problem terminated successfully with a residual of {0:.1e}, "
                "greater than the tolerance {1} required for the solution to "
                "be considered feasible. Consider increasing the tolerance to "
                "be greater than {0:.1e}. If this tolerance is unacceptably "
                "large, the problem is likely infeasible.",
                "The problem is unbounded, as the simplex algorithm found "
                "a basic feasible solution from which there is a direction "
                "with negative reduced cost in which all decision variables "
                "increase.",
                "Numerical difficulties encountered; consider trying "
                "method='interior-point'.",
                "Problems with no constraints are trivially solved; please "
                "turn presolve on.",
                "The guess x0 cannot be converted to a basic feasible "
                "solution. "
                ]

    if A.size == 0:  # address test_unbounded_below_no_presolve_corrected
        return np.zeros(c.shape), 5, messages[5], 0

    # Phase one: find a basic feasible solution (or detect infeasibility).
    x, basis, A, b, residual, status, iteration = (
        _phase_one(A, b, x0, callback, postsolve_args,
                   maxiter, tol, disp, maxupdate, mast, pivot))

    # Phase two: optimize the true objective from that starting basis.
    if status == 0:
        x, basis, status, iteration = _phase_two(c, A, x, basis, callback,
                                                 postsolve_args,
                                                 maxiter, tol, disp,
                                                 maxupdate, mast, pivot,
                                                 iteration)

    return x, status, messages[status].format(residual, tol), iteration
|
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py
ADDED
|
@@ -0,0 +1,661 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Simplex method for linear programming
|
| 2 |
+
|
| 3 |
+
The *simplex* method uses a traditional, full-tableau implementation of
|
| 4 |
+
Dantzig's simplex algorithm [1]_, [2]_ (*not* the Nelder-Mead simplex).
|
| 5 |
+
This algorithm is included for backwards compatibility and educational
|
| 6 |
+
purposes.
|
| 7 |
+
|
| 8 |
+
.. versionadded:: 0.15.0
|
| 9 |
+
|
| 10 |
+
Warnings
|
| 11 |
+
--------
|
| 12 |
+
|
| 13 |
+
The simplex method may encounter numerical difficulties when pivot
|
| 14 |
+
values are close to the specified tolerance. If encountered, try
|
| 15 |
+
removing any redundant constraints, changing the pivot strategy to Bland's
|
| 16 |
+
rule, or increasing the tolerance value.
|
| 17 |
+
|
| 18 |
+
Alternatively, more robust methods may be used. See
|
| 19 |
+
:ref:`'interior-point' <optimize.linprog-interior-point>` and
|
| 20 |
+
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`.
|
| 21 |
+
|
| 22 |
+
References
|
| 23 |
+
----------
|
| 24 |
+
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
| 25 |
+
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
| 26 |
+
1963
|
| 27 |
+
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
| 28 |
+
Mathematical Programming", McGraw-Hill, Chapter 4.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
from warnings import warn
|
| 33 |
+
from ._optimize import OptimizeResult, OptimizeWarning, _check_unknown_options
|
| 34 |
+
from ._linprog_util import _postsolve
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _pivot_col(T, tol=1e-9, bland=False):
|
| 38 |
+
"""
|
| 39 |
+
Given a linear programming simplex tableau, determine the column
|
| 40 |
+
of the variable to enter the basis.
|
| 41 |
+
|
| 42 |
+
Parameters
|
| 43 |
+
----------
|
| 44 |
+
T : 2-D array
|
| 45 |
+
A 2-D array representing the simplex tableau, T, corresponding to the
|
| 46 |
+
linear programming problem. It should have the form:
|
| 47 |
+
|
| 48 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 49 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 50 |
+
.
|
| 51 |
+
.
|
| 52 |
+
.
|
| 53 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 54 |
+
[c[0], c[1], ..., c[n_total], 0]]
|
| 55 |
+
|
| 56 |
+
for a Phase 2 problem, or the form:
|
| 57 |
+
|
| 58 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 59 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 60 |
+
.
|
| 61 |
+
.
|
| 62 |
+
.
|
| 63 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 64 |
+
[c[0], c[1], ..., c[n_total], 0],
|
| 65 |
+
[c'[0], c'[1], ..., c'[n_total], 0]]
|
| 66 |
+
|
| 67 |
+
for a Phase 1 problem (a problem in which a basic feasible solution is
|
| 68 |
+
sought prior to maximizing the actual objective. ``T`` is modified in
|
| 69 |
+
place by ``_solve_simplex``.
|
| 70 |
+
tol : float
|
| 71 |
+
Elements in the objective row larger than -tol will not be considered
|
| 72 |
+
for pivoting. Nominally this value is zero, but numerical issues
|
| 73 |
+
cause a tolerance about zero to be necessary.
|
| 74 |
+
bland : bool
|
| 75 |
+
If True, use Bland's rule for selection of the column (select the
|
| 76 |
+
first column with a negative coefficient in the objective row,
|
| 77 |
+
regardless of magnitude).
|
| 78 |
+
|
| 79 |
+
Returns
|
| 80 |
+
-------
|
| 81 |
+
status: bool
|
| 82 |
+
True if a suitable pivot column was found, otherwise False.
|
| 83 |
+
A return of False indicates that the linear programming simplex
|
| 84 |
+
algorithm is complete.
|
| 85 |
+
col: int
|
| 86 |
+
The index of the column of the pivot element.
|
| 87 |
+
If status is False, col will be returned as nan.
|
| 88 |
+
"""
|
| 89 |
+
ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
|
| 90 |
+
if ma.count() == 0:
|
| 91 |
+
return False, np.nan
|
| 92 |
+
if bland:
|
| 93 |
+
# ma.mask is sometimes 0d
|
| 94 |
+
return True, np.nonzero(np.logical_not(np.atleast_1d(ma.mask)))[0][0]
|
| 95 |
+
return True, np.ma.nonzero(ma == ma.min())[0][0]
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _pivot_row(T, basis, pivcol, phase, tol=1e-9, bland=False):
|
| 99 |
+
"""
|
| 100 |
+
Given a linear programming simplex tableau, determine the row for the
|
| 101 |
+
pivot operation.
|
| 102 |
+
|
| 103 |
+
Parameters
|
| 104 |
+
----------
|
| 105 |
+
T : 2-D array
|
| 106 |
+
A 2-D array representing the simplex tableau, T, corresponding to the
|
| 107 |
+
linear programming problem. It should have the form:
|
| 108 |
+
|
| 109 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 110 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 111 |
+
.
|
| 112 |
+
.
|
| 113 |
+
.
|
| 114 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 115 |
+
[c[0], c[1], ..., c[n_total], 0]]
|
| 116 |
+
|
| 117 |
+
for a Phase 2 problem, or the form:
|
| 118 |
+
|
| 119 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 120 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 121 |
+
.
|
| 122 |
+
.
|
| 123 |
+
.
|
| 124 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 125 |
+
[c[0], c[1], ..., c[n_total], 0],
|
| 126 |
+
[c'[0], c'[1], ..., c'[n_total], 0]]
|
| 127 |
+
|
| 128 |
+
for a Phase 1 problem (a Problem in which a basic feasible solution is
|
| 129 |
+
sought prior to maximizing the actual objective. ``T`` is modified in
|
| 130 |
+
place by ``_solve_simplex``.
|
| 131 |
+
basis : array
|
| 132 |
+
A list of the current basic variables.
|
| 133 |
+
pivcol : int
|
| 134 |
+
The index of the pivot column.
|
| 135 |
+
phase : int
|
| 136 |
+
The phase of the simplex algorithm (1 or 2).
|
| 137 |
+
tol : float
|
| 138 |
+
Elements in the pivot column smaller than tol will not be considered
|
| 139 |
+
for pivoting. Nominally this value is zero, but numerical issues
|
| 140 |
+
cause a tolerance about zero to be necessary.
|
| 141 |
+
bland : bool
|
| 142 |
+
If True, use Bland's rule for selection of the row (if more than one
|
| 143 |
+
row can be used, choose the one with the lowest variable index).
|
| 144 |
+
|
| 145 |
+
Returns
|
| 146 |
+
-------
|
| 147 |
+
status: bool
|
| 148 |
+
True if a suitable pivot row was found, otherwise False. A return
|
| 149 |
+
of False indicates that the linear programming problem is unbounded.
|
| 150 |
+
row: int
|
| 151 |
+
The index of the row of the pivot element. If status is False, row
|
| 152 |
+
will be returned as nan.
|
| 153 |
+
"""
|
| 154 |
+
if phase == 1:
|
| 155 |
+
k = 2
|
| 156 |
+
else:
|
| 157 |
+
k = 1
|
| 158 |
+
ma = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, pivcol], copy=False)
|
| 159 |
+
if ma.count() == 0:
|
| 160 |
+
return False, np.nan
|
| 161 |
+
mb = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, -1], copy=False)
|
| 162 |
+
q = mb / ma
|
| 163 |
+
min_rows = np.ma.nonzero(q == q.min())[0]
|
| 164 |
+
if bland:
|
| 165 |
+
return True, min_rows[np.argmin(np.take(basis, min_rows))]
|
| 166 |
+
return True, min_rows[0]
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _apply_pivot(T, basis, pivrow, pivcol, tol=1e-9):
|
| 170 |
+
"""
|
| 171 |
+
Pivot the simplex tableau inplace on the element given by (pivrow, pivol).
|
| 172 |
+
The entering variable corresponds to the column given by pivcol forcing
|
| 173 |
+
the variable basis[pivrow] to leave the basis.
|
| 174 |
+
|
| 175 |
+
Parameters
|
| 176 |
+
----------
|
| 177 |
+
T : 2-D array
|
| 178 |
+
A 2-D array representing the simplex tableau, T, corresponding to the
|
| 179 |
+
linear programming problem. It should have the form:
|
| 180 |
+
|
| 181 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 182 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 183 |
+
.
|
| 184 |
+
.
|
| 185 |
+
.
|
| 186 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 187 |
+
[c[0], c[1], ..., c[n_total], 0]]
|
| 188 |
+
|
| 189 |
+
for a Phase 2 problem, or the form:
|
| 190 |
+
|
| 191 |
+
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
| 192 |
+
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
| 193 |
+
.
|
| 194 |
+
.
|
| 195 |
+
.
|
| 196 |
+
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
| 197 |
+
[c[0], c[1], ..., c[n_total], 0],
|
| 198 |
+
[c'[0], c'[1], ..., c'[n_total], 0]]
|
| 199 |
+
|
| 200 |
+
for a Phase 1 problem (a problem in which a basic feasible solution is
|
| 201 |
+
sought prior to maximizing the actual objective. ``T`` is modified in
|
| 202 |
+
place by ``_solve_simplex``.
|
| 203 |
+
basis : 1-D array
|
| 204 |
+
An array of the indices of the basic variables, such that basis[i]
|
| 205 |
+
contains the column corresponding to the basic variable for row i.
|
| 206 |
+
Basis is modified in place by _apply_pivot.
|
| 207 |
+
pivrow : int
|
| 208 |
+
Row index of the pivot.
|
| 209 |
+
pivcol : int
|
| 210 |
+
Column index of the pivot.
|
| 211 |
+
"""
|
| 212 |
+
basis[pivrow] = pivcol
|
| 213 |
+
pivval = T[pivrow, pivcol]
|
| 214 |
+
T[pivrow] = T[pivrow] / pivval
|
| 215 |
+
for irow in range(T.shape[0]):
|
| 216 |
+
if irow != pivrow:
|
| 217 |
+
T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]
|
| 218 |
+
|
| 219 |
+
# The selected pivot should never lead to a pivot value less than the tol.
|
| 220 |
+
if np.isclose(pivval, tol, atol=0, rtol=1e4):
|
| 221 |
+
message = (
|
| 222 |
+
f"The pivot operation produces a pivot value of:{pivval: .1e}, "
|
| 223 |
+
"which is only slightly greater than the specified "
|
| 224 |
+
f"tolerance{tol: .1e}. This may lead to issues regarding the "
|
| 225 |
+
"numerical stability of the simplex method. "
|
| 226 |
+
"Removing redundant constraints, changing the pivot strategy "
|
| 227 |
+
"via Bland's rule or increasing the tolerance may "
|
| 228 |
+
"help reduce the issue.")
|
| 229 |
+
warn(message, OptimizeWarning, stacklevel=5)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def _solve_simplex(T, n, basis, callback, postsolve_args,
                   maxiter=1000, tol=1e-9, phase=2, bland=False, nit0=0,
                   ):
    """
    Solve a linear programming problem in "standard form" using the Simplex
    Method. Linear Programming is intended to solve the following problem form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
            x >= 0

    Parameters
    ----------
    T : 2-D array
        A 2-D array representing the simplex tableau, T, corresponding to the
        linear programming problem. It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0],
         [c'[0], c'[1], ..., c'[n_total], 0]]

        for a Phase 1 problem (a problem in which a basic feasible solution is
        sought prior to maximizing the actual objective. ``T`` is modified in
        place by ``_solve_simplex``.
    n : int
        The number of true variables in the problem.
    basis : 1-D array
        An array of the indices of the basic variables, such that basis[i]
        contains the column corresponding to the basic variable for row i.
        Basis is modified in place by _solve_simplex
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback must accept a
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector
            fun : float
                Current value of the objective function
            success : bool
                True only when a phase has completed successfully. This
                will be False for most iterations.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is zero,
                the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``
            phase : int
                The phase of the optimization being executed. In phase 1 a basic
                feasible solution is sought and the T has an additional row
                representing an alternate objective function.
            status : int
                An integer representing the exit status of the optimization::

                     0 : Optimization terminated successfully
                     1 : Iteration limit reached
                     2 : Problem appears to be infeasible
                     3 : Problem appears to be unbounded
                     4 : Serious numerical difficulties encountered

            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.
    maxiter : int
        The maximum number of iterations to perform before aborting the
        optimization.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    phase : int
        The phase of the optimization being executed. In phase 1 a basic
        feasible solution is sought and the T has an additional row
        representing an alternate objective function.
    bland : bool
        If True, choose pivots using Bland's rule [3]_. In problems which
        fail to converge due to cycling, using Bland's rule can provide
        convergence at the expense of a less optimal path about the simplex.
    nit0 : int
        The initial iteration number used to keep an accurate iteration total
        in a two-phase problem.

    Returns
    -------
    nit : int
        The number of iterations. Used to keep an accurate iteration total
        in the two-phase problem.
    status : int
        An integer representing the exit status of the optimization::

         0 : Optimization terminated successfully
         1 : Iteration limit reached
         2 : Problem appears to be infeasible
         3 : Problem appears to be unbounded
         4 : Serious numerical difficulties encountered

    """
    nit = nit0
    status = 0
    message = ''
    complete = False

    # m is the number of columns excluding the RHS column and, in phase 1,
    # the extra objective row's implied column count.
    if phase == 1:
        m = T.shape[1]-2
    elif phase == 2:
        m = T.shape[1]-1
    else:
        raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")

    if phase == 2:
        # Check if any artificial variables are still in the basis.
        # If yes, check if any coefficients from this row and a column
        # corresponding to one of the non-artificial variable is non-zero.
        # If found, pivot at this term. If not, start phase 2.
        # Do this for all artificial variables in the basis.
        # Ref: "An Introduction to Linear Programming and Game Theory"
        # by Paul R. Thie, Gerard E. Keough, 3rd Ed,
        # Chapter 3.7 Redundant Systems (pag 102)
        for pivrow in [row for row in range(basis.size)
                       if basis[row] > T.shape[1] - 2]:
            non_zero_row = [col for col in range(T.shape[1] - 1)
                            if abs(T[pivrow, col]) > tol]
            if len(non_zero_row) > 0:
                pivcol = non_zero_row[0]
                _apply_pivot(T, basis, pivrow, pivcol, tol)
                nit += 1

    # Preallocate the solution vector used for callback reporting; it must
    # be large enough to index every basic variable currently in the basis.
    if len(basis[:m]) == 0:
        solution = np.empty(T.shape[1] - 1, dtype=np.float64)
    else:
        solution = np.empty(max(T.shape[1] - 1, max(basis[:m]) + 1),
                            dtype=np.float64)

    while not complete:
        # Find the pivot column
        pivcol_found, pivcol = _pivot_col(T, tol, bland)
        if not pivcol_found:
            # No eligible entering column: current solution is optimal.
            pivcol = np.nan
            pivrow = np.nan
            status = 0
            complete = True
        else:
            # Find the pivot row
            pivrow_found, pivrow = _pivot_row(T, basis, pivcol, phase, tol, bland)
            if not pivrow_found:
                # Entering column unbounded below: problem is unbounded.
                status = 3
                complete = True

        if callback is not None:
            # Assemble the current solution for reporting; nonbasic
            # variables are zero, basic ones come from the RHS column.
            solution[:] = 0
            solution[basis[:n]] = T[:n, -1]
            x = solution[:m]
            x, fun, slack, con = _postsolve(
                x, postsolve_args
            )
            res = OptimizeResult({
                'x': x,
                'fun': fun,
                'slack': slack,
                'con': con,
                'status': status,
                'message': message,
                'nit': nit,
                'success': status == 0 and complete,
                'phase': phase,
                'complete': complete,
            })
            callback(res)

        if not complete:
            if nit >= maxiter:
                # Iteration limit exceeded
                status = 1
                complete = True
            else:
                _apply_pivot(T, basis, pivrow, pivcol, tol)
                nit += 1
    return nit, status
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def _linprog_simplex(c, c0, A, b, callback, postsolve_args,
                     maxiter=1000, tol=1e-9, disp=False, bland=False,
                     **unknown_options):
    """
    Minimize a linear objective function subject to linear equality and
    non-negativity constraints using the two phase simplex method.

    The method-specific solvers expect a problem in standard form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
            x >= 0

    whereas the top level ``linprog`` module accepts inequality constraints
    and general bounds and converts them to this form (slack variables for
    inequalities, splitting of unbounded variables, bounds as constraints).
    User-facing documentation is in _linprog_doc.py.

    Parameters
    ----------
    c : 1-D array
        Coefficients of the linear objective function to be minimized.
    c0 : float
        Constant term in objective function due to fixed (and eliminated)
        variables. (Purely for display.)
    A : 2-D array
        2-D array such that ``A @ x`` gives the values of the equality
        constraints at ``x``.
    b : 1-D array
        1-D array of values representing the right hand side of each
        equality constraint (row) in ``A``.
    callback : callable, optional
        If provided, called once per iteration with a
        `scipy.optimize.OptimizeResult` holding the current ``x``, ``fun``,
        ``success``, ``slack``, ``con``, ``phase``, ``status``, ``nit`` and
        ``message`` fields.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the
        standard-form problem into the solution to the original problem.

    Options
    -------
    maxiter : int
        The maximum number of iterations to perform.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    disp : bool
        If True, print exit status message to sys.stdout.
    bland : bool
        If True, use Bland's anti-cycling rule [3]_ to choose pivots; this
        prevents cycling at the cost of typically more iterations.
    unknown_options : dict
        Optional arguments not used by this particular solver. If non-empty
        a warning is issued listing all unused options.

    Returns
    -------
    x : 1-D array
        Solution vector.
    status : int
        An integer representing the exit status of the optimization::

         0 : Optimization terminated successfully
         1 : Iteration limit reached
         2 : Problem appears to be infeasible
         3 : Problem appears to be unbounded
         4 : Serious numerical difficulties encountered

    message : str
        A string descriptor of the exit status of the optimization.
    iteration : int
        The number of iterations taken to solve the problem.

    References
    ----------
    .. [1] Dantzig, George B., Linear programming and extensions. Rand
           Corporation Research Study Princeton Univ. Press, Princeton, NJ,
           1963
    .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
           Mathematical Programming", McGraw-Hill, Chapter 4.
    .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
           Mathematics of Operations Research (2), 1977: pp. 103-107.
    """
    _check_unknown_options(unknown_options)

    status = 0
    messages = {0: "Optimization terminated successfully.",
                1: "Iteration limit reached.",
                2: "Optimization failed. Unable to find a feasible"
                   " starting point.",
                3: "Optimization failed. The problem appears to be unbounded.",
                4: "Optimization failed. Singular matrix encountered."}

    n, m = A.shape

    # The simplex tableau requires b >= 0; flip the sign of any row (in
    # place, mutating the caller's arrays as the original does) violating it.
    neg_rows = b < 0
    A[neg_rows] *= -1
    b[neg_rows] *= -1

    # Every constraint is an equality, so each gets an artificial variable
    # and those artificials form the initial basis.
    art_cols = np.arange(m, m + n)
    basis = art_cols.copy()

    # Phase-one tableau: [A | I | b] constraint rows, then the true
    # objective row, then the pseudo-objective (sum of infeasibilities) row.
    constraint_rows = np.column_stack((A, np.eye(n), b))
    objective_row = np.concatenate((c, np.zeros(n), [c0]))
    pseudo_row = -constraint_rows.sum(axis=0)
    pseudo_row[art_cols] = 0
    T = np.vstack((constraint_rows, objective_row, pseudo_row))

    nit1, status = _solve_simplex(T, n, basis, callback=callback,
                                  postsolve_args=postsolve_args,
                                  maxiter=maxiter, tol=tol, phase=1,
                                  bland=bland
                                  )
    nit2 = nit1
    if abs(T[-1, -1]) < tol:
        # Pseudo-objective driven to (numerical) zero: feasible start found.
        # Drop the pseudo-objective row and the artificial columns, then
        # continue with phase 2 on the reduced tableau.
        T = np.delete(T[:-1, :], art_cols, axis=1)
    else:
        # Phase 1 could not reach feasibility.
        status = 2
        messages[status] = (
            "Phase 1 of the simplex method failed to find a feasible "
            "solution. The pseudo-objective function evaluates to {0:.1e} "
            "which exceeds the required tolerance of {1} for a solution to be "
            "considered 'close enough' to zero to be a basic solution. "
            "Consider increasing the tolerance to be greater than {0:.1e}. "
            "If this tolerance is unacceptably large the problem may be "
            "infeasible.".format(abs(T[-1, -1]), tol)
        )

    if status == 0:
        # Phase 2: optimize the true objective from the feasible basis,
        # carrying over the phase-1 iteration count.
        nit2, status = _solve_simplex(T, n, basis, callback=callback,
                                      postsolve_args=postsolve_args,
                                      maxiter=maxiter, tol=tol, phase=2,
                                      bland=bland, nit0=nit1
                                      )

    # Read the basic-variable values out of the final tableau; the first m
    # entries are the original (non-artificial) variables.
    solution = np.zeros(n + m)
    solution[basis[:n]] = T[:n, -1]
    x = solution[:m]

    return x, status, messages[status], int(nit2)
|