Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +4 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py +69 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py +21 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py +306 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py +882 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py +152 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py +364 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py +127 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py +868 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py +124 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py +1160 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py +324 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py +133 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py +247 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py +742 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py +302 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py +6 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD +20 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt +1568 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py +81 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py +74 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py +181 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py +258 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py +2 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py +741 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py +732 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc +0 -0
.gitattributes
CHANGED
|
@@ -42,3 +42,7 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/F
|
|
| 42 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 44 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Parsing.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 44 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Parsing.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e280cea1d8a888db57222b560cb97e18e09c1c2613b47587acc78c908ac6124
|
| 3 |
+
size 243464
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3aab0e85d88c1a18bbe9b720f126c115c22a16b91ee8006dbd1d6fa5de099eb
|
| 3 |
+
size 396760
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from importlib.metadata import entry_points
|
| 2 |
+
|
| 3 |
+
from . import _version, caching
|
| 4 |
+
from .callbacks import Callback
|
| 5 |
+
from .compression import available_compressions
|
| 6 |
+
from .core import get_fs_token_paths, open, open_files, open_local
|
| 7 |
+
from .exceptions import FSTimeoutError
|
| 8 |
+
from .mapping import FSMap, get_mapper
|
| 9 |
+
from .registry import (
|
| 10 |
+
available_protocols,
|
| 11 |
+
filesystem,
|
| 12 |
+
get_filesystem_class,
|
| 13 |
+
register_implementation,
|
| 14 |
+
registry,
|
| 15 |
+
)
|
| 16 |
+
from .spec import AbstractFileSystem
|
| 17 |
+
|
| 18 |
+
__version__ = _version.get_versions()["version"]
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
"AbstractFileSystem",
|
| 22 |
+
"FSTimeoutError",
|
| 23 |
+
"FSMap",
|
| 24 |
+
"filesystem",
|
| 25 |
+
"register_implementation",
|
| 26 |
+
"get_filesystem_class",
|
| 27 |
+
"get_fs_token_paths",
|
| 28 |
+
"get_mapper",
|
| 29 |
+
"open",
|
| 30 |
+
"open_files",
|
| 31 |
+
"open_local",
|
| 32 |
+
"registry",
|
| 33 |
+
"caching",
|
| 34 |
+
"Callback",
|
| 35 |
+
"available_protocols",
|
| 36 |
+
"available_compressions",
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def process_entries():
|
| 41 |
+
if entry_points is not None:
|
| 42 |
+
try:
|
| 43 |
+
eps = entry_points()
|
| 44 |
+
except TypeError:
|
| 45 |
+
pass # importlib-metadata < 0.8
|
| 46 |
+
else:
|
| 47 |
+
if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0
|
| 48 |
+
specs = eps.select(group="fsspec.specs")
|
| 49 |
+
else:
|
| 50 |
+
specs = eps.get("fsspec.specs", [])
|
| 51 |
+
registered_names = {}
|
| 52 |
+
for spec in specs:
|
| 53 |
+
err_msg = f"Unable to load filesystem from {spec}"
|
| 54 |
+
name = spec.name
|
| 55 |
+
if name in registered_names:
|
| 56 |
+
continue
|
| 57 |
+
registered_names[name] = True
|
| 58 |
+
register_implementation(
|
| 59 |
+
name,
|
| 60 |
+
spec.value.replace(":", "."),
|
| 61 |
+
errtxt=err_msg,
|
| 62 |
+
# We take our implementations as the ones to overload with if
|
| 63 |
+
# for some reason we encounter some, may be the same, already
|
| 64 |
+
# registered
|
| 65 |
+
clobber=True,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
process_entries()
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# This file was generated by 'versioneer.py' (0.29) from
|
| 3 |
+
# revision-control system data, or from the parent directory name of an
|
| 4 |
+
# unpacked source archive. Distribution tarballs contain a pre-generated copy
|
| 5 |
+
# of this file.
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
version_json = '''
|
| 10 |
+
{
|
| 11 |
+
"date": "2024-02-04T20:21:42-0500",
|
| 12 |
+
"dirty": false,
|
| 13 |
+
"error": null,
|
| 14 |
+
"full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
|
| 15 |
+
"version": "2024.2.0"
|
| 16 |
+
}
|
| 17 |
+
''' # END VERSION_JSON
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_versions():
|
| 21 |
+
return json.loads(version_json)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc
ADDED
|
Binary file (20.1 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc
ADDED
|
Binary file (19.7 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc
ADDED
|
Binary file (6.17 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc
ADDED
|
Binary file (10.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc
ADDED
|
Binary file (5.39 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import errno
|
| 2 |
+
import io
|
| 3 |
+
import os
|
| 4 |
+
import secrets
|
| 5 |
+
import shutil
|
| 6 |
+
from contextlib import suppress
|
| 7 |
+
from functools import cached_property, wraps
|
| 8 |
+
from urllib.parse import parse_qs
|
| 9 |
+
|
| 10 |
+
from fsspec.spec import AbstractFileSystem
|
| 11 |
+
from fsspec.utils import (
|
| 12 |
+
get_package_version_without_import,
|
| 13 |
+
infer_storage_options,
|
| 14 |
+
mirror_from,
|
| 15 |
+
tokenize,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def wrap_exceptions(func):
|
| 20 |
+
@wraps(func)
|
| 21 |
+
def wrapper(*args, **kwargs):
|
| 22 |
+
try:
|
| 23 |
+
return func(*args, **kwargs)
|
| 24 |
+
except OSError as exception:
|
| 25 |
+
if not exception.args:
|
| 26 |
+
raise
|
| 27 |
+
|
| 28 |
+
message, *args = exception.args
|
| 29 |
+
if isinstance(message, str) and "does not exist" in message:
|
| 30 |
+
raise FileNotFoundError(errno.ENOENT, message) from exception
|
| 31 |
+
else:
|
| 32 |
+
raise
|
| 33 |
+
|
| 34 |
+
return wrapper
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
PYARROW_VERSION = None
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ArrowFSWrapper(AbstractFileSystem):
|
| 41 |
+
"""FSSpec-compatible wrapper of pyarrow.fs.FileSystem.
|
| 42 |
+
|
| 43 |
+
Parameters
|
| 44 |
+
----------
|
| 45 |
+
fs : pyarrow.fs.FileSystem
|
| 46 |
+
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
root_marker = "/"
|
| 50 |
+
|
| 51 |
+
def __init__(self, fs, **kwargs):
|
| 52 |
+
global PYARROW_VERSION
|
| 53 |
+
PYARROW_VERSION = get_package_version_without_import("pyarrow")
|
| 54 |
+
self.fs = fs
|
| 55 |
+
super().__init__(**kwargs)
|
| 56 |
+
|
| 57 |
+
@property
|
| 58 |
+
def protocol(self):
|
| 59 |
+
return self.fs.type_name
|
| 60 |
+
|
| 61 |
+
@cached_property
|
| 62 |
+
def fsid(self):
|
| 63 |
+
return "hdfs_" + tokenize(self.fs.host, self.fs.port)
|
| 64 |
+
|
| 65 |
+
@classmethod
|
| 66 |
+
def _strip_protocol(cls, path):
|
| 67 |
+
ops = infer_storage_options(path)
|
| 68 |
+
path = ops["path"]
|
| 69 |
+
if path.startswith("//"):
|
| 70 |
+
# special case for "hdfs://path" (without the triple slash)
|
| 71 |
+
path = path[1:]
|
| 72 |
+
return path
|
| 73 |
+
|
| 74 |
+
def ls(self, path, detail=False, **kwargs):
|
| 75 |
+
path = self._strip_protocol(path)
|
| 76 |
+
from pyarrow.fs import FileSelector
|
| 77 |
+
|
| 78 |
+
entries = [
|
| 79 |
+
self._make_entry(entry)
|
| 80 |
+
for entry in self.fs.get_file_info(FileSelector(path))
|
| 81 |
+
]
|
| 82 |
+
if detail:
|
| 83 |
+
return entries
|
| 84 |
+
else:
|
| 85 |
+
return [entry["name"] for entry in entries]
|
| 86 |
+
|
| 87 |
+
def info(self, path, **kwargs):
|
| 88 |
+
path = self._strip_protocol(path)
|
| 89 |
+
[info] = self.fs.get_file_info([path])
|
| 90 |
+
return self._make_entry(info)
|
| 91 |
+
|
| 92 |
+
def exists(self, path):
|
| 93 |
+
path = self._strip_protocol(path)
|
| 94 |
+
try:
|
| 95 |
+
self.info(path)
|
| 96 |
+
except FileNotFoundError:
|
| 97 |
+
return False
|
| 98 |
+
else:
|
| 99 |
+
return True
|
| 100 |
+
|
| 101 |
+
def _make_entry(self, info):
|
| 102 |
+
from pyarrow.fs import FileType
|
| 103 |
+
|
| 104 |
+
if info.type is FileType.Directory:
|
| 105 |
+
kind = "directory"
|
| 106 |
+
elif info.type is FileType.File:
|
| 107 |
+
kind = "file"
|
| 108 |
+
elif info.type is FileType.NotFound:
|
| 109 |
+
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), info.path)
|
| 110 |
+
else:
|
| 111 |
+
kind = "other"
|
| 112 |
+
|
| 113 |
+
return {
|
| 114 |
+
"name": info.path,
|
| 115 |
+
"size": info.size,
|
| 116 |
+
"type": kind,
|
| 117 |
+
"mtime": info.mtime,
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
@wrap_exceptions
|
| 121 |
+
def cp_file(self, path1, path2, **kwargs):
|
| 122 |
+
path1 = self._strip_protocol(path1).rstrip("/")
|
| 123 |
+
path2 = self._strip_protocol(path2).rstrip("/")
|
| 124 |
+
|
| 125 |
+
with self._open(path1, "rb") as lstream:
|
| 126 |
+
tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}"
|
| 127 |
+
try:
|
| 128 |
+
with self.open(tmp_fname, "wb") as rstream:
|
| 129 |
+
shutil.copyfileobj(lstream, rstream)
|
| 130 |
+
self.fs.move(tmp_fname, path2)
|
| 131 |
+
except BaseException: # noqa
|
| 132 |
+
with suppress(FileNotFoundError):
|
| 133 |
+
self.fs.delete_file(tmp_fname)
|
| 134 |
+
raise
|
| 135 |
+
|
| 136 |
+
@wrap_exceptions
|
| 137 |
+
def mv(self, path1, path2, **kwargs):
|
| 138 |
+
path1 = self._strip_protocol(path1).rstrip("/")
|
| 139 |
+
path2 = self._strip_protocol(path2).rstrip("/")
|
| 140 |
+
self.fs.move(path1, path2)
|
| 141 |
+
|
| 142 |
+
mv_file = mv
|
| 143 |
+
|
| 144 |
+
@wrap_exceptions
|
| 145 |
+
def rm_file(self, path):
|
| 146 |
+
path = self._strip_protocol(path)
|
| 147 |
+
self.fs.delete_file(path)
|
| 148 |
+
|
| 149 |
+
@wrap_exceptions
|
| 150 |
+
def rm(self, path, recursive=False, maxdepth=None):
|
| 151 |
+
path = self._strip_protocol(path).rstrip("/")
|
| 152 |
+
if self.isdir(path):
|
| 153 |
+
if recursive:
|
| 154 |
+
self.fs.delete_dir(path)
|
| 155 |
+
else:
|
| 156 |
+
raise ValueError("Can't delete directories without recursive=False")
|
| 157 |
+
else:
|
| 158 |
+
self.fs.delete_file(path)
|
| 159 |
+
|
| 160 |
+
@wrap_exceptions
|
| 161 |
+
def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs):
|
| 162 |
+
if mode == "rb":
|
| 163 |
+
if seekable:
|
| 164 |
+
method = self.fs.open_input_file
|
| 165 |
+
else:
|
| 166 |
+
method = self.fs.open_input_stream
|
| 167 |
+
elif mode == "wb":
|
| 168 |
+
method = self.fs.open_output_stream
|
| 169 |
+
elif mode == "ab":
|
| 170 |
+
method = self.fs.open_append_stream
|
| 171 |
+
else:
|
| 172 |
+
raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}")
|
| 173 |
+
|
| 174 |
+
_kwargs = {}
|
| 175 |
+
if mode != "rb" or not seekable:
|
| 176 |
+
if int(PYARROW_VERSION.split(".")[0]) >= 4:
|
| 177 |
+
# disable compression auto-detection
|
| 178 |
+
_kwargs["compression"] = None
|
| 179 |
+
stream = method(path, **_kwargs)
|
| 180 |
+
|
| 181 |
+
return ArrowFile(self, stream, path, mode, block_size, **kwargs)
|
| 182 |
+
|
| 183 |
+
@wrap_exceptions
|
| 184 |
+
def mkdir(self, path, create_parents=True, **kwargs):
|
| 185 |
+
path = self._strip_protocol(path)
|
| 186 |
+
if create_parents:
|
| 187 |
+
self.makedirs(path, exist_ok=True)
|
| 188 |
+
else:
|
| 189 |
+
self.fs.create_dir(path, recursive=False)
|
| 190 |
+
|
| 191 |
+
@wrap_exceptions
|
| 192 |
+
def makedirs(self, path, exist_ok=False):
|
| 193 |
+
path = self._strip_protocol(path)
|
| 194 |
+
self.fs.create_dir(path, recursive=True)
|
| 195 |
+
|
| 196 |
+
@wrap_exceptions
|
| 197 |
+
def rmdir(self, path):
|
| 198 |
+
path = self._strip_protocol(path)
|
| 199 |
+
self.fs.delete_dir(path)
|
| 200 |
+
|
| 201 |
+
@wrap_exceptions
|
| 202 |
+
def modified(self, path):
|
| 203 |
+
path = self._strip_protocol(path)
|
| 204 |
+
return self.fs.get_file_info(path).mtime
|
| 205 |
+
|
| 206 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
| 207 |
+
kwargs["seekable"] = start not in [None, 0]
|
| 208 |
+
return super().cat_file(path, start=None, end=None, **kwargs)
|
| 209 |
+
|
| 210 |
+
def get_file(self, rpath, lpath, **kwargs):
|
| 211 |
+
kwargs["seekable"] = False
|
| 212 |
+
super().get_file(rpath, lpath, **kwargs)
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
@mirror_from(
|
| 216 |
+
"stream",
|
| 217 |
+
[
|
| 218 |
+
"read",
|
| 219 |
+
"seek",
|
| 220 |
+
"tell",
|
| 221 |
+
"write",
|
| 222 |
+
"readable",
|
| 223 |
+
"writable",
|
| 224 |
+
"close",
|
| 225 |
+
"size",
|
| 226 |
+
"seekable",
|
| 227 |
+
],
|
| 228 |
+
)
|
| 229 |
+
class ArrowFile(io.IOBase):
|
| 230 |
+
def __init__(self, fs, stream, path, mode, block_size=None, **kwargs):
|
| 231 |
+
self.path = path
|
| 232 |
+
self.mode = mode
|
| 233 |
+
|
| 234 |
+
self.fs = fs
|
| 235 |
+
self.stream = stream
|
| 236 |
+
|
| 237 |
+
self.blocksize = self.block_size = block_size
|
| 238 |
+
self.kwargs = kwargs
|
| 239 |
+
|
| 240 |
+
def __enter__(self):
|
| 241 |
+
return self
|
| 242 |
+
|
| 243 |
+
def __exit__(self, *args):
|
| 244 |
+
return self.close()
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class HadoopFileSystem(ArrowFSWrapper):
|
| 248 |
+
"""A wrapper on top of the pyarrow.fs.HadoopFileSystem
|
| 249 |
+
to connect it's interface with fsspec"""
|
| 250 |
+
|
| 251 |
+
protocol = "hdfs"
|
| 252 |
+
|
| 253 |
+
def __init__(
|
| 254 |
+
self,
|
| 255 |
+
host="default",
|
| 256 |
+
port=0,
|
| 257 |
+
user=None,
|
| 258 |
+
kerb_ticket=None,
|
| 259 |
+
replication=3,
|
| 260 |
+
extra_conf=None,
|
| 261 |
+
**kwargs,
|
| 262 |
+
):
|
| 263 |
+
"""
|
| 264 |
+
|
| 265 |
+
Parameters
|
| 266 |
+
----------
|
| 267 |
+
host: str
|
| 268 |
+
Hostname, IP or "default" to try to read from Hadoop config
|
| 269 |
+
port: int
|
| 270 |
+
Port to connect on, or default from Hadoop config if 0
|
| 271 |
+
user: str or None
|
| 272 |
+
If given, connect as this username
|
| 273 |
+
kerb_ticket: str or None
|
| 274 |
+
If given, use this ticket for authentication
|
| 275 |
+
replication: int
|
| 276 |
+
set replication factor of file for write operations. default value is 3.
|
| 277 |
+
extra_conf: None or dict
|
| 278 |
+
Passed on to HadoopFileSystem
|
| 279 |
+
"""
|
| 280 |
+
from pyarrow.fs import HadoopFileSystem
|
| 281 |
+
|
| 282 |
+
fs = HadoopFileSystem(
|
| 283 |
+
host=host,
|
| 284 |
+
port=port,
|
| 285 |
+
user=user,
|
| 286 |
+
kerb_ticket=kerb_ticket,
|
| 287 |
+
replication=replication,
|
| 288 |
+
extra_conf=extra_conf,
|
| 289 |
+
)
|
| 290 |
+
super().__init__(fs=fs, **kwargs)
|
| 291 |
+
|
| 292 |
+
@staticmethod
|
| 293 |
+
def _get_kwargs_from_urls(path):
|
| 294 |
+
ops = infer_storage_options(path)
|
| 295 |
+
out = {}
|
| 296 |
+
if ops.get("host", None):
|
| 297 |
+
out["host"] = ops["host"]
|
| 298 |
+
if ops.get("username", None):
|
| 299 |
+
out["user"] = ops["username"]
|
| 300 |
+
if ops.get("port", None):
|
| 301 |
+
out["port"] = ops["port"]
|
| 302 |
+
if ops.get("url_query", None):
|
| 303 |
+
queries = parse_qs(ops["url_query"])
|
| 304 |
+
if queries.get("replication", None):
|
| 305 |
+
out["replication"] = int(queries["replication"][0])
|
| 306 |
+
return out
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py
ADDED
|
@@ -0,0 +1,882 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import time
|
| 8 |
+
import weakref
|
| 9 |
+
from shutil import rmtree
|
| 10 |
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar
|
| 11 |
+
|
| 12 |
+
from fsspec import AbstractFileSystem, filesystem
|
| 13 |
+
from fsspec.callbacks import DEFAULT_CALLBACK
|
| 14 |
+
from fsspec.compression import compr
|
| 15 |
+
from fsspec.core import BaseCache, MMapCache
|
| 16 |
+
from fsspec.exceptions import BlocksizeMismatchError
|
| 17 |
+
from fsspec.implementations.cache_mapper import create_cache_mapper
|
| 18 |
+
from fsspec.implementations.cache_metadata import CacheMetadata
|
| 19 |
+
from fsspec.spec import AbstractBufferedFile
|
| 20 |
+
from fsspec.transaction import Transaction
|
| 21 |
+
from fsspec.utils import infer_compression
|
| 22 |
+
|
| 23 |
+
if TYPE_CHECKING:
|
| 24 |
+
from fsspec.implementations.cache_mapper import AbstractCacheMapper
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger("fsspec.cached")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class WriteCachedTransaction(Transaction):
|
| 30 |
+
def complete(self, commit=True):
|
| 31 |
+
rpaths = [f.path for f in self.files]
|
| 32 |
+
lpaths = [f.fn for f in self.files]
|
| 33 |
+
if commit:
|
| 34 |
+
self.fs.put(lpaths, rpaths)
|
| 35 |
+
# else remove?
|
| 36 |
+
self.fs._intrans = False
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class CachingFileSystem(AbstractFileSystem):
    """Locally caching filesystem, layer over any other FS

    This class implements chunk-wise local storage of remote files, for quick
    access after the initial download. The files are stored in a given
    directory with hashes of URLs for the filenames. If no directory is given,
    a temporary one is used, which should be cleaned up by the OS after the
    process ends. The files themselves are sparse (as implemented in
    :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
    takes up space.

    Restrictions:

    - the block-size must be the same for each access of a given file, unless
      all blocks of the file have already been read
    - caching can only be applied to file-systems which produce files
      derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
      allowed, for testing
    """

    protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")

    def __init__(
        self,
        target_protocol=None,
        cache_storage="TMP",
        cache_check=10,
        check_files=False,
        expiry_time=604800,
        target_options=None,
        fs=None,
        same_names: bool | None = None,
        compression=None,
        cache_mapper: AbstractCacheMapper | None = None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        target_protocol: str (optional)
            Target filesystem protocol. Provide either this or ``fs``.
        cache_storage: str or list(str)
            Location to store files. If "TMP", this is a temporary directory,
            and will be cleaned up by the OS when this process ends (or later).
            If a list, each location will be tried in the order given, but
            only the last will be considered writable.
        cache_check: int
            Number of seconds between reload of cache metadata
        check_files: bool
            Whether to explicitly see if the UID of the remote file matches
            the stored one before using. Warning: some file systems such as
            HTTP cannot reliably give a unique hash of the contents of some
            path, so be sure to set this option to False.
        expiry_time: int
            The time in seconds after which a local copy is considered useless.
            Set to falsy to prevent expiry. The default is equivalent to one
            week.
        target_options: dict or None
            Passed to the instantiation of the FS, if fs is None.
        fs: filesystem instance
            The target filesystem to run against. Provide this or ``protocol``.
        same_names: bool (optional)
            By default, target URLs are hashed using a ``HashCacheMapper`` so
            that files from different backends with the same basename do not
            conflict. If this argument is ``true``, a ``BasenameCacheMapper``
            is used instead. Other cache mapper options are available by using
            the ``cache_mapper`` keyword argument. Only one of this and
            ``cache_mapper`` should be specified.
        compression: str (optional)
            To decompress on download. Can be 'infer' (guess from the URL name),
            one of the entries in ``fsspec.compression.compr``, or None for no
            decompression.
        cache_mapper: AbstractCacheMapper (optional)
            The object use to map from original filenames to cached filenames.
            Only one of this and ``same_names`` should be specified.
        """
        super().__init__(**kwargs)
        # Exactly one of fs / target_protocol must be supplied.
        if fs is None and target_protocol is None:
            raise ValueError(
                "Please provide filesystem instance(fs) or target_protocol"
            )
        if not (fs is None) ^ (target_protocol is None):
            raise ValueError(
                "Both filesystems (fs) and target_protocol may not be both given."
            )
        if cache_storage == "TMP":
            # Auto-created temp dir; finalizer removes it when this instance
            # is garbage-collected (best-effort, see _remove_tempdir).
            tempdir = tempfile.mkdtemp()
            storage = [tempdir]
            weakref.finalize(self, self._remove_tempdir, tempdir)
        else:
            if isinstance(cache_storage, str):
                storage = [cache_storage]
            else:
                storage = cache_storage
        # Only the last storage location is writable; make sure it exists.
        os.makedirs(storage[-1], exist_ok=True)
        self.storage = storage
        self.kwargs = target_options or {}
        self.cache_check = cache_check
        self.check_files = check_files
        self.expiry = expiry_time
        self.compression = compression

        # Size of cache in bytes. If None then the size is unknown and will be
        # recalculated the next time cache_size() is called. On writes to the
        # cache this is reset to None.
        self._cache_size = None

        if same_names is not None and cache_mapper is not None:
            raise ValueError(
                "Cannot specify both same_names and cache_mapper in "
                "CachingFileSystem.__init__"
            )
        if cache_mapper is not None:
            self._mapper = cache_mapper
        else:
            self._mapper = create_cache_mapper(
                same_names if same_names is not None else False
            )

        self.target_protocol = (
            target_protocol
            if isinstance(target_protocol, str)
            else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
        )
        self._metadata = CacheMetadata(self.storage)
        self.load_cache()
        self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)

        def _strip_protocol(path):
            # acts as a method, since each instance has a difference target
            return self.fs._strip_protocol(type(self)._strip_protocol(path))

        # Instance attribute shadows the classmethod so stripping delegates
        # to the wrapped filesystem (see closure above).
        self._strip_protocol: Callable = _strip_protocol

    @staticmethod
    def _remove_tempdir(tempdir):
        # Best-effort cleanup of the "TMP" cache dir; failures are ignored
        # because this may run during interpreter shutdown.
        try:
            rmtree(tempdir)
        except Exception:
            pass

    def _mkcache(self):
        # Ensure the writable (last) cache directory exists.
        os.makedirs(self.storage[-1], exist_ok=True)

    def cache_size(self):
        """Return size of cache in bytes.

        If more than one cache directory is in use, only the size of the last
        one (the writable cache directory) is returned.
        """
        if self._cache_size is None:
            cache_dir = self.storage[-1]
            self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
        return self._cache_size

    def load_cache(self):
        """Read set of stored blocks from file"""
        self._metadata.load()
        self._mkcache()
        self.last_cache = time.time()

    def save_cache(self):
        """Save set of stored blocks from file"""
        self._mkcache()
        self._metadata.save()
        self.last_cache = time.time()
        # Cache contents changed, so the cached size is stale.
        self._cache_size = None

    def _check_cache(self):
        """Reload caches if time elapsed or any disappeared"""
        self._mkcache()
        if not self.cache_check:
            # explicitly told not to bother checking
            return
        timecond = time.time() - self.last_cache > self.cache_check
        existcond = all(os.path.exists(storage) for storage in self.storage)
        if timecond or not existcond:
            self.load_cache()

    def _check_file(self, path):
        """Is path in cache and still valid"""
        path = self._strip_protocol(path)
        self._check_cache()
        return self._metadata.check_file(path, self)

    def clear_cache(self):
        """Remove all files and metadata from the cache

        In the case of multiple cache locations, this clears only the last one,
        which is assumed to be the read/write one.
        """
        rmtree(self.storage[-1])
        self.load_cache()
        self._cache_size = None

    def clear_expired_cache(self, expiry_time=None):
        """Remove all expired files and metadata from the cache

        In the case of multiple cache locations, this clears only the last one,
        which is assumed to be the read/write one.

        Parameters
        ----------
        expiry_time: int
            The time in seconds after which a local copy is considered useless.
            If not defined the default is equivalent to the attribute from the
            file caching instantiation.
        """

        if not expiry_time:
            expiry_time = self.expiry

        self._check_cache()

        expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
        for fn in expired_files:
            if os.path.exists(fn):
                os.remove(fn)

        if writable_cache_empty:
            # nothing left in the writable cache: drop the whole directory
            # and reinitialise the metadata
            rmtree(self.storage[-1])
            self.load_cache()

        self._cache_size = None

    def pop_from_cache(self, path):
        """Remove cached version of given file

        Deletes local copy of the given (remote) path. If it is found in a cache
        location which is not the last, it is assumed to be read-only, and
        raises PermissionError
        """
        path = self._strip_protocol(path)
        fn = self._metadata.pop_file(path)
        if fn is not None:
            os.remove(fn)
        self._cache_size = None

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS, and make it have a mmap
        cache pointing to the location which we determine, in our cache.
        The ``blocks`` instance is shared, so as the mmap cache instance
        updates, so does the entry in our ``cached_files`` attribute.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        path = self._strip_protocol(path)

        path = self.fs._strip_protocol(path)
        if "r" not in mode:
            # writes are passed straight through to the target FS
            return self.fs._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs,
            )
        detail = self._check_file(path)
        if detail:
            # file is in cache
            detail, fn = detail
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                # stored file is complete
                logger.debug("Opening local copy of %s", path)
                return open(fn, mode)
            # TODO: action where partial file exists in read-only cache
            logger.debug("Opening partially cached copy of %s", path)
        else:
            # not cached yet: register a new sparse-file entry
            hash = self._mapper(path)
            fn = os.path.join(self.storage[-1], hash)
            blocks = set()
            detail = {
                "original": path,
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self._metadata.update_file(path, detail)
            logger.debug("Creating local sparse file for %s", path)

        # call target filesystems open
        self._mkcache()
        f = self.fs._open(
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_options=cache_options,
            cache_type="none",  # MMapCache below does the caching instead
            **kwargs,
        )
        if self.compression:
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
        if "blocksize" in detail:
            # a partial cache entry exists: it is only valid if re-read with
            # the same block size (restriction documented on the class)
            if detail["blocksize"] != f.blocksize:
                raise BlocksizeMismatchError(
                    f"Cached file must be reopened with same block"
                    f" size as original (old: {detail['blocksize']},"
                    f" new {f.blocksize})"
                )
        else:
            detail["blocksize"] = f.blocksize
        # `blocks` is shared with the metadata entry, so fetches recorded by
        # the MMapCache are visible to save_cache()
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
        close = f.close
        # monkey-patch close so block state is persisted when the file closes
        f.close = lambda: self.close_and_update(f, close)
        self.save_cache()
        return f

    def _parent(self, path):
        return self.fs._parent(path)

    def hash_name(self, path: str, *args: Any) -> str:
        # Kept for backward compatibility with downstream libraries.
        # Ignores extra arguments, previously same_name boolean.
        return self._mapper(path)

    def close_and_update(self, f, close):
        """Called when a file is closing, so store the set of blocks"""
        if f.closed:
            return
        path = self._strip_protocol(f.path)
        self._metadata.on_close_cached_file(f, path)
        try:
            logger.debug("going to save")
            self.save_cache()
            logger.debug("saved")
        except OSError:
            logger.debug("Cache saving failed while closing file")
        except NameError:
            logger.debug("Cache save failed due to interpreter shutdown")
        close()
        f.closed = True

    def __getattribute__(self, item):
        # Dispatch: names listed here are served by this class; everything
        # else is proxied to the wrapped target filesystem below.
        if item in [
            "load_cache",
            "_open",
            "save_cache",
            "close_and_update",
            "__init__",
            "__getattribute__",
            "__reduce__",
            "_make_local_details",
            "open",
            "cat",
            "cat_file",
            "cat_ranges",
            "get",
            "read_block",
            "tail",
            "head",
            "_check_file",
            "_check_cache",
            "_mkcache",
            "clear_cache",
            "clear_expired_cache",
            "pop_from_cache",
            "_mkcache",  # NOTE(review): duplicate entry; harmless but redundant
            "local_file",
            "_paths_from_path",
            "get_mapper",
            "open_many",
            "commit_many",
            "hash_name",
            "__hash__",
            "__eq__",
            "to_json",
            "cache_size",
            "pipe_file",
            "pipe",
            "start_transaction",
            "end_transaction",
        ]:
            # all the methods defined in this class. Note `open` here, since
            # it calls `_open`, but is actually in superclass
            return lambda *args, **kw: getattr(type(self), item).__get__(self)(
                *args, **kw
            )
        if item in ["__reduce_ex__"]:
            raise AttributeError
        if item in ["transaction"]:
            # property
            return type(self).transaction.__get__(self)
        if item in ["_cache", "transaction_type"]:
            # class attributes
            return getattr(type(self), item)
        if item == "__class__":
            return type(self)
        d = object.__getattribute__(self, "__dict__")
        fs = d.get("fs", None)  # fs is not immediately defined
        if item in d:
            return d[item]
        elif fs is not None:
            if item in fs.__dict__:
                # attribute of instance
                return fs.__dict__[item]
            # attributed belonging to the target filesystem
            cls = type(fs)
            m = getattr(cls, item)
            if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
                not hasattr(m, "__self__") or m.__self__ is None
            ):
                # instance method
                return m.__get__(fs, cls)
            return m  # class method or attribute
        else:
            # attributes of the superclass, while target is being set up
            return super().__getattribute__(item)

    def __eq__(self, other):
        """Test for equality."""
        if self is other:
            return True
        if not isinstance(other, type(self)):
            return False
        return (
            self.storage == other.storage
            and self.kwargs == other.kwargs
            and self.cache_check == other.cache_check
            and self.check_files == other.check_files
            and self.expiry == other.expiry
            and self.compression == other.compression
            and self._mapper == other._mapper
            and self.target_protocol == other.target_protocol
        )

    def __hash__(self):
        """Calculate hash."""
        # combines the same fields compared in __eq__, as required for
        # hash/eq consistency
        return (
            hash(tuple(self.storage))
            ^ hash(str(self.kwargs))
            ^ hash(self.cache_check)
            ^ hash(self.check_files)
            ^ hash(self.expiry)
            ^ hash(self.compression)
            ^ hash(self._mapper)
            ^ hash(self.target_protocol)
        )

    def to_json(self):
        """Calculate JSON representation.

        Not implemented yet for CachingFileSystem.
        """
        raise NotImplementedError(
            "CachingFileSystem JSON representation not implemented"
        )
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
class WholeFileCacheFileSystem(CachingFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This is similar to ``CachingFileSystem``, but without
    the block-wise functionality and so can work even when sparse files
    are not allowed. See its docstring for definition of the init
    arguments.

    The class still needs access to the remote store for listing files,
    and may refresh cached files.
    """

    protocol = "filecache"
    local_file = True

    def open_many(self, open_files, **kwargs):
        """Open a set of files in one batched operation.

        Read mode downloads all not-yet-cached files in a single bulk
        ``get``; write mode returns local temp files to be uploaded on
        commit.
        """
        paths = [of.path for of in open_files]
        if "r" in open_files.mode:
            self._mkcache()
        else:
            # write mode: stage each path in a LocalTempFile in the writable
            # cache directory; upload happens via commit_many()
            return [
                LocalTempFile(
                    self.fs,
                    path,
                    mode=open_files.mode,
                    fn=os.path.join(self.storage[-1], self._mapper(path)),
                    **kwargs,
                )
                for path in paths
            ]

        if self.compression:
            raise NotImplementedError
        details = [self._check_file(sp) for sp in paths]
        downpath = [p for p, d in zip(paths, details) if not d]
        downfn0 = [
            os.path.join(self.storage[-1], self._mapper(p))
            for p, d in zip(paths, details)
        ]  # keep these path names for opening later
        downfn = [fn for fn, d in zip(downfn0, details) if not d]
        if downpath:
            # skip if all files are already cached and up to date
            self.fs.get(downpath, downfn)

            # update metadata - only happens when downloads are successful
            newdetail = [
                {
                    "original": path,
                    "fn": self._mapper(path),
                    "blocks": True,
                    "time": time.time(),
                    "uid": self.fs.ukey(path),
                }
                for path in downpath
            ]
            for path, detail in zip(downpath, newdetail):
                self._metadata.update_file(path, detail)
            self.save_cache()

        def firstpart(fn):
            # helper to adapt both whole-file and simple-cache
            return fn[1] if isinstance(fn, tuple) else fn

        return [
            open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
            for fn0, fn1 in zip(details, downfn0)
        ]

    def commit_many(self, open_files):
        """Upload a batch of staged local files and remove the local copies."""
        self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
        [f.close() for f in open_files]
        for f in open_files:
            # in case autocommit is off, and so close did not already delete
            try:
                os.remove(f.name)
            except FileNotFoundError:
                pass
        self._cache_size = None

    def _make_local_details(self, path):
        # Register a whole-file ("blocks": True) metadata entry for *path*
        # and return the local filename it will be stored under.
        hash = self._mapper(path)
        fn = os.path.join(self.storage[-1], hash)
        detail = {
            "original": path,
            "fn": hash,
            "blocks": True,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self._metadata.update_file(path, detail)
        logger.debug("Copying %s to local cache", path)
        return fn

    def cat(
        self,
        path,
        recursive=False,
        on_error="raise",
        callback=DEFAULT_CALLBACK,
        **kwargs,
    ):
        """Fetch (potentially multiple) paths' contents, via the local cache.

        Missing files are downloaded in one bulk ``get`` before reading.
        """
        paths = self.expand_path(
            path, recursive=recursive, maxdepth=kwargs.get("maxdepth", None)
        )
        getpaths = []
        storepaths = []
        fns = []
        out = {}
        # iterate a copy since paths may be mutated on error below
        for p in paths.copy():
            try:
                detail = self._check_file(p)
                if not detail:
                    fn = self._make_local_details(p)
                    getpaths.append(p)
                    storepaths.append(fn)
                else:
                    # SimpleCacheFileSystem._check_file returns a bare path,
                    # this class returns (detail, fn) — handle both
                    detail, fn = detail if isinstance(detail, tuple) else (None, detail)
                fns.append(fn)
            except Exception as e:
                if on_error == "raise":
                    raise
                if on_error == "return":
                    out[p] = e
                paths.remove(p)

        if getpaths:
            self.fs.get(getpaths, storepaths)
            self.save_cache()

        callback.set_size(len(paths))
        for p, fn in zip(paths, fns):
            with open(fn, "rb") as f:
                out[p] = f.read()
            callback.relative_update(1)
        if isinstance(path, str) and len(paths) == 1 and recursive is False:
            # single-path call: unwrap the dict to bare bytes
            out = out[paths[0]]
        return out

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if "r" not in mode:
            fn = self._make_local_details(path)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                # those kwargs were added by open(), we don't want them
                if k not in ["autocommit", "block_size", "cache_options"]
            }
            return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
        detail = self._check_file(path)
        if detail:
            detail, fn = detail
            _, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s", path)

                # In order to support downstream filesystems to be able to
                # infer the compression from the original filename, like
                # the `TarFileSystem`, let's extend the `io.BufferedReader`
                # fileobject protocol by adding a dedicated attribute
                # `original`.
                f = open(fn, mode)
                f.original = detail.get("original")
                return f
            else:
                raise ValueError(
                    f"Attempt to open partially cached file {path}"
                    f" as a wholly cached file"
                )
        else:
            fn = self._make_local_details(path)
        kwargs["mode"] = mode

        # call target filesystems open
        self._mkcache()
        if self.compression:
            # stream-decompress the remote file into the local cache copy
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        self.save_cache()
        # recurse: the file is now fully cached, so this hits the fast path
        return self._open(path, mode)
|
| 707 |
+
|
| 708 |
+
|
| 709 |
+
class SimpleCacheFileSystem(WholeFileCacheFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This implementation only copies whole files, and
    does not keep any metadata about the download time or file details.
    It is therefore safer to use in multi-threaded/concurrent situations.

    This is the only of the caching filesystems that supports write: you will
    be given a real local open file, and upon close and commit, it will be
    uploaded to the target filesystem; the writability or the target URL is
    not checked until that time.

    """

    protocol = "simplecache"
    local_file = True
    transaction_type = WriteCachedTransaction

    def __init__(self, **kwargs):
        kw = kwargs.copy()
        # metadata-based checks are meaningless here (no metadata is kept)
        for key in ["cache_check", "expiry_time", "check_files"]:
            kw[key] = False
        super().__init__(**kw)
        for storage in self.storage:
            if not os.path.exists(storage):
                os.makedirs(storage, exist_ok=True)

    def _check_file(self, path):
        # No metadata: a file is "cached" iff its mapped name exists in any
        # storage location. Returns the local path, or None (implicitly).
        self._check_cache()
        sha = self._mapper(path)
        for storage in self.storage:
            fn = os.path.join(storage, sha)
            if os.path.exists(fn):
                return fn

    def save_cache(self):
        # no metadata to persist
        pass

    def load_cache(self):
        # no metadata to load
        pass

    def pipe_file(self, path, value=None, **kwargs):
        """Write *value* to *path*; inside a transaction, stage it locally."""
        if self._intrans:
            with self.open(path, "wb") as f:
                f.write(value)
        else:
            super().pipe_file(path, value)

    def pipe(self, path, value=None, **kwargs):
        """Put value(s) into path(s); accepts a str path or a {path: value} dict."""
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        lpaths = [self._check_file(p) for p in paths]
        # NOTE(review): _check_file above returns a local path or None, never
        # False, so `l is False` never matches — uncached paths are seemingly
        # never pre-downloaded here. Looks like it should be `l is None`;
        # verify intent against upstream before changing.
        rpaths = [p for l, p in zip(lpaths, paths) if l is False]
        lpaths = [l for l, p in zip(lpaths, paths) if l is False]
        self.fs.get(rpaths, lpaths)
        return super().cat_ranges(
            paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
        )

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        sha = self._mapper(path)

        if "r" not in mode:
            # write: stage in a LocalTempFile, uploaded on commit
            fn = os.path.join(self.storage[-1], sha)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                if k not in ["autocommit", "block_size", "cache_options"]
            }  # those were added by open()
            return LocalTempFile(
                self,
                path,
                mode=mode,
                autocommit=not self._intrans,
                fn=fn,
                **user_specified_kwargs,
            )
        fn = self._check_file(path)
        if fn:
            # already fully cached
            return open(fn, mode)

        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache", path)
        kwargs["mode"] = mode

        self._mkcache()
        self._cache_size = None
        if self.compression:
            # stream-decompress the remote file into the local cache copy
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        # recurse: the file is now fully cached, so this hits the fast path
        return self._open(path, mode)
|
| 827 |
+
|
| 828 |
+
|
| 829 |
+
class LocalTempFile:
    """A local staging file whose contents are uploaded to the target
    filesystem on commit (automatically on close when ``autocommit`` is set).
    """

    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
        # Open the local backing file first; the rest is bookkeeping.
        self.fn = fn
        self.fh = open(fn, mode)
        self.mode = mode
        if seek:
            self.fh.seek(seek)
        self.fs = fs
        self.path = path
        self.autocommit = autocommit
        self.closed = False
        self.kwargs = kwargs

    def __reduce__(self):
        # Reopen in "r+b" on unpickle so writing can continue at the
        # recorded offset without truncating existing data.
        ctor_args = (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell())
        return LocalTempFile, ctor_args

    def __enter__(self):
        return self.fh

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        # Idempotent: a second close is a no-op.
        if self.closed:
            return
        self.fh.close()
        self.closed = True
        if self.autocommit:
            self.commit()

    def discard(self):
        # Abandon the pending upload and delete the local copy.
        self.fh.close()
        os.remove(self.fn)

    def commit(self):
        # Upload to the remote target, then try to remove the local copy.
        self.fs.put(self.fn, self.path, **self.kwargs)
        try:
            os.remove(self.fn)
        except (PermissionError, FileNotFoundError):
            # file path may be held by new version of the file on windows
            pass

    @property
    def name(self):
        return self.fn

    def __getattr__(self, item):
        # Delegate everything else (write, read, tell, ...) to the handle.
        return getattr(self.fh, item)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dask
|
| 2 |
+
from distributed.client import Client, _get_global_client
|
| 3 |
+
from distributed.worker import Worker
|
| 4 |
+
|
| 5 |
+
from fsspec import filesystem
|
| 6 |
+
from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
|
| 7 |
+
from fsspec.utils import infer_storage_options
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _get_client(client):
    """Resolve *client* to a distributed ``Client`` instance.

    An existing ``Client`` is returned unchanged; ``None`` means "use the
    current global client"; anything else (e.g. a scheduler connection
    string) is used to construct a new ``Client``.
    """
    if isinstance(client, Client):
        return client
    if client is None:
        return _get_global_client()
    # e.g., connection string
    return Client(client)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _in_worker():
    """Return True when this code appears to run inside a dask Worker."""
    return len(Worker._instances) > 0
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class DaskWorkerFileSystem(AbstractFileSystem):
    """View files accessible to a worker as any other remote file-system

    When instances are run on the worker, uses the real filesystem. When
    run on the client, they call the worker to provide information or data.

    **Warning** this implementation is experimental, and read-only for now.
    """

    def __init__(
        self, target_protocol=None, target_options=None, fs=None, client=None, **kwargs
    ):
        super().__init__(**kwargs)
        # Exactly one of `fs` and `target_protocol` may be supplied.
        if (fs is None) == (target_protocol is None):
            raise ValueError(
                "Please provide one of filesystem instance (fs) or"
                " target_protocol, not both"
            )
        self.target_protocol = target_protocol
        self.target_options = target_options
        self.worker = None
        self.client = client
        self.fs = fs
        self._determine_worker()

    @staticmethod
    def _get_kwargs_from_urls(path):
        opts = infer_storage_options(path)
        # A host:port pair in the URL identifies the dask scheduler to use.
        if "host" in opts and "port" in opts:
            return {"client": f"{opts['host']}:{opts['port']}"}
        return {}

    def _determine_worker(self):
        self.worker = _in_worker()
        if self.worker:
            # Running on a worker: talk to the real filesystem directly.
            if self.fs is None:
                self.fs = filesystem(
                    self.target_protocol, **(self.target_options or {})
                )
        else:
            # Running on the client: route operations through a delayed
            # proxy of self, which dask executes on a worker.
            self.client = _get_client(self.client)
            self.rfs = dask.delayed(self)

    def mkdir(self, *args, **kwargs):
        if not self.worker:
            self.rfs.mkdir(*args, **kwargs).compute()
        else:
            self.fs.mkdir(*args, **kwargs)

    def rm(self, *args, **kwargs):
        if not self.worker:
            self.rfs.rm(*args, **kwargs).compute()
        else:
            self.fs.rm(*args, **kwargs)

    def copy(self, *args, **kwargs):
        if not self.worker:
            self.rfs.copy(*args, **kwargs).compute()
        else:
            self.fs.copy(*args, **kwargs)

    def mv(self, *args, **kwargs):
        if not self.worker:
            self.rfs.mv(*args, **kwargs).compute()
        else:
            self.fs.mv(*args, **kwargs)

    def ls(self, *args, **kwargs):
        if self.worker:
            return self.fs.ls(*args, **kwargs)
        return self.rfs.ls(*args, **kwargs).compute()

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        shared = dict(
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
        if self.worker:
            return self.fs._open(path, **shared)
        # On the client, return a lazy file whose byte-range reads are
        # executed remotely via fetch_range.
        return DaskFile(fs=self, path=path, **shared)

    def fetch_range(self, path, mode, start, end):
        if not self.worker:
            return self.rfs.fetch_range(path, mode, start, end).compute()
        with self._open(path, mode) as f:
            f.seek(start)
            return f.read(end - start)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
class DaskFile(AbstractBufferedFile):
    """Buffered file returned by DaskWorkerFileSystem on the client side.

    Read-only: byte-range reads are forwarded to a worker through
    ``fs.fetch_range``.
    """

    def __init__(self, mode="rb", **kwargs):
        # The backend is read-only for now, so only "rb" is accepted.
        if mode != "rb":
            raise ValueError('Remote dask files can only be opened in "rb" mode')
        super().__init__(**kwargs)

    def _upload_chunk(self, final=False):
        # Writing is unsupported; nothing to upload.
        pass

    def _initiate_upload(self):
        """Create remote file/upload"""
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.fs.fetch_range(self.path, self.mode, start, end)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .. import filesystem
|
| 2 |
+
from ..asyn import AsyncFileSystem
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class DirFileSystem(AsyncFileSystem):
    """Directory prefix filesystem

    The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with
    is relative to the `path`. After performing the necessary paths operation it
    delegates everything to the wrapped filesystem.
    """

    protocol = "dir"

    def __init__(
        self,
        path=None,
        fs=None,
        fo=None,
        target_protocol=None,
        target_options=None,
        **storage_options,
    ):
        """
        Parameters
        ----------
        path: str
            Path to the directory.
        fs: AbstractFileSystem
            An instantiated filesystem to wrap.
        target_protocol, target_options:
            if fs is none, construct it from these
        fo: str
            Alternate for path; do not provide both
        """
        super().__init__(**storage_options)
        if fs is None:
            fs = filesystem(protocol=target_protocol, **(target_options or {}))
        # Raises when neither or both of path/fo are given (the expression
        # evaluates truthy exactly in those two cases).
        if (path is not None) ^ (fo is not None) is False:
            raise ValueError("Provide path or fo, not both")
        path = path or fo

        if self.asynchronous and not fs.async_impl:
            raise ValueError("can't use asynchronous with non-async fs")

        if fs.async_impl and self.asynchronous != fs.asynchronous:
            raise ValueError("both dirfs and fs should be in the same sync/async mode")

        self.path = fs._strip_protocol(path)
        self.fs = fs

    def _join(self, path):
        """Prepend the directory prefix to *path* (str or list of str)."""
        if isinstance(path, str):
            if not self.path:
                return path
            if not path:
                return self.path
            return self.fs.sep.join((self.path, self._strip_protocol(path)))
        return [self._join(_path) for _path in path]

    def _relpath(self, path):
        """Strip the directory prefix from *path* (str or list of str)."""
        if isinstance(path, str):
            if not self.path:
                return path
            if path == self.path:
                return ""
            prefix = self.path + self.fs.sep
            assert path.startswith(prefix)
            return path[len(prefix) :]
        return [self._relpath(_path) for _path in path]

    # Wrappers below

    @property
    def sep(self):
        return self.fs.sep

    async def set_session(self, *args, **kwargs):
        return await self.fs.set_session(*args, **kwargs)

    async def _rm_file(self, path, **kwargs):
        return await self.fs._rm_file(self._join(path), **kwargs)

    def rm_file(self, path, **kwargs):
        return self.fs.rm_file(self._join(path), **kwargs)

    async def _rm(self, path, *args, **kwargs):
        return await self.fs._rm(self._join(path), *args, **kwargs)

    def rm(self, path, *args, **kwargs):
        return self.fs.rm(self._join(path), *args, **kwargs)

    async def _cp_file(self, path1, path2, **kwargs):
        return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs)

    def cp_file(self, path1, path2, **kwargs):
        return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs)

    async def _copy(
        self,
        path1,
        path2,
        *args,
        **kwargs,
    ):
        return await self.fs._copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    def copy(self, path1, path2, *args, **kwargs):
        return self.fs.copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    async def _pipe(self, path, *args, **kwargs):
        return await self.fs._pipe(self._join(path), *args, **kwargs)

    def pipe(self, path, *args, **kwargs):
        return self.fs.pipe(self._join(path), *args, **kwargs)

    async def _pipe_file(self, path, *args, **kwargs):
        return await self.fs._pipe_file(self._join(path), *args, **kwargs)

    def pipe_file(self, path, *args, **kwargs):
        return self.fs.pipe_file(self._join(path), *args, **kwargs)

    async def _cat_file(self, path, *args, **kwargs):
        return await self.fs._cat_file(self._join(path), *args, **kwargs)

    def cat_file(self, path, *args, **kwargs):
        return self.fs.cat_file(self._join(path), *args, **kwargs)

    async def _cat(self, path, *args, **kwargs):
        ret = await self.fs._cat(
            self._join(path),
            *args,
            **kwargs,
        )

        # A dict result maps paths to data; translate keys back to
        # prefix-relative form.
        if isinstance(ret, dict):
            return {self._relpath(key): value for key, value in ret.items()}

        return ret

    def cat(self, path, *args, **kwargs):
        ret = self.fs.cat(
            self._join(path),
            *args,
            **kwargs,
        )

        if isinstance(ret, dict):
            return {self._relpath(key): value for key, value in ret.items()}

        return ret

    async def _put_file(self, lpath, rpath, **kwargs):
        return await self.fs._put_file(lpath, self._join(rpath), **kwargs)

    def put_file(self, lpath, rpath, **kwargs):
        return self.fs.put_file(lpath, self._join(rpath), **kwargs)

    async def _put(
        self,
        lpath,
        rpath,
        *args,
        **kwargs,
    ):
        return await self.fs._put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    def put(self, lpath, rpath, *args, **kwargs):
        return self.fs.put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    async def _get_file(self, rpath, lpath, **kwargs):
        return await self.fs._get_file(self._join(rpath), lpath, **kwargs)

    def get_file(self, rpath, lpath, **kwargs):
        return self.fs.get_file(self._join(rpath), lpath, **kwargs)

    async def _get(self, rpath, *args, **kwargs):
        return await self.fs._get(self._join(rpath), *args, **kwargs)

    def get(self, rpath, *args, **kwargs):
        return self.fs.get(self._join(rpath), *args, **kwargs)

    async def _isfile(self, path):
        return await self.fs._isfile(self._join(path))

    def isfile(self, path):
        return self.fs.isfile(self._join(path))

    async def _isdir(self, path):
        return await self.fs._isdir(self._join(path))

    def isdir(self, path):
        return self.fs.isdir(self._join(path))

    async def _size(self, path):
        return await self.fs._size(self._join(path))

    def size(self, path):
        return self.fs.size(self._join(path))

    async def _exists(self, path):
        return await self.fs._exists(self._join(path))

    def exists(self, path):
        return self.fs.exists(self._join(path))

    async def _info(self, path, **kwargs):
        return await self.fs._info(self._join(path), **kwargs)

    def info(self, path, **kwargs):
        return self.fs.info(self._join(path), **kwargs)

    async def _ls(self, path, detail=True, **kwargs):
        ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
        if detail:
            # Copy each entry so the wrapped filesystem's dircache is not
            # mutated when rewriting names.
            out = []
            for entry in ret:
                entry = entry.copy()
                entry["name"] = self._relpath(entry["name"])
                out.append(entry)
            return out

        return self._relpath(ret)

    def ls(self, path, detail=True, **kwargs):
        ret = self.fs.ls(self._join(path), detail=detail, **kwargs).copy()
        if detail:
            out = []
            for entry in ret:
                entry = entry.copy()
                entry["name"] = self._relpath(entry["name"])
                out.append(entry)
            return out

        return self._relpath(ret)

    async def _walk(self, path, *args, **kwargs):
        async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    def walk(self, path, *args, **kwargs):
        for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    async def _glob(self, path, **kwargs):
        detail = kwargs.get("detail", False)
        ret = await self.fs._glob(self._join(path), **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    def glob(self, path, **kwargs):
        detail = kwargs.get("detail", False)
        ret = self.fs.glob(self._join(path), **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    async def _du(self, path, *args, **kwargs):
        total = kwargs.get("total", True)
        ret = await self.fs._du(self._join(path), *args, **kwargs)
        if total:
            return ret

        return {self._relpath(path): size for path, size in ret.items()}

    def du(self, path, *args, **kwargs):
        total = kwargs.get("total", True)
        ret = self.fs.du(self._join(path), *args, **kwargs)
        if total:
            return ret

        return {self._relpath(path): size for path, size in ret.items()}

    async def _find(self, path, *args, **kwargs):
        detail = kwargs.get("detail", False)
        ret = await self.fs._find(self._join(path), *args, **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    def find(self, path, *args, **kwargs):
        detail = kwargs.get("detail", False)
        ret = self.fs.find(self._join(path), *args, **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    async def _expand_path(self, path, *args, **kwargs):
        return self._relpath(
            await self.fs._expand_path(self._join(path), *args, **kwargs)
        )

    def expand_path(self, path, *args, **kwargs):
        return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs))

    async def _mkdir(self, path, *args, **kwargs):
        return await self.fs._mkdir(self._join(path), *args, **kwargs)

    def mkdir(self, path, *args, **kwargs):
        return self.fs.mkdir(self._join(path), *args, **kwargs)

    async def _makedirs(self, path, *args, **kwargs):
        return await self.fs._makedirs(self._join(path), *args, **kwargs)

    def makedirs(self, path, *args, **kwargs):
        return self.fs.makedirs(self._join(path), *args, **kwargs)

    def rmdir(self, path):
        return self.fs.rmdir(self._join(path))

    def mv_file(self, path1, path2, **kwargs):
        return self.fs.mv_file(
            self._join(path1),
            self._join(path2),
            **kwargs,
        )

    def touch(self, path, **kwargs):
        return self.fs.touch(self._join(path), **kwargs)

    def created(self, path):
        return self.fs.created(self._join(path))

    def modified(self, path):
        return self.fs.modified(self._join(path))

    def sign(self, path, *args, **kwargs):
        return self.fs.sign(self._join(path), *args, **kwargs)

    def __repr__(self):
        return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})"

    def open(
        self,
        path,
        *args,
        **kwargs,
    ):
        return self.fs.open(
            self._join(path),
            *args,
            **kwargs,
        )
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import pygit2
|
| 4 |
+
|
| 5 |
+
from fsspec.spec import AbstractFileSystem
|
| 6 |
+
|
| 7 |
+
from .memory import MemoryFile
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)
    """

    root_marker = ""
    cachable = True

    def __init__(self, path=None, fo=None, ref=None, **kwargs):
        """

        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given).
            May be deprecated in favour of ``fo``. When used with a higher
            level function such as fsspec.open(), may be of the form
            "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
            file path should not contain "@" or ":").
        fo: str (optional)
            Same as ``path``, but passed as part of a chained URL. This one
            takes precedence if both are given.
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name. Defaults
            to current working tree. Note that ``ls`` and ``open`` also take hash,
            so this becomes the default for those operations
        kwargs
        """
        super().__init__(**kwargs)
        repo_location = fo or path or os.getcwd()
        self.repo = pygit2.Repository(repo_location)
        self.ref = ref or "master"

    @classmethod
    def _strip_protocol(cls, path):
        path = super()._strip_protocol(path).lstrip("/")
        # Remove the optional "repo-path:" and "ref@" markers of chained URLs.
        if ":" in path:
            path = path.split(":", 1)[1]
        if "@" in path:
            path = path.split("@", 1)[1]
        return path.lstrip("/")

    def _path_to_object(self, path, ref):
        comm, ref = self.repo.resolve_refish(ref or self.ref)
        obj = comm.tree
        # Walk down the commit tree one path component at a time; stops
        # descending once a non-tree (blob) is reached.
        for part in path.split("/"):
            if part and isinstance(obj, pygit2.Tree):
                obj = obj[part]
        return obj

    @staticmethod
    def _get_kwargs_from_urls(path):
        path = path.removeprefix("git://")
        out = {}
        if ":" in path:
            out["path"], path = path.split(":", 1)
        if "@" in path:
            out["ref"], path = path.split("@", 1)
        return out

    @staticmethod
    def _entry_info(name, obj):
        # Build one ls()-style info dict for a tree (directory) or blob (file).
        if isinstance(obj, pygit2.Tree):
            return {
                "type": "directory",
                "name": name,
                "hex": obj.hex,
                "mode": f"{obj.filemode:o}",
                "size": 0,
            }
        return {
            "type": "file",
            "name": name,
            "hex": obj.hex,
            "mode": f"{obj.filemode:o}",
            "size": obj.size,
        }

    def ls(self, path, detail=True, ref=None, **kwargs):
        path = self._strip_protocol(path)
        node = self._path_to_object(path, ref)
        if isinstance(node, pygit2.Tree):
            out = [
                self._entry_info("/".join([path, obj.name]).lstrip("/"), obj)
                for obj in node
            ]
        else:
            out = [self._entry_info(node.name, node)]
        if not detail:
            return [o["name"] for o in out]
        return out

    def ukey(self, path, ref=None):
        return self.info(path, ref=ref)["hex"]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        ref=None,
        **kwargs,
    ):
        obj = self._path_to_object(path, ref or self.ref)
        return MemoryFile(data=obj.data)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py
ADDED
|
@@ -0,0 +1,868 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import io
|
| 3 |
+
import logging
|
| 4 |
+
import re
|
| 5 |
+
import weakref
|
| 6 |
+
from copy import copy
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
|
| 9 |
+
import aiohttp
|
| 10 |
+
import yarl
|
| 11 |
+
|
| 12 |
+
from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper
|
| 13 |
+
from fsspec.callbacks import DEFAULT_CALLBACK
|
| 14 |
+
from fsspec.exceptions import FSTimeoutError
|
| 15 |
+
from fsspec.spec import AbstractBufferedFile
|
| 16 |
+
from fsspec.utils import (
|
| 17 |
+
DEFAULT_BLOCK_SIZE,
|
| 18 |
+
glob_translate,
|
| 19 |
+
isfilelike,
|
| 20 |
+
nullcontext,
|
| 21 |
+
tokenize,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
from ..caching import AllBytes
|
| 25 |
+
|
| 26 |
+
# https://stackoverflow.com/a/15926317/3821154
|
| 27 |
+
ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
|
| 28 |
+
ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
|
| 29 |
+
logger = logging.getLogger("fsspec.http")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def get_client(**kwargs):
    """Build and return a new ``aiohttp.ClientSession`` from ``kwargs``."""
    session = aiohttp.ClientSession(**kwargs)
    return session
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class HTTPFileSystem(AsyncFileSystem):
|
| 37 |
+
"""
|
| 38 |
+
Simple File-System for fetching data via HTTP(S)
|
| 39 |
+
|
| 40 |
+
``ls()`` is implemented by loading the parent page and doing a regex
|
| 41 |
+
match on the result. If simple_link=True, anything of the form
|
| 42 |
+
"http(s)://server.com/stuff?thing=other"; otherwise only links within
|
| 43 |
+
HTML href tags will be used.
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
+
sep = "/"
|
| 47 |
+
|
| 48 |
+
def __init__(
|
| 49 |
+
self,
|
| 50 |
+
simple_links=True,
|
| 51 |
+
block_size=None,
|
| 52 |
+
same_scheme=True,
|
| 53 |
+
size_policy=None,
|
| 54 |
+
cache_type="bytes",
|
| 55 |
+
cache_options=None,
|
| 56 |
+
asynchronous=False,
|
| 57 |
+
loop=None,
|
| 58 |
+
client_kwargs=None,
|
| 59 |
+
get_client=get_client,
|
| 60 |
+
encoded=False,
|
| 61 |
+
**storage_options,
|
| 62 |
+
):
|
| 63 |
+
"""
|
| 64 |
+
NB: if this is called async, you must await set_client
|
| 65 |
+
|
| 66 |
+
Parameters
|
| 67 |
+
----------
|
| 68 |
+
block_size: int
|
| 69 |
+
Blocks to read bytes; if 0, will default to raw requests file-like
|
| 70 |
+
objects instead of HTTPFile instances
|
| 71 |
+
simple_links: bool
|
| 72 |
+
If True, will consider both HTML <a> tags and anything that looks
|
| 73 |
+
like a URL; if False, will consider only the former.
|
| 74 |
+
same_scheme: True
|
| 75 |
+
When doing ls/glob, if this is True, only consider paths that have
|
| 76 |
+
http/https matching the input URLs.
|
| 77 |
+
size_policy: this argument is deprecated
|
| 78 |
+
client_kwargs: dict
|
| 79 |
+
Passed to aiohttp.ClientSession, see
|
| 80 |
+
https://docs.aiohttp.org/en/stable/client_reference.html
|
| 81 |
+
For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
|
| 82 |
+
get_client: Callable[..., aiohttp.ClientSession]
|
| 83 |
+
A callable which takes keyword arguments and constructs
|
| 84 |
+
an aiohttp.ClientSession. It's state will be managed by
|
| 85 |
+
the HTTPFileSystem class.
|
| 86 |
+
storage_options: key-value
|
| 87 |
+
Any other parameters passed on to requests
|
| 88 |
+
cache_type, cache_options: defaults used in open
|
| 89 |
+
"""
|
| 90 |
+
super().__init__(self, asynchronous=asynchronous, loop=loop, **storage_options)
|
| 91 |
+
self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
|
| 92 |
+
self.simple_links = simple_links
|
| 93 |
+
self.same_schema = same_scheme
|
| 94 |
+
self.cache_type = cache_type
|
| 95 |
+
self.cache_options = cache_options
|
| 96 |
+
self.client_kwargs = client_kwargs or {}
|
| 97 |
+
self.get_client = get_client
|
| 98 |
+
self.encoded = encoded
|
| 99 |
+
self.kwargs = storage_options
|
| 100 |
+
self._session = None
|
| 101 |
+
|
| 102 |
+
# Clean caching-related parameters from `storage_options`
|
| 103 |
+
# before propagating them as `request_options` through `self.kwargs`.
|
| 104 |
+
# TODO: Maybe rename `self.kwargs` to `self.request_options` to make
|
| 105 |
+
# it clearer.
|
| 106 |
+
request_options = copy(storage_options)
|
| 107 |
+
self.use_listings_cache = request_options.pop("use_listings_cache", False)
|
| 108 |
+
request_options.pop("listings_expiry_time", None)
|
| 109 |
+
request_options.pop("max_paths", None)
|
| 110 |
+
request_options.pop("skip_instance_cache", None)
|
| 111 |
+
self.kwargs = request_options
|
| 112 |
+
|
| 113 |
+
    @property
    def fsid(self):
        """Filesystem id; constant, as all HTTP instances are equivalent."""
        return "http"
|
| 116 |
+
|
| 117 |
+
    def encode_url(self, url):
        """Wrap ``url`` in a yarl.URL, honouring the instance's ``encoded`` flag."""
        return yarl.URL(url, encoded=self.encoded)
|
| 119 |
+
|
| 120 |
+
    @staticmethod
    def close_session(loop, session):
        # Best-effort teardown, registered as a weakref finalizer in
        # set_session(); must tolerate the event loop being closed or gone.
        if loop is not None and loop.is_running():
            try:
                sync(loop, session.close, timeout=0.1)
                return
            except (TimeoutError, FSTimeoutError, NotImplementedError):
                pass
        connector = getattr(session, "_connector", None)
        if connector is not None:
            # close after loop is dead
            connector._close()
|
| 132 |
+
|
| 133 |
+
    async def set_session(self):
        """Create (once) and return the aiohttp session used for all requests."""
        if self._session is None:
            self._session = await self.get_client(loop=self.loop, **self.client_kwargs)
            if not self.asynchronous:
                # Sync usage: arrange for the session to be closed when this
                # filesystem instance is garbage-collected.
                weakref.finalize(self, self.close_session, self.loop, self._session)
        return self._session
|
| 139 |
+
|
| 140 |
+
    @classmethod
    def _strip_protocol(cls, path):
        """For HTTP, we always want to keep the full URL"""
        # no-op: the scheme/host are part of the resource identifier
        return path
|
| 144 |
+
|
| 145 |
+
    @classmethod
    def _parent(cls, path):
        """Parent of a URL; empty string once only the bare scheme remains."""
        # override, since _strip_protocol is different for URLs
        par = super()._parent(path)
        if len(par) > 7:  # "http://..."
            return par
        return ""
|
| 152 |
+
|
| 153 |
+
async def _ls_real(self, url, detail=True, **kwargs):
|
| 154 |
+
# ignoring URL-encoded arguments
|
| 155 |
+
kw = self.kwargs.copy()
|
| 156 |
+
kw.update(kwargs)
|
| 157 |
+
logger.debug(url)
|
| 158 |
+
session = await self.set_session()
|
| 159 |
+
async with session.get(self.encode_url(url), **self.kwargs) as r:
|
| 160 |
+
self._raise_not_found_for_status(r, url)
|
| 161 |
+
text = await r.text()
|
| 162 |
+
if self.simple_links:
|
| 163 |
+
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
|
| 164 |
+
else:
|
| 165 |
+
links = [u[2] for u in ex.findall(text)]
|
| 166 |
+
out = set()
|
| 167 |
+
parts = urlparse(url)
|
| 168 |
+
for l in links:
|
| 169 |
+
if isinstance(l, tuple):
|
| 170 |
+
l = l[1]
|
| 171 |
+
if l.startswith("/") and len(l) > 1:
|
| 172 |
+
# absolute URL on this server
|
| 173 |
+
l = f"{parts.scheme}://{parts.netloc}{l}"
|
| 174 |
+
if l.startswith("http"):
|
| 175 |
+
if self.same_schema and l.startswith(url.rstrip("/") + "/"):
|
| 176 |
+
out.add(l)
|
| 177 |
+
elif l.replace("https", "http").startswith(
|
| 178 |
+
url.replace("https", "http").rstrip("/") + "/"
|
| 179 |
+
):
|
| 180 |
+
# allowed to cross http <-> https
|
| 181 |
+
out.add(l)
|
| 182 |
+
else:
|
| 183 |
+
if l not in ["..", "../"]:
|
| 184 |
+
# Ignore FTP-like "parent"
|
| 185 |
+
out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
|
| 186 |
+
if not out and url.endswith("/"):
|
| 187 |
+
out = await self._ls_real(url.rstrip("/"), detail=False)
|
| 188 |
+
if detail:
|
| 189 |
+
return [
|
| 190 |
+
{
|
| 191 |
+
"name": u,
|
| 192 |
+
"size": None,
|
| 193 |
+
"type": "directory" if u.endswith("/") else "file",
|
| 194 |
+
}
|
| 195 |
+
for u in out
|
| 196 |
+
]
|
| 197 |
+
else:
|
| 198 |
+
return sorted(out)
|
| 199 |
+
|
| 200 |
+
async def _ls(self, url, detail=True, **kwargs):
|
| 201 |
+
if self.use_listings_cache and url in self.dircache:
|
| 202 |
+
out = self.dircache[url]
|
| 203 |
+
else:
|
| 204 |
+
out = await self._ls_real(url, detail=detail, **kwargs)
|
| 205 |
+
self.dircache[url] = out
|
| 206 |
+
return out
|
| 207 |
+
|
| 208 |
+
ls = sync_wrapper(_ls)
|
| 209 |
+
|
| 210 |
+
    def _raise_not_found_for_status(self, response, url):
        """
        Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
        """
        # 404 -> FileNotFoundError so fsspec exists()/info() semantics work
        if response.status == 404:
            raise FileNotFoundError(url)
        response.raise_for_status()
|
| 217 |
+
|
| 218 |
+
    async def _cat_file(self, url, start=None, end=None, **kwargs):
        """Fetch the contents of ``url`` (optionally a byte range) as bytes."""
        kw = self.kwargs.copy()
        kw.update(kwargs)
        logger.debug(url)

        if start is not None or end is not None:
            if start == end:
                # empty range, no request needed
                return b""
            headers = kw.pop("headers", {}).copy()

            # translate start/end into an HTTP Range header
            headers["Range"] = await self._process_limits(url, start, end)
            kw["headers"] = headers
        session = await self.set_session()
        async with session.get(self.encode_url(url), **kw) as r:
            out = await r.read()
            self._raise_not_found_for_status(r, url)
        return out
|
| 235 |
+
|
| 236 |
+
    async def _get_file(
        self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs
    ):
        """Stream remote ``rpath`` into local file/path ``lpath`` chunk by chunk."""
        kw = self.kwargs.copy()
        kw.update(kwargs)
        logger.debug(rpath)
        session = await self.set_session()
        async with session.get(self.encode_url(rpath), **kw) as r:
            try:
                size = int(r.headers["content-length"])
            except (ValueError, KeyError):
                # server did not supply a usable length; callback gets None
                size = None

            callback.set_size(size)
            self._raise_not_found_for_status(r, rpath)
            if isfilelike(lpath):
                outfile = lpath
            else:
                outfile = open(lpath, "wb")  # noqa: ASYNC101

            try:
                chunk = True
                while chunk:
                    chunk = await r.content.read(chunk_size)
                    outfile.write(chunk)
                    callback.relative_update(len(chunk))
            finally:
                # only close files we opened ourselves
                if not isfilelike(lpath):
                    outfile.close()
|
| 265 |
+
|
| 266 |
+
    async def _put_file(
        self,
        lpath,
        rpath,
        chunk_size=5 * 2**20,
        callback=DEFAULT_CALLBACK,
        method="post",
        **kwargs,
    ):
        """Upload local ``lpath`` to ``rpath`` via POST or PUT, streaming chunks."""

        async def gen_chunks():
            # Support passing arbitrary file-like objects
            # and use them instead of streams.
            if isinstance(lpath, io.IOBase):
                context = nullcontext(lpath)
                use_seek = False  # might not support seeking
            else:
                context = open(lpath, "rb")  # noqa: ASYNC101
                use_seek = True

            with context as f:
                if use_seek:
                    # seek to end to learn total size for progress reporting
                    callback.set_size(f.seek(0, 2))
                    f.seek(0)
                else:
                    callback.set_size(getattr(f, "size", None))

                chunk = f.read(chunk_size)
                while chunk:
                    yield chunk
                    callback.relative_update(len(chunk))
                    chunk = f.read(chunk_size)

        kw = self.kwargs.copy()
        kw.update(kwargs)
        session = await self.set_session()

        method = method.lower()
        if method not in ("post", "put"):
            raise ValueError(
                f"method has to be either 'post' or 'put', not: {method!r}"
            )

        meth = getattr(session, method)
        async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
            self._raise_not_found_for_status(resp, rpath)
|
| 311 |
+
|
| 312 |
+
    async def _exists(self, path, **kwargs):
        """True if a GET on ``path`` succeeds with status < 400."""
        kw = self.kwargs.copy()
        kw.update(kwargs)
        try:
            logger.debug(path)
            session = await self.set_session()
            r = await session.get(self.encode_url(path), **kw)
            async with r:
                return r.status < 400
        except aiohttp.ClientError:
            # connection/protocol failures are treated as "does not exist"
            return False
|
| 323 |
+
|
| 324 |
+
    async def _isfile(self, path, **kwargs):
        """Every reachable URL is considered a file."""
        return await self._exists(path, **kwargs)
|
| 326 |
+
|
| 327 |
+
    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=None,  # XXX: This differs from the base class.
        cache_type=None,
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """Make a file-like object

        Parameters
        ----------
        path: str
            Full URL with protocol
        mode: string
            must be "rb"
        block_size: int or None
            Bytes to download in one request; use instance value if None. If
            zero, will return a streaming Requests file-like instance.
        kwargs: key-value
            Any other parameters, passed to requests calls
        """
        if mode != "rb":
            raise NotImplementedError
        block_size = block_size if block_size is not None else self.block_size
        kw = self.kwargs.copy()
        kw["asynchronous"] = self.asynchronous
        kw.update(kwargs)
        # the file size determines whether random access is possible
        size = size or self.info(path, **kwargs)["size"]
        session = sync(self.loop, self.set_session)
        if block_size and size:
            # known size and non-zero block size: random-access, cached file
            return HTTPFile(
                self,
                path,
                session=session,
                block_size=block_size,
                mode=mode,
                size=size,
                cache_type=cache_type or self.cache_type,
                cache_options=cache_options or self.cache_options,
                loop=self.loop,
                **kw,
            )
        else:
            # unknown size or block_size == 0: sequential streaming only
            return HTTPStreamFile(
                self,
                path,
                mode=mode,
                loop=self.loop,
                session=session,
                **kw,
            )
|
| 382 |
+
|
| 383 |
+
    async def open_async(self, path, mode="rb", size=None, **kwargs):
        """Return an async streaming file; size is looked up if not given."""
        session = await self.set_session()
        if size is None:
            try:
                size = (await self._info(path, **kwargs))["size"]
            except FileNotFoundError:
                # proceed with unknown size; reading will surface any error
                pass
        return AsyncStreamFile(
            self,
            path,
            loop=self.loop,
            session=session,
            size=size,
            **kwargs,
        )
|
| 398 |
+
|
| 399 |
+
    def ukey(self, url):
        """Unique identifier; assume HTTP files are static, unchanging"""
        # hash over URL + request options + protocol
        return tokenize(url, self.kwargs, self.protocol)
|
| 402 |
+
|
| 403 |
+
    async def _info(self, url, **kwargs):
        """Get info of URL

        Tries to access location via HEAD, and then GET methods, but does
        not fetch the data.

        It is possible that the server does not supply any size information, in
        which case size will be given as None (and certain operations on the
        corresponding file will not work).
        """
        info = {}
        session = await self.set_session()

        # HEAD first (cheap); fall back to GET if it fails or lacks a size
        for policy in ["head", "get"]:
            try:
                info.update(
                    await _file_info(
                        self.encode_url(url),
                        size_policy=policy,
                        session=session,
                        **self.kwargs,
                        **kwargs,
                    )
                )
                if info.get("size") is not None:
                    break
            except Exception as exc:
                if policy == "get":
                    # If get failed, then raise a FileNotFoundError
                    raise FileNotFoundError(url) from exc
                logger.debug(str(exc))

        return {"name": url, "size": None, **info, "type": "file"}
|
| 436 |
+
|
| 437 |
+
    async def _glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        This implementation is identical to the one in AbstractFileSystem,
        but "?" is not considered as a character for globbing, because it is
        so common in URLs, often identifying the "query" part.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")
        import re

        ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        # position of the first glob special character, if any
        min_idx = min(idx_star, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            # no glob characters: behave like exists()/info()
            if await self._exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: await self._info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            # search starts from the deepest non-glob directory
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                # "**" with no maxdepth: unbounded recursion
                depth = None

        allpaths = await self._find(
            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
        )

        pattern = glob_translate(path + ("/" if ends_with_slash else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)
|
| 509 |
+
|
| 510 |
+
    async def _isdir(self, path):
        """A URL counts as a directory if listing it yields any links."""
        # override, since all URLs are (also) files
        try:
            return bool(await self._ls(path))
        except (FileNotFoundError, ValueError):
            return False
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
class HTTPFile(AbstractBufferedFile):
|
| 519 |
+
"""
|
| 520 |
+
A file-like object pointing to a remove HTTP(S) resource
|
| 521 |
+
|
| 522 |
+
Supports only reading, with read-ahead of a predermined block-size.
|
| 523 |
+
|
| 524 |
+
In the case that the server does not supply the filesize, only reading of
|
| 525 |
+
the complete file in one go is supported.
|
| 526 |
+
|
| 527 |
+
Parameters
|
| 528 |
+
----------
|
| 529 |
+
url: str
|
| 530 |
+
Full URL of the remote resource, including the protocol
|
| 531 |
+
session: aiohttp.ClientSession or None
|
| 532 |
+
All calls will be made within this session, to avoid restarting
|
| 533 |
+
connections where the server allows this
|
| 534 |
+
block_size: int or None
|
| 535 |
+
The amount of read-ahead to do, in bytes. Default is 5MB, or the value
|
| 536 |
+
configured for the FileSystem creating this file
|
| 537 |
+
size: None or int
|
| 538 |
+
If given, this is the size of the file in bytes, and we don't attempt
|
| 539 |
+
to call the server to find the value.
|
| 540 |
+
kwargs: all other key-values are passed to requests calls.
|
| 541 |
+
"""
|
| 542 |
+
|
| 543 |
+
    def __init__(
        self,
        fs,
        url,
        session=None,
        block_size=None,
        mode="rb",
        cache_type="bytes",
        cache_options=None,
        size=None,
        loop=None,
        asynchronous=False,
        **kwargs,
    ):
        if mode != "rb":
            raise NotImplementedError("File mode not supported")
        self.asynchronous = asynchronous
        self.url = url
        self.session = session
        # pre-seed details so the base class does not have to call fs.info()
        self.details = {"name": url, "size": size, "type": "file"}
        super().__init__(
            fs=fs,
            path=url,
            mode=mode,
            block_size=block_size,
            cache_type=cache_type,
            cache_options=cache_options,
            **kwargs,
        )
        self.loop = loop
|
| 573 |
+
|
| 574 |
+
    def read(self, length=-1):
        """Read bytes from file

        Parameters
        ----------
        length: int
            Read up to this many bytes. If negative, read all content to end of
            file. If the server has not supplied the filesize, attempting to
            read only part of the data will raise a ValueError.
        """
        if (
            (length < 0 and self.loc == 0)  # explicit read all
            # but not when the size is known and fits into a block anyways
            and not (self.size is not None and self.size <= self.blocksize)
        ):
            self._fetch_all()
        if self.size is None:
            if length < 0:
                self._fetch_all()
        else:
            # clamp to the known number of remaining bytes
            length = min(self.size - self.loc, length)
        return super().read(length)
|
| 596 |
+
|
| 597 |
+
    async def async_fetch_all(self):
        """Read whole file in one shot, without caching

        This is only called when position is still at zero,
        and read() is called without a byte-count.
        """
        logger.debug(f"Fetch all for {self}")
        if not isinstance(self.cache, AllBytes):
            r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs)
            async with r:
                r.raise_for_status()
                out = await r.read()
                # replace the cache wholesale with the complete payload
                self.cache = AllBytes(
                    size=len(out), fetcher=None, blocksize=None, data=out
                )
                self.size = len(out)

    _fetch_all = sync_wrapper(async_fetch_all)
|
| 615 |
+
|
| 616 |
+
def _parse_content_range(self, headers):
|
| 617 |
+
"""Parse the Content-Range header"""
|
| 618 |
+
s = headers.get("Content-Range", "")
|
| 619 |
+
m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
|
| 620 |
+
if not m:
|
| 621 |
+
return None, None, None
|
| 622 |
+
|
| 623 |
+
if m[1] == "*":
|
| 624 |
+
start = end = None
|
| 625 |
+
else:
|
| 626 |
+
start, end = [int(x) for x in m[1].split("-")]
|
| 627 |
+
total = None if m[2] == "*" else int(m[2])
|
| 628 |
+
return start, end, total
|
| 629 |
+
|
| 630 |
+
    async def async_fetch_range(self, start, end):
        """Download a block of data

        The expectation is that the server returns only the requested bytes,
        with HTTP code 206. If this is not the case, we first check the headers,
        and then stream the output - if the data size is bigger than we
        requested, an exception is raised.
        """
        logger.debug(f"Fetch range for {self}: {start}-{end}")
        kwargs = self.kwargs.copy()
        headers = kwargs.pop("headers", {}).copy()
        # HTTP ranges are inclusive, hence end - 1
        headers["Range"] = f"bytes={start}-{end - 1}"
        logger.debug(f"{self.url} : {headers['Range']}")
        r = await self.session.get(
            self.fs.encode_url(self.url), headers=headers, **kwargs
        )
        async with r:
            if r.status == 416:
                # range request outside file
                return b""
            r.raise_for_status()

            # If the server has handled the range request, it should reply
            # with status 206 (partial content). But we'll guess that a suitable
            # Content-Range header or a Content-Length no more than the
            # requested range also mean we have got the desired range.
            response_is_range = (
                r.status == 206
                or self._parse_content_range(r.headers)[0] == start
                or int(r.headers.get("Content-Length", end + 1)) <= end - start
            )

            if response_is_range:
                # partial content, as expected
                out = await r.read()
            elif start > 0:
                raise ValueError(
                    "The HTTP server doesn't appear to support range requests. "
                    "Only reading this file from the beginning is supported. "
                    "Open with block_size=0 for a streaming file interface."
                )
            else:
                # Response is not a range, but we want the start of the file,
                # so we can read the required amount anyway.
                cl = 0
                out = []
                while True:
                    chunk = await r.content.read(2**20)
                    # data size unknown, let's read until we have enough
                    if chunk:
                        out.append(chunk)
                        cl += len(chunk)
                        if cl > end - start:
                            break
                    else:
                        break
                # trim any over-read down to exactly the requested window
                out = b"".join(out)[: end - start]
            return out

    _fetch_range = sync_wrapper(async_fetch_range)
|
| 690 |
+
|
| 691 |
+
    def __reduce__(self):
        """Pickle support: reconstruct by re-opening the URL via ``reopen``."""
        return (
            reopen,
            (
                self.fs,
                self.url,
                self.mode,
                self.blocksize,
                self.cache.name if self.cache else "none",
                self.size,
            ),
        )
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
def reopen(fs, url, mode, blocksize, cache_type, size=None):
    """Re-create a file object on ``fs``; used as the unpickling constructor."""
    return fs.open(
        url,
        mode=mode,
        block_size=blocksize,
        cache_type=cache_type,
        size=size,
    )
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
magic_check = re.compile("([*[])")


def has_magic(s):
    """Return True when *s* contains a glob special character (``*`` or ``[``)."""
    return magic_check.search(s) is not None
|
| 717 |
+
|
| 718 |
+
|
| 719 |
+
class HTTPStreamFile(AbstractBufferedFile):
|
| 720 |
+
    def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs):
        self.asynchronous = kwargs.pop("asynchronous", False)
        self.url = url
        self.loop = loop
        self.session = session
        if mode != "rb":
            raise ValueError
        # size is unknown for a pure stream
        self.details = {"name": url, "size": None}
        super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs)

        async def cor():
            # open the request eagerly; the body is then consumed by _read()
            r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__()
            self.fs._raise_not_found_for_status(r, url)
            return r

        self.r = sync(self.loop, cor)
|
| 736 |
+
|
| 737 |
+
def seek(self, loc, whence=0):
|
| 738 |
+
if loc == 0 and whence == 1:
|
| 739 |
+
return
|
| 740 |
+
if loc == self.loc and whence == 0:
|
| 741 |
+
return
|
| 742 |
+
raise ValueError("Cannot seek streaming HTTP file")
|
| 743 |
+
|
| 744 |
+
    async def _read(self, num=-1):
        # consume from the live response stream; position only moves forward
        out = await self.r.content.read(num)
        self.loc += len(out)
        return out

    read = sync_wrapper(_read)
|
| 750 |
+
|
| 751 |
+
    async def _close(self):
        # close the underlying aiohttp response
        self.r.close()

    def close(self):
        # schedule response close on the IO loop, then close the local buffer
        asyncio.run_coroutine_threadsafe(self._close(), self.loop)
        super().close()
|
| 757 |
+
|
| 758 |
+
    def __reduce__(self):
        # pickle by re-opening; the stream position is not preserved
        return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name)
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
class AsyncStreamFile(AbstractAsyncStreamedFile):
    """Purely-async streaming HTTP file; the GET is issued lazily on first read."""

    def __init__(
        self, fs, url, mode="rb", loop=None, session=None, size=None, **kwargs
    ):
        self.url = url
        self.session = session
        # response object; created on first read()
        self.r = None
        if mode != "rb":
            raise ValueError
        self.details = {"name": url, "size": None}
        self.kwargs = kwargs
        super().__init__(fs=fs, path=url, mode=mode, cache_type="none")
        self.size = size

    async def read(self, num=-1):
        if self.r is None:
            # open the request lazily, on first access
            r = await self.session.get(
                self.fs.encode_url(self.url), **self.kwargs
            ).__aenter__()
            self.fs._raise_not_found_for_status(r, self.url)
            self.r = r
        out = await self.r.content.read(num)
        self.loc += len(out)
        return out

    async def close(self):
        if self.r is not None:
            self.r.close()
            self.r = None
        await super().close()
|
| 792 |
+
|
| 793 |
+
|
| 794 |
+
async def get_range(session, url, start, end, file=None, **kwargs):
    """Fetch bytes ``[start, end)``; write into ``file`` at offset or return them."""
    # explicit get a range when we know it must be safe
    kwargs = kwargs.copy()
    headers = kwargs.pop("headers", {}).copy()
    headers["Range"] = f"bytes={start}-{end - 1}"
    r = await session.get(url, headers=headers, **kwargs)
    r.raise_for_status()
    async with r:
        out = await r.read()
    if file:
        # "r+b": the target file must already exist
        with open(file, "r+b") as f:  # noqa: ASYNC101
            f.seek(start)
            f.write(out)
    else:
        return out
|
| 809 |
+
|
| 810 |
+
|
| 811 |
+
async def _file_info(url, session, size_policy="head", **kwargs):
    """Call HEAD on the server to get details about the file (size/checksum etc.)

    Default operation is to explicitly allow redirects and use encoding
    'identity' (no compression) to get the true size of the target.
    """
    logger.debug("Retrieve file size for %s", url)
    kwargs = kwargs.copy()
    ar = kwargs.pop("allow_redirects", True)
    head = kwargs.get("headers", {}).copy()
    # ask for the uncompressed representation so Content-Length is meaningful
    head["Accept-Encoding"] = "identity"
    kwargs["headers"] = head

    info = {}
    if size_policy == "head":
        r = await session.head(url, allow_redirects=ar, **kwargs)
    elif size_policy == "get":
        r = await session.get(url, allow_redirects=ar, **kwargs)
    else:
        raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
    async with r:
        r.raise_for_status()

        # TODO:
        # recognise lack of 'Accept-Ranges',
        # or 'Accept-Ranges': 'none' (not 'bytes')
        # to mean streaming only, no random access => return None
        if "Content-Length" in r.headers:
            # Some servers may choose to ignore Accept-Encoding and return
            # compressed content, in which case the returned size is unreliable.
            if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
                "identity",
                "",
            ]:
                info["size"] = int(r.headers["Content-Length"])
        elif "Content-Range" in r.headers:
            info["size"] = int(r.headers["Content-Range"].split("/")[1])

        if "Content-Type" in r.headers:
            # strip any ";charset=..." suffix
            info["mimetype"] = r.headers["Content-Type"].partition(";")[0]

        info["url"] = str(r.url)

        for checksum_field in ["ETag", "Content-MD5", "Digest"]:
            if r.headers.get(checksum_field):
                info[checksum_field] = r.headers[checksum_field]

    return info
|
| 859 |
+
|
| 860 |
+
|
| 861 |
+
async def _file_size(url, session=None, *args, **kwargs):
    """Return the size in bytes of the target URL, or None if unknown.

    Thin convenience wrapper over ``_file_info`` that extracts only the
    ``"size"`` entry; a session is created on demand when none is supplied.
    """
    if session is None:
        session = await get_client()
    details = await _file_info(url, session=session, *args, **kwargs)
    return details.get("size")
|
| 866 |
+
|
| 867 |
+
|
| 868 |
+
file_size = sync_wrapper(_file_size)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import io
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
import fsspec
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class JupyterFileSystem(fsspec.AbstractFileSystem):
    """View of the files as seen by a Jupyter server (notebook or lab).

    Talks to the Jupyter Contents REST API (``/api/contents``) over HTTP,
    authenticating with a server token.
    """

    protocol = ("jupyter", "jlab")

    def __init__(self, url, tok=None, **kwargs):
        """
        Parameters
        ----------
        url : str
            Base URL of the server, like "http://127.0.0.1:8888". May include
            token in the string, which is given by the process when starting up
        tok : str
            If the token is obtained separately, can be given here
        kwargs
            Passed on to AbstractFileSystem

        Raises
        ------
        ValueError
            If the URL contains a query string but no token can be extracted
            from it and none was given explicitly.
        """
        if "?" in url:
            if tok is None:
                try:
                    # token normally appears as "?token=<hex>" in the URL the
                    # server prints at startup
                    tok = re.findall("token=([a-z0-9]+)", url)[0]
                except IndexError as e:
                    raise ValueError("Could not determine token") from e
            url = url.split("?", 1)[0]
        self.url = url.rstrip("/") + "/api/contents"
        self.session = requests.Session()
        if tok:
            self.session.headers["Authorization"] = f"token {tok}"

        super().__init__(**kwargs)

    def ls(self, path, detail=True, **kwargs):
        """List contents of a path; files and directories as reported by the server.

        Returns a list of info dicts when ``detail`` is True, else names only.
        Raises FileNotFoundError if the server reports 404.
        """
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # bug fix: previously this *returned* the exception instance
            # instead of raising it, so callers never saw an error
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()

        if out["type"] == "directory":
            out = out["content"]
        else:
            # a single file: normalize to a one-element listing
            out = [out]
        for o in out:
            o["name"] = o.pop("path")
            o.pop("content")
            # notebooks are just files as far as a filesystem is concerned
            if o["type"] == "notebook":
                o["type"] = "file"
        if detail:
            return out
        return [o["name"] for o in out]

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Fetch the full file contents, returning bytes[start:end].

        Raises FileNotFoundError if the server reports 404.
        """
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # bug fix: previously returned the exception instance instead of
            # raising it
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()
        if out["format"] == "text":
            # data should be binary
            b = out["content"].encode()
        else:
            b = base64.b64decode(out["content"])
        return b[start:end]

    def pipe_file(self, path, value, **_):
        """Upload ``value`` (bytes) as the full contents of ``path``."""
        path = self._strip_protocol(path)
        # Contents API expects base64-encoded payload for binary files
        json = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": len(value),
            "content": base64.b64encode(value).decode(),
            "format": "base64",
            "type": "file",
        }
        self.session.put(f"{self.url}/{path}", json=json)

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory, optionally creating missing parents first."""
        path = self._strip_protocol(path)
        if create_parents and "/" in path:
            # recursively ensure the parent chain exists
            self.mkdir(path.rsplit("/", 1)[0], True)
        json = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": None,
            "content": None,
            "type": "directory",
        }
        self.session.put(f"{self.url}/{path}", json=json)

    def _rm(self, path):
        """Delete a single path on the server."""
        path = self._strip_protocol(path)
        self.session.delete(f"{self.url}/{path}")

    def _open(self, path, mode="rb", **kwargs):
        """Open for reading (whole file buffered) or writing (upload on close)."""
        path = self._strip_protocol(path)
        if mode == "rb":
            data = self.cat_file(path)
            return io.BytesIO(data)
        else:
            return SimpleFileWriter(self, path, mode="wb")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
    """Buffered file that uploads everything in a single shot on close."""

    def _upload_chunk(self, final=False):
        """Never uploads a chunk until file is done

        Not suitable for large files
        """
        if final is False:
            # keep accumulating in the buffer; nothing sent yet
            return False
        self.buffer.seek(0)
        contents = self.buffer.read()
        self.fs.pipe_file(self.path, contents)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py
ADDED
|
@@ -0,0 +1,1160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import collections
|
| 3 |
+
import io
|
| 4 |
+
import itertools
|
| 5 |
+
import logging
|
| 6 |
+
import math
|
| 7 |
+
import os
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
from typing import TYPE_CHECKING
|
| 10 |
+
|
| 11 |
+
import fsspec.core
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
import ujson as json
|
| 15 |
+
except ImportError:
|
| 16 |
+
if not TYPE_CHECKING:
|
| 17 |
+
import json
|
| 18 |
+
|
| 19 |
+
from ..asyn import AsyncFileSystem
|
| 20 |
+
from ..callbacks import DEFAULT_CALLBACK
|
| 21 |
+
from ..core import filesystem, open, split_protocol
|
| 22 |
+
from ..utils import isfilelike, merge_offset_ranges, other_paths
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger("fsspec.reference")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ReferenceNotReachable(RuntimeError):
    """Raised when a reference's target URL/range cannot be fetched."""

    def __init__(self, reference, target, *args):
        super().__init__(*args)
        # remember which reference key and which target failed, for reporting
        self.reference = reference
        self.target = target

    def __str__(self):
        return f'Reference "{self.reference}" failed to fetch target {self.target}'
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _first(d):
|
| 38 |
+
return list(d.values())[0]
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _prot_in_references(path, references):
    """Protocol of the URL that ``path``'s reference points at.

    Returns None when the reference is raw bytes (not a list/tuple), and the
    falsy target itself (None or "") when the URL slot is empty.
    """
    ref = references.get(path)
    if not isinstance(ref, (list, tuple)):
        return None
    target = ref[0]
    if not target:
        return target
    return split_protocol(target)[0]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _protocol_groups(paths, references):
    """Group paths by the protocol of their reference targets.

    A single string path yields a one-entry mapping; otherwise each path is
    bucketed under the protocol its reference resolves to.
    """
    if isinstance(paths, str):
        return {_prot_in_references(paths, references): [paths]}
    grouped = {}
    for p in paths:
        proto = _prot_in_references(p, references)
        grouped.setdefault(proto, []).append(p)
    return grouped
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class RefsValuesView(collections.abc.ValuesView):
    """Lazy view over the values of a reference mapper.

    Yields, in order: JSON-encoded metadata values, in-memory items, then the
    chunk references of every field, streamed from the parquet records so the
    whole set is never materialised at once.
    """

    def __iter__(self):
        # metadata is stored decoded; re-encode to match what __getitem__ returns
        for val in self._mapping.zmetadata.values():
            yield json.dumps(val).encode()
        yield from self._mapping._items.values()
        for field in self._mapping.listdir():
            chunk_sizes = self._mapping._get_chunk_sizes(field)
            if len(chunk_sizes) == 0:
                # zero-dimensional (scalar) array: single chunk named "0"
                yield self._mapping[field + "/0"]
                continue
            yield from self._mapping._generate_all_records(field)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class RefsItemsView(collections.abc.ItemsView):
    """Lazy (key, value) pairs view over a reference mapper."""

    def __iter__(self):
        keys = self._mapping.keys()
        values = self._mapping.values()
        return zip(keys, values)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def ravel_multi_index(idx, sizes):
    """Flatten a multi-dimensional chunk index into a single C-order offset.

    Equivalent to numpy.ravel_multi_index for in-range indices, without the
    numpy dependency.
    """
    flat = 0
    stride = 1
    # walk from the fastest-varying (last) axis outwards, accumulating strides
    for index, size in zip(reversed(idx), reversed(sizes)):
        flat += index * stride
        stride *= size
    return flat
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class LazyReferenceMapper(collections.abc.MutableMapping):
    """This interface can be used to read/write references from Parquet stores.
    It is not intended for other types of references.
    It can be used with Kerchunk's MultiZarrToZarr method to combine
    references into a parquet store.
    Examples of this use-case can be found here:
    https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""

    # import is class level to prevent numpy dep requirement for fsspec
    @property
    def np(self):
        import numpy as np

        return np

    @property
    def pd(self):
        import pandas as pd

        return pd

    def __init__(
        self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
    ):
        """

        This instance will be writable, storing changes in memory until full partitions
        are accumulated or .flush() is called.

        To create an empty lazy store, use .create()

        Parameters
        ----------
        root : str
            Root of parquet store
        fs : fsspec.AbstractFileSystem
            fsspec filesystem object, default is local filesystem.
        cache_size : int, default=128
            Maximum size of LRU cache, where cache_size*record_size denotes
            the total number of references that can be loaded in memory at once.
        categorical_threshold : int
            Encode urls as pandas.Categorical to reduce memory footprint if the ratio
            of the number of unique urls to total number of refs for each variable
            is greater than or equal to this number. (default 10)
        """
        self.root = root
        # per-field chunk-grid shapes, filled lazily by _get_chunk_sizes
        self.chunk_sizes = {}
        self.out_root = out_root or self.root
        self.cat_thresh = categorical_threshold
        self.cache_size = cache_size
        # cached set of top-level directory names; None until first listdir()
        self.dirs = None
        # template for per-field parquet record files
        self.url = self.root + "/{field}/refs.{record}.parq"
        # TODO: derive fs from `root`
        self.fs = fsspec.filesystem("file") if fs is None else fs

    def __getattr__(self, item):
        # lazy bootstrap: first touch of the stored-state attributes triggers
        # setup(), which loads .zmetadata from the store
        if item in ("_items", "record_size", "zmetadata"):
            self.setup()
            # avoid possible recursion if setup fails somehow
            return self.__dict__[item]
        raise AttributeError(item)

    def setup(self):
        """Load .zmetadata and build the cached parquet-record loader."""
        self._items = {}
        self._items[".zmetadata"] = self.fs.cat_file(
            "/".join([self.root, ".zmetadata"])
        )
        met = json.loads(self._items[".zmetadata"])
        self.record_size = met["record_size"]
        self.zmetadata = met["metadata"]

        # Define function to open and decompress refs
        @lru_cache(maxsize=self.cache_size)
        def open_refs(field, record):
            """cached parquet file loader"""
            path = self.url.format(field=field, record=record)
            data = io.BytesIO(self.fs.cat_file(path))
            df = self.pd.read_parquet(data, engine="fastparquet")
            refs = {c: df[c].values for c in df.columns}
            return refs

        self.open_refs = open_refs

    @staticmethod
    def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
        """Make empty parquet reference set

        First deletes the contents of the given directory, if it exists.

        Parameters
        ----------
        root: str
            Directory to contain the output; will be created
        storage_options: dict | None
            For making the filesystem to use for writing is fs is None
        fs: FileSystem | None
            Filesystem for writing
        record_size: int
            Number of references per parquet file
        kwargs: passed to __init__

        Returns
        -------
        LazyReferenceMapper instance
        """
        met = {"metadata": {}, "record_size": record_size}
        if fs is None:
            fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
        if fs.exists(root):
            fs.rm(root, recursive=True)
        fs.makedirs(root, exist_ok=True)
        fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
        return LazyReferenceMapper(root, fs, **kwargs)

    def listdir(self, basename=True):
        """List top-level directories"""
        # cache me?
        if self.dirs is None:
            dirs = [p.split("/", 1)[0] for p in self.zmetadata]
            self.dirs = {p for p in dirs if p and not p.startswith(".")}
        listing = self.dirs
        if basename:
            listing = [os.path.basename(path) for path in listing]
        return listing

    def ls(self, path="", detail=True):
        """Shortcut file listings"""
        if not path:
            # root listing: top-level dirs, metadata keys and in-memory items
            dirnames = self.listdir()
            others = set(
                [".zmetadata"]
                + [name for name in self.zmetadata if "/" not in name]
                + [name for name in self._items if "/" not in name]
            )
            if detail is False:
                others.update(dirnames)
                return sorted(others)
            dirinfo = [
                {"name": name, "type": "directory", "size": 0} for name in dirnames
            ]
            fileinfo = [
                {
                    "name": name,
                    "type": "file",
                    "size": len(
                        json.dumps(self.zmetadata[name])
                        if name in self.zmetadata
                        else self._items[name]
                    ),
                }
                for name in others
            ]
            return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
        parts = path.split("/", 1)
        if len(parts) > 1:
            raise FileNotFoundError("Cannot list within directories right now")
        field = parts[0]
        others = set(
            [name for name in self.zmetadata if name.startswith(f"{path}/")]
            + [name for name in self._items if name.startswith(f"{path}/")]
        )
        fileinfo = [
            {
                "name": name,
                "type": "file",
                "size": len(
                    json.dumps(self.zmetadata[name])
                    if name in self.zmetadata
                    else self._items[name]
                ),
            }
            for name in others
        ]
        keys = self._keys_in_field(field)

        if detail is False:
            return list(others) + list(keys)
        recs = self._generate_all_records(field)
        recinfo = [
            {"name": name, "type": "file", "size": rec[-1]}
            for name, rec in zip(keys, recs)
            if rec[0]  # filters out path==None, deleted/missing
        ]
        return fileinfo + recinfo

    def _load_one_key(self, key):
        """Get the reference for one key

        Returns bytes, one-element list or three-element list.
        """
        # lookup order: in-memory items, then metadata, then parquet records
        if key in self._items:
            return self._items[key]
        elif key in self.zmetadata:
            return json.dumps(self.zmetadata[key]).encode()
        elif "/" not in key or self._is_meta(key):
            raise KeyError(key)
        field, sub_key = key.split("/")
        record, ri, chunk_size = self._key_to_record(key)
        maybe = self._items.get((field, record), {}).get(ri, False)
        if maybe is None:
            # explicitly deleted
            raise KeyError
        elif maybe:
            return maybe
        elif chunk_size == 0:
            return b""

        # Chunk keys can be loaded from row group and cached in LRU cache
        try:
            refs = self.open_refs(field, record)
        except (ValueError, TypeError, FileNotFoundError):
            raise KeyError(key)
        columns = ["path", "offset", "size", "raw"]
        selection = [refs[c][ri] if c in refs else None for c in columns]
        raw = selection[-1]
        if raw is not None:
            return raw
        if selection[0] is None:
            raise KeyError("This reference does not exist or has been deleted")
        if selection[1:3] == [0, 0]:
            # URL only
            return selection[:1]
        # URL, offset, size
        return selection[:3]

    @lru_cache(4096)
    def _key_to_record(self, key):
        """Details needed to construct a reference for one key"""
        field, chunk = key.split("/")
        chunk_sizes = self._get_chunk_sizes(field)
        if len(chunk_sizes) == 0:
            return 0, 0, 0
        chunk_idx = [int(c) for c in chunk.split(".")]
        chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
        # record = which parquet file; ri = row index within that file
        record = chunk_number // self.record_size
        ri = chunk_number % self.record_size
        return record, ri, len(chunk_sizes)

    def _get_chunk_sizes(self, field):
        """The number of chunks along each axis for a given field"""
        if field not in self.chunk_sizes:
            zarray = self.zmetadata[f"{field}/.zarray"]
            size_ratio = [
                math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
            ]
            self.chunk_sizes[field] = size_ratio or [1]
        return self.chunk_sizes[field]

    def _generate_record(self, field, record):
        """The references for a given parquet file of a given field"""
        refs = self.open_refs(field, record)
        it = iter(zip(*refs.values()))
        if len(refs) == 3:
            # All urls
            return (list(t) for t in it)
        elif len(refs) == 1:
            # All raws
            return refs["raw"]
        else:
            # Mix of urls and raws
            return (list(t[:3]) if not t[3] else t[3] for t in it)

    def _generate_all_records(self, field):
        """Load all the references within a field by iterating over the parquet files"""
        nrec = 1
        for ch in self._get_chunk_sizes(field):
            nrec *= ch
        nrec = math.ceil(nrec / self.record_size)
        for record in range(nrec):
            yield from self._generate_record(field, record)

    def values(self):
        """Lazy view of all reference values."""
        return RefsValuesView(self)

    def items(self):
        """Lazy view of (key, value) pairs."""
        return RefsItemsView(self)

    def __hash__(self):
        # identity-based hash: instances are mutable mappings
        return id(self)

    def __getitem__(self, key):
        return self._load_one_key(key)

    def __setitem__(self, key, value):
        if "/" in key and not self._is_meta(key):
            field, chunk = key.split("/")
            record, i, _ = self._key_to_record(key)
            subdict = self._items.setdefault((field, record), {})
            subdict[i] = value
            # a full partition gets written out immediately
            if len(subdict) == self.record_size:
                self.write(field, record)
        else:
            # metadata or top-level
            self._items[key] = value
            new_value = json.loads(
                value.decode() if isinstance(value, bytes) else value
            )
            self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}

    @staticmethod
    def _is_meta(key):
        # zarr metadata keys: ".zarray", ".zgroup", "field/.zattrs", etc.
        return key.startswith(".z") or "/.z" in key

    def __delitem__(self, key):
        if key in self._items:
            del self._items[key]
        elif key in self.zmetadata:
            del self.zmetadata[key]
        else:
            if "/" in key and not self._is_meta(key):
                field, chunk = key.split("/")
                record, i, _ = self._key_to_record(key)
                subdict = self._items.setdefault((field, record), {})
                # None marks the reference as explicitly deleted
                subdict[i] = None
                if len(subdict) == self.record_size:
                    self.write(field, record)
            else:
                # metadata or top-level
                self._items[key] = None

    def write(self, field, record, base_url=None, storage_options=None):
        """Write one partition (field, record) out as a parquet file."""
        # extra requirements if writing
        import kerchunk.df
        import numpy as np
        import pandas as pd

        partition = self._items[(field, record)]
        original = False
        if len(partition) < self.record_size:
            # partial partition: merge over any previously-written records
            try:
                original = self.open_refs(field, record)
            except IOError:
                pass

        if original:
            paths = original["path"]
            offsets = original["offset"]
            sizes = original["size"]
            raws = original["raw"]
        else:
            paths = np.full(self.record_size, np.nan, dtype="O")
            offsets = np.zeros(self.record_size, dtype="int64")
            sizes = np.zeros(self.record_size, dtype="int64")
            raws = np.full(self.record_size, np.nan, dtype="O")
        for j, data in partition.items():
            if isinstance(data, list):
                if (
                    str(paths.dtype) == "category"
                    and data[0] not in paths.dtype.categories
                ):
                    paths = paths.add_categories(data[0])
                paths[j] = data[0]
                if len(data) > 1:
                    offsets[j] = data[1]
                    sizes[j] = data[2]
            elif data is None:
                # delete
                paths[j] = None
                offsets[j] = 0
                sizes[j] = 0
                raws[j] = None
            else:
                # this is the only call into kerchunk, could remove
                raws[j] = kerchunk.df._proc_raw(data)
        # TODO: only save needed columns
        df = pd.DataFrame(
            {
                "path": paths,
                "offset": offsets,
                "size": sizes,
                "raw": raws,
            },
            copy=False,
        )
        if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
            df["path"] = df["path"].astype("category")
        object_encoding = {"raw": "bytes", "path": "utf8"}
        has_nulls = ["path", "raw"]

        fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
        self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
        df.to_parquet(
            fn,
            engine="fastparquet",
            storage_options=storage_options
            or getattr(self.fs, "storage_options", None),
            compression="zstd",
            index=False,
            stats=False,
            object_encoding=object_encoding,
            has_nulls=has_nulls,
            # **kwargs,
        )
        partition.clear()
        self._items.pop((field, record))

    def flush(self, base_url=None, storage_options=None):
        """Output any modified or deleted keys

        Parameters
        ----------
        base_url: str
            Location of the output
        """
        # write what we have so far and clear sub chunks
        for thing in list(self._items):
            if isinstance(thing, tuple):
                field, record = thing
                self.write(
                    field,
                    record,
                    base_url=base_url,
                    storage_options=storage_options,
                )

        # gather .zmetadata from self._items and write that too
        for k in list(self._items):
            if k != ".zmetadata" and ".z" in k:
                self.zmetadata[k] = json.loads(self._items.pop(k))
        met = {"metadata": self.zmetadata, "record_size": self.record_size}
        self._items[".zmetadata"] = json.dumps(met).encode()
        self.fs.pipe(
            "/".join([base_url or self.out_root, ".zmetadata"]),
            self._items[".zmetadata"],
        )

        # TODO: only clear those that we wrote to?
        self.open_refs.cache_clear()

    def __len__(self):
        # Caveat: This counts expected references, not actual - but is fast
        count = 0
        for field in self.listdir():
            if field.startswith("."):
                count += 1
            else:
                count += math.prod(self._get_chunk_sizes(field))
        count += len(self.zmetadata)  # all metadata keys
        # any other files not in reference partitions
        count += sum(1 for _ in self._items if not isinstance(_, tuple))
        return count

    def __iter__(self):
        # Caveat: returns only existing keys, so the number of these does not
        # match len(self)
        metas = set(self.zmetadata)
        metas.update(self._items)
        for bit in metas:
            if isinstance(bit, str):
                yield bit
        for field in self.listdir():
            for k in self._keys_in_field(field):
                if k in self:
                    yield k

    def __contains__(self, item):
        try:
            self._load_one_key(item)
            return True
        except KeyError:
            return False

    def _keys_in_field(self, field):
        """List key names in given field

        Produces strings like "field/x.y" appropriate from the chunking of the array
        """
        chunk_sizes = self._get_chunk_sizes(field)
        if len(chunk_sizes) == 0:
            yield field + "/0"
            return
        inds = itertools.product(*(range(i) for i in chunk_sizes))
        for ind in inds:
            yield field + "/" + ".".join([str(c) for c in ind])
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
class ReferenceFileSystem(AsyncFileSystem):
    """View byte ranges of some other file as a file system

    Initial version: single file system target, which must support
    async, and must allow start and end args in _cat_file. Later versions
    may allow multiple arbitrary URLs for the targets.

    This FileSystem is read-only. It is designed to be used with async
    targets (for now). We do not get original file details from the target FS.

    Configuration is by passing a dict of references at init, or a URL to
    a JSON file containing the same; this dict can also contain concrete
    data for some set of paths.

    Reference dict format:
    {path0: bytes_data, path1: (target_url, offset, size)}

    https://github.com/fsspec/kerchunk/blob/main/README.md
    """

    protocol = "reference"

    def __init__(
        self,
        fo,
        target=None,
        ref_storage_args=None,
        target_protocol=None,
        target_options=None,
        remote_protocol=None,
        remote_options=None,
        fs=None,
        template_overrides=None,
        simple_templates=True,
        max_gap=64_000,
        max_block=256_000_000,
        cache_size=128,
        **kwargs,
    ):
        """
        Parameters
        ----------
        fo : dict or str
            The set of references to use for this instance, with a structure
            as above. If a str pointing at a JSON file, it is opened with
            fsspec (see target_protocol/target_options); if a directory,
            references are assumed to be lazily-loaded parquet files.
        target : str
            Default file target for references whose target_url is None
        ref_storage_args : dict
            Deprecated alias for target_options.
        target_protocol : str
            Protocol for loading the reference file; derived from the path
            if None.
        target_options : dict
            Extra FS options for loading the reference file ``fo``
        remote_protocol : str
            Protocol of the filesystem on which the references are evaluated
            (unless fs is given); if None, derived from the first URL in the
            templates or references, in that order.
        remote_options : dict
            kwargs to go with remote_protocol
        fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
            Directly provide filesystem(s): either one instance, or a dict of
            protocol -> instance-or-kwargs. If given, remote_options and
            remote_protocol are ignored.
        template_overrides : dict
            Swap out templates in the reference file (useful for testing).
        simple_templates : bool
            True: templates processed with plain str replace; False: jinja
            rendering (slower, needed only for complex specs).
        max_gap, max_block : int
            Controls merging of neighbouring byte-range requests: merge only
            when the gap is <= max_gap (0 = touching only; negative disables)
            and the merged size is <= max_block.
        cache_size : int
            Max LRU cache entries for lazily loaded reference partitions.
        kwargs : passed to parent class
        """
        super().__init__(**kwargs)
        self.target = target
        self.template_overrides = template_overrides
        self.simple_templates = simple_templates
        self.templates = {}
        self.fss = {}
        self._dircache = {}
        self.max_gap = max_gap
        self.max_block = max_block
        if isinstance(fo, str):
            opts = dict(
                **(ref_storage_args or target_options or {}), protocol=target_protocol
            )
            ref_fs, ref_path = fsspec.core.url_to_fs(fo, **opts)
            if ref_fs.isfile(ref_path):
                # text JSON
                with fsspec.open(fo, "rb", **opts) as f:
                    logger.info("Read reference from URL %s", fo)
                    refs = json.load(f)
                self._process_references(refs, template_overrides)
            else:
                # Lazy parquet refs
                logger.info("Open lazy reference dict from URL %s", fo)
                self.references = LazyReferenceMapper(
                    ref_path,
                    fs=ref_fs,
                    cache_size=cache_size,
                )
        else:
            # dictionaries
            self._process_references(fo, template_overrides)
        if isinstance(fs, dict):
            # mapping of protocol -> instance or kwargs
            self.fss = {
                k: (
                    fsspec.filesystem(k.split(":", 1)[0], **opts)
                    if isinstance(opts, dict)
                    else opts
                )
                for k, opts in fs.items()
            }
            if None not in self.fss:
                self.fss[None] = filesystem("file")
            return
        if fs is not None:
            # single remote FS
            remote_protocol = (
                fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
            )
            self.fss[remote_protocol] = fs

        if remote_protocol is None:
            # get single protocol from any templates
            for ref in self.templates.values():
                if callable(ref):
                    ref = ref()
                protocol, _ = fsspec.core.split_protocol(ref)
                if protocol and protocol not in self.fss:
                    fs = filesystem(protocol, **(remote_options or {}))
                    self.fss[protocol] = fs
        if remote_protocol is None:
            # get single protocol from references
            # TODO: warning here, since this can be very expensive?
            for ref in self.references.values():
                if callable(ref):
                    ref = ref()
                if isinstance(ref, list) and ref[0]:
                    protocol, _ = fsspec.core.split_protocol(ref[0])
                    if protocol not in self.fss:
                        fs = filesystem(protocol, **(remote_options or {}))
                        self.fss[protocol] = fs
                    # only use first remote URL
                    break

        if remote_protocol and remote_protocol not in self.fss:
            fs = filesystem(remote_protocol, **(remote_options or {}))
            self.fss[remote_protocol] = fs

        self.fss[None] = fs or filesystem("file")  # default one
def _cat_common(self, path, start=None, end=None):
|
| 731 |
+
path = self._strip_protocol(path)
|
| 732 |
+
logger.debug(f"cat: {path}")
|
| 733 |
+
try:
|
| 734 |
+
part = self.references[path]
|
| 735 |
+
except KeyError:
|
| 736 |
+
raise FileNotFoundError(path)
|
| 737 |
+
if isinstance(part, str):
|
| 738 |
+
part = part.encode()
|
| 739 |
+
if isinstance(part, bytes):
|
| 740 |
+
logger.debug(f"Reference: {path}, type bytes")
|
| 741 |
+
if part.startswith(b"base64:"):
|
| 742 |
+
part = base64.b64decode(part[7:])
|
| 743 |
+
return part, None, None
|
| 744 |
+
|
| 745 |
+
if len(part) == 1:
|
| 746 |
+
logger.debug(f"Reference: {path}, whole file => {part}")
|
| 747 |
+
url = part[0]
|
| 748 |
+
start1, end1 = start, end
|
| 749 |
+
else:
|
| 750 |
+
url, start0, size = part
|
| 751 |
+
logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
|
| 752 |
+
end0 = start0 + size
|
| 753 |
+
|
| 754 |
+
if start is not None:
|
| 755 |
+
if start >= 0:
|
| 756 |
+
start1 = start0 + start
|
| 757 |
+
else:
|
| 758 |
+
start1 = end0 + start
|
| 759 |
+
else:
|
| 760 |
+
start1 = start0
|
| 761 |
+
if end is not None:
|
| 762 |
+
if end >= 0:
|
| 763 |
+
end1 = start0 + end
|
| 764 |
+
else:
|
| 765 |
+
end1 = end0 + end
|
| 766 |
+
else:
|
| 767 |
+
end1 = end0
|
| 768 |
+
if url is None:
|
| 769 |
+
url = self.target
|
| 770 |
+
return url, start1, end1
|
| 771 |
+
|
| 772 |
+
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
| 773 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
| 774 |
+
if isinstance(part_or_url, bytes):
|
| 775 |
+
return part_or_url[start:end]
|
| 776 |
+
protocol, _ = split_protocol(part_or_url)
|
| 777 |
+
try:
|
| 778 |
+
await self.fss[protocol]._cat_file(part_or_url, start=start, end=end)
|
| 779 |
+
except Exception as e:
|
| 780 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
| 781 |
+
|
| 782 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
| 783 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
| 784 |
+
if isinstance(part_or_url, bytes):
|
| 785 |
+
return part_or_url[start:end]
|
| 786 |
+
protocol, _ = split_protocol(part_or_url)
|
| 787 |
+
try:
|
| 788 |
+
return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
|
| 789 |
+
except Exception as e:
|
| 790 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
| 791 |
+
|
| 792 |
+
def pipe_file(self, path, value, **_):
|
| 793 |
+
"""Temporarily add binary data or reference as a file"""
|
| 794 |
+
self.references[path] = value
|
| 795 |
+
|
| 796 |
+
async def _get_file(self, rpath, lpath, **kwargs):
|
| 797 |
+
if self.isdir(rpath):
|
| 798 |
+
return os.makedirs(lpath, exist_ok=True)
|
| 799 |
+
data = await self._cat_file(rpath)
|
| 800 |
+
with open(lpath, "wb") as f:
|
| 801 |
+
f.write(data)
|
| 802 |
+
|
| 803 |
+
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
|
| 804 |
+
if self.isdir(rpath):
|
| 805 |
+
return os.makedirs(lpath, exist_ok=True)
|
| 806 |
+
data = self.cat_file(rpath, **kwargs)
|
| 807 |
+
callback.set_size(len(data))
|
| 808 |
+
if isfilelike(lpath):
|
| 809 |
+
lpath.write(data)
|
| 810 |
+
else:
|
| 811 |
+
with open(lpath, "wb") as f:
|
| 812 |
+
f.write(data)
|
| 813 |
+
callback.absolute_update(len(data))
|
| 814 |
+
|
| 815 |
+
def get(self, rpath, lpath, recursive=False, **kwargs):
|
| 816 |
+
if recursive:
|
| 817 |
+
# trigger directory build
|
| 818 |
+
self.ls("")
|
| 819 |
+
rpath = self.expand_path(rpath, recursive=recursive)
|
| 820 |
+
fs = fsspec.filesystem("file", auto_mkdir=True)
|
| 821 |
+
targets = other_paths(rpath, lpath)
|
| 822 |
+
if recursive:
|
| 823 |
+
data = self.cat([r for r in rpath if not self.isdir(r)])
|
| 824 |
+
else:
|
| 825 |
+
data = self.cat(rpath)
|
| 826 |
+
for remote, local in zip(rpath, targets):
|
| 827 |
+
if remote in data:
|
| 828 |
+
fs.pipe_file(local, data[remote])
|
| 829 |
+
|
| 830 |
+
def cat(self, path, recursive=False, on_error="raise", **kwargs):
|
| 831 |
+
if isinstance(path, str) and recursive:
|
| 832 |
+
raise NotImplementedError
|
| 833 |
+
if isinstance(path, list) and (recursive or any("*" in p for p in path)):
|
| 834 |
+
raise NotImplementedError
|
| 835 |
+
# TODO: if references is lazy, pre-fetch all paths in batch before access
|
| 836 |
+
proto_dict = _protocol_groups(path, self.references)
|
| 837 |
+
out = {}
|
| 838 |
+
for proto, paths in proto_dict.items():
|
| 839 |
+
fs = self.fss[proto]
|
| 840 |
+
urls, starts, ends, valid_paths = [], [], [], []
|
| 841 |
+
for p in paths:
|
| 842 |
+
# find references or label not-found. Early exit if any not
|
| 843 |
+
# found and on_error is "raise"
|
| 844 |
+
try:
|
| 845 |
+
u, s, e = self._cat_common(p)
|
| 846 |
+
except FileNotFoundError as err:
|
| 847 |
+
if on_error == "raise":
|
| 848 |
+
raise
|
| 849 |
+
if on_error != "omit":
|
| 850 |
+
out[p] = err
|
| 851 |
+
else:
|
| 852 |
+
urls.append(u)
|
| 853 |
+
starts.append(s)
|
| 854 |
+
ends.append(e)
|
| 855 |
+
valid_paths.append(p)
|
| 856 |
+
|
| 857 |
+
# process references into form for merging
|
| 858 |
+
urls2 = []
|
| 859 |
+
starts2 = []
|
| 860 |
+
ends2 = []
|
| 861 |
+
paths2 = []
|
| 862 |
+
whole_files = set()
|
| 863 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
| 864 |
+
if isinstance(u, bytes):
|
| 865 |
+
# data
|
| 866 |
+
out[p] = u
|
| 867 |
+
elif s is None:
|
| 868 |
+
# whole file - limits are None, None, but no further
|
| 869 |
+
# entries take for this file
|
| 870 |
+
whole_files.add(u)
|
| 871 |
+
urls2.append(u)
|
| 872 |
+
starts2.append(s)
|
| 873 |
+
ends2.append(e)
|
| 874 |
+
paths2.append(p)
|
| 875 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
| 876 |
+
# second run to account for files that are to be loaded whole
|
| 877 |
+
if s is not None and u not in whole_files:
|
| 878 |
+
urls2.append(u)
|
| 879 |
+
starts2.append(s)
|
| 880 |
+
ends2.append(e)
|
| 881 |
+
paths2.append(p)
|
| 882 |
+
|
| 883 |
+
# merge and fetch consolidated ranges
|
| 884 |
+
new_paths, new_starts, new_ends = merge_offset_ranges(
|
| 885 |
+
list(urls2),
|
| 886 |
+
list(starts2),
|
| 887 |
+
list(ends2),
|
| 888 |
+
sort=True,
|
| 889 |
+
max_gap=self.max_gap,
|
| 890 |
+
max_block=self.max_block,
|
| 891 |
+
)
|
| 892 |
+
bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
|
| 893 |
+
|
| 894 |
+
# unbundle from merged bytes - simple approach
|
| 895 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
| 896 |
+
if p in out:
|
| 897 |
+
continue # was bytes, already handled
|
| 898 |
+
for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
|
| 899 |
+
if np == u and (ns is None or ne is None):
|
| 900 |
+
if isinstance(b, Exception):
|
| 901 |
+
out[p] = b
|
| 902 |
+
else:
|
| 903 |
+
out[p] = b[s:e]
|
| 904 |
+
elif np == u and s >= ns and e <= ne:
|
| 905 |
+
if isinstance(b, Exception):
|
| 906 |
+
out[p] = b
|
| 907 |
+
else:
|
| 908 |
+
out[p] = b[s - ns : (e - ne) or None]
|
| 909 |
+
|
| 910 |
+
for k, v in out.copy().items():
|
| 911 |
+
# these were valid references, but fetch failed, so transform exc
|
| 912 |
+
if isinstance(v, Exception) and k in self.references:
|
| 913 |
+
ex = out[k]
|
| 914 |
+
new_ex = ReferenceNotReachable(k, self.references[k])
|
| 915 |
+
new_ex.__cause__ = ex
|
| 916 |
+
if on_error == "raise":
|
| 917 |
+
raise new_ex
|
| 918 |
+
elif on_error != "omit":
|
| 919 |
+
out[k] = new_ex
|
| 920 |
+
|
| 921 |
+
if len(out) == 1 and isinstance(path, str) and "*" not in path:
|
| 922 |
+
return _first(out)
|
| 923 |
+
return out
|
| 924 |
+
|
| 925 |
+
def _process_references(self, references, template_overrides=None):
|
| 926 |
+
vers = references.get("version", None)
|
| 927 |
+
if vers is None:
|
| 928 |
+
self._process_references0(references)
|
| 929 |
+
elif vers == 1:
|
| 930 |
+
self._process_references1(references, template_overrides=template_overrides)
|
| 931 |
+
else:
|
| 932 |
+
raise ValueError(f"Unknown reference spec version: {vers}")
|
| 933 |
+
# TODO: we make dircache by iterating over all entries, but for Spec >= 1,
|
| 934 |
+
# can replace with programmatic. Is it even needed for mapper interface?
|
| 935 |
+
|
| 936 |
+
def _process_references0(self, references):
|
| 937 |
+
"""Make reference dict for Spec Version 0"""
|
| 938 |
+
self.references = references
|
| 939 |
+
|
| 940 |
+
def _process_references1(self, references, template_overrides=None):
|
| 941 |
+
if not self.simple_templates or self.templates:
|
| 942 |
+
import jinja2
|
| 943 |
+
self.references = {}
|
| 944 |
+
self._process_templates(references.get("templates", {}))
|
| 945 |
+
|
| 946 |
+
@lru_cache(1000)
|
| 947 |
+
def _render_jinja(u):
|
| 948 |
+
return jinja2.Template(u).render(**self.templates)
|
| 949 |
+
|
| 950 |
+
for k, v in references.get("refs", {}).items():
|
| 951 |
+
if isinstance(v, str):
|
| 952 |
+
if v.startswith("base64:"):
|
| 953 |
+
self.references[k] = base64.b64decode(v[7:])
|
| 954 |
+
self.references[k] = v
|
| 955 |
+
elif self.templates:
|
| 956 |
+
u = v[0]
|
| 957 |
+
if "{{" in u:
|
| 958 |
+
if self.simple_templates:
|
| 959 |
+
u = (
|
| 960 |
+
u.replace("{{", "{")
|
| 961 |
+
.replace("}}", "}")
|
| 962 |
+
.format(**self.templates)
|
| 963 |
+
)
|
| 964 |
+
else:
|
| 965 |
+
u = _render_jinja(u)
|
| 966 |
+
self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
|
| 967 |
+
else:
|
| 968 |
+
self.references[k] = v
|
| 969 |
+
self.references.update(self._process_gen(references.get("gen", [])))
|
| 970 |
+
|
| 971 |
+
def _process_templates(self, tmp):
|
| 972 |
+
self.templates = {}
|
| 973 |
+
if self.template_overrides is not None:
|
| 974 |
+
tmp.update(self.template_overrides)
|
| 975 |
+
for k, v in tmp.items():
|
| 976 |
+
if "{{" in v:
|
| 977 |
+
import jinja2
|
| 978 |
+
|
| 979 |
+
self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
|
| 980 |
+
temp
|
| 981 |
+
).render(**kwargs)
|
| 982 |
+
else:
|
| 983 |
+
self.templates[k] = v
|
| 984 |
+
|
| 985 |
+
def _process_gen(self, gens):
|
| 986 |
+
out = {}
|
| 987 |
+
for gen in gens:
|
| 988 |
+
dimension = {
|
| 989 |
+
k: v
|
| 990 |
+
if isinstance(v, list)
|
| 991 |
+
else range(v.get("start", 0), v["stop"], v.get("step", 1))
|
| 992 |
+
for k, v in gen["dimensions"].items()
|
| 993 |
+
}
|
| 994 |
+
products = (
|
| 995 |
+
dict(zip(dimension.keys(), values))
|
| 996 |
+
for values in itertools.product(*dimension.values())
|
| 997 |
+
)
|
| 998 |
+
for pr in products:
|
| 999 |
+
import jinja2
|
| 1000 |
+
|
| 1001 |
+
key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
|
| 1002 |
+
url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
|
| 1003 |
+
if ("offset" in gen) and ("length" in gen):
|
| 1004 |
+
offset = int(
|
| 1005 |
+
jinja2.Template(gen["offset"]).render(**pr, **self.templates)
|
| 1006 |
+
)
|
| 1007 |
+
length = int(
|
| 1008 |
+
jinja2.Template(gen["length"]).render(**pr, **self.templates)
|
| 1009 |
+
)
|
| 1010 |
+
out[key] = [url, offset, length]
|
| 1011 |
+
elif ("offset" in gen) ^ ("length" in gen):
|
| 1012 |
+
raise ValueError(
|
| 1013 |
+
"Both 'offset' and 'length' are required for a "
|
| 1014 |
+
"reference generator entry if either is provided."
|
| 1015 |
+
)
|
| 1016 |
+
else:
|
| 1017 |
+
out[key] = [url]
|
| 1018 |
+
return out
|
| 1019 |
+
|
| 1020 |
+
def _dircache_from_items(self):
|
| 1021 |
+
self.dircache = {"": []}
|
| 1022 |
+
it = self.references.items()
|
| 1023 |
+
for path, part in it:
|
| 1024 |
+
if isinstance(part, (bytes, str)):
|
| 1025 |
+
size = len(part)
|
| 1026 |
+
elif len(part) == 1:
|
| 1027 |
+
size = None
|
| 1028 |
+
else:
|
| 1029 |
+
_, _, size = part
|
| 1030 |
+
par = path.rsplit("/", 1)[0] if "/" in path else ""
|
| 1031 |
+
par0 = par
|
| 1032 |
+
subdirs = [par0]
|
| 1033 |
+
while par0 and par0 not in self.dircache:
|
| 1034 |
+
# collect parent directories
|
| 1035 |
+
par0 = self._parent(par0)
|
| 1036 |
+
subdirs.append(par0)
|
| 1037 |
+
|
| 1038 |
+
subdirs = subdirs[::-1]
|
| 1039 |
+
for parent, child in zip(subdirs, subdirs[1:]):
|
| 1040 |
+
# register newly discovered directories
|
| 1041 |
+
assert child not in self.dircache
|
| 1042 |
+
assert parent in self.dircache
|
| 1043 |
+
self.dircache[parent].append(
|
| 1044 |
+
{"name": child, "type": "directory", "size": 0}
|
| 1045 |
+
)
|
| 1046 |
+
self.dircache[child] = []
|
| 1047 |
+
|
| 1048 |
+
self.dircache[par].append({"name": path, "type": "file", "size": size})
|
| 1049 |
+
|
| 1050 |
+
def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
|
| 1051 |
+
data = self.cat_file(path) # load whole chunk into memory
|
| 1052 |
+
return io.BytesIO(data)
|
| 1053 |
+
|
| 1054 |
+
def ls(self, path, detail=True, **kwargs):
|
| 1055 |
+
path = self._strip_protocol(path)
|
| 1056 |
+
if isinstance(self.references, LazyReferenceMapper):
|
| 1057 |
+
try:
|
| 1058 |
+
return self.references.ls(path, detail)
|
| 1059 |
+
except KeyError:
|
| 1060 |
+
pass
|
| 1061 |
+
raise FileNotFoundError(f"'{path}' is not a known key")
|
| 1062 |
+
if not self.dircache:
|
| 1063 |
+
self._dircache_from_items()
|
| 1064 |
+
out = self._ls_from_cache(path)
|
| 1065 |
+
if out is None:
|
| 1066 |
+
raise FileNotFoundError(path)
|
| 1067 |
+
if detail:
|
| 1068 |
+
return out
|
| 1069 |
+
return [o["name"] for o in out]
|
| 1070 |
+
|
| 1071 |
+
def exists(self, path, **kwargs): # overwrite auto-sync version
|
| 1072 |
+
return self.isdir(path) or self.isfile(path)
|
| 1073 |
+
|
| 1074 |
+
def isdir(self, path): # overwrite auto-sync version
|
| 1075 |
+
if self.dircache:
|
| 1076 |
+
return path in self.dircache
|
| 1077 |
+
elif isinstance(self.references, LazyReferenceMapper):
|
| 1078 |
+
return path in self.references.listdir("")
|
| 1079 |
+
else:
|
| 1080 |
+
# this may be faster than building dircache for single calls, but
|
| 1081 |
+
# by looping will be slow for many calls; could cache it?
|
| 1082 |
+
return any(_.startswith(f"{path}/") for _ in self.references)
|
| 1083 |
+
|
| 1084 |
+
def isfile(self, path): # overwrite auto-sync version
|
| 1085 |
+
return path in self.references
|
| 1086 |
+
|
| 1087 |
+
async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
|
| 1088 |
+
return self.ls(path, detail, **kwargs)
|
| 1089 |
+
|
| 1090 |
+
def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
|
| 1091 |
+
if withdirs:
|
| 1092 |
+
return super().find(
|
| 1093 |
+
path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
|
| 1094 |
+
)
|
| 1095 |
+
if path:
|
| 1096 |
+
path = self._strip_protocol(path)
|
| 1097 |
+
r = sorted(k for k in self.references if k.startswith(path))
|
| 1098 |
+
else:
|
| 1099 |
+
r = sorted(self.references)
|
| 1100 |
+
if detail:
|
| 1101 |
+
if not self.dircache:
|
| 1102 |
+
self._dircache_from_items()
|
| 1103 |
+
return {k: self._ls_from_cache(k)[0] for k in r}
|
| 1104 |
+
else:
|
| 1105 |
+
return r
|
| 1106 |
+
|
| 1107 |
+
def info(self, path, **kwargs):
|
| 1108 |
+
out = self.references.get(path)
|
| 1109 |
+
if out is not None:
|
| 1110 |
+
if isinstance(out, (str, bytes)):
|
| 1111 |
+
# decode base64 here
|
| 1112 |
+
return {"name": path, "type": "file", "size": len(out)}
|
| 1113 |
+
elif len(out) > 1:
|
| 1114 |
+
return {"name": path, "type": "file", "size": out[2]}
|
| 1115 |
+
else:
|
| 1116 |
+
out0 = [{"name": path, "type": "file", "size": None}]
|
| 1117 |
+
else:
|
| 1118 |
+
out = self.ls(path, True)
|
| 1119 |
+
out0 = [o for o in out if o["name"] == path]
|
| 1120 |
+
if not out0:
|
| 1121 |
+
return {"name": path, "type": "directory", "size": 0}
|
| 1122 |
+
if out0[0]["size"] is None:
|
| 1123 |
+
# if this is a whole remote file, update size using remote FS
|
| 1124 |
+
prot, _ = split_protocol(self.references[path][0])
|
| 1125 |
+
out0[0]["size"] = self.fss[prot].size(self.references[path][0])
|
| 1126 |
+
return out0[0]
|
| 1127 |
+
|
| 1128 |
+
async def _info(self, path, **kwargs): # calls fast sync code
|
| 1129 |
+
return self.info(path)
|
| 1130 |
+
|
| 1131 |
+
async def _rm_file(self, path, **kwargs):
|
| 1132 |
+
self.references.pop(
|
| 1133 |
+
path, None
|
| 1134 |
+
) # ignores FileNotFound, just as well for directories
|
| 1135 |
+
self.dircache.clear() # this is a bit heavy handed
|
| 1136 |
+
|
| 1137 |
+
async def _pipe_file(self, path, data):
|
| 1138 |
+
# can be str or bytes
|
| 1139 |
+
self.references[path] = data
|
| 1140 |
+
self.dircache.clear() # this is a bit heavy handed
|
| 1141 |
+
|
| 1142 |
+
async def _put_file(self, lpath, rpath, **kwargs):
|
| 1143 |
+
# puts binary
|
| 1144 |
+
with open(lpath, "rb") as f:
|
| 1145 |
+
self.references[rpath] = f.read()
|
| 1146 |
+
self.dircache.clear() # this is a bit heavy handed
|
| 1147 |
+
|
| 1148 |
+
def save_json(self, url, **storage_options):
|
| 1149 |
+
"""Write modified references into new location"""
|
| 1150 |
+
out = {}
|
| 1151 |
+
for k, v in self.references.items():
|
| 1152 |
+
if isinstance(v, bytes):
|
| 1153 |
+
try:
|
| 1154 |
+
out[k] = v.decode("ascii")
|
| 1155 |
+
except UnicodeDecodeError:
|
| 1156 |
+
out[k] = (b"base64:" + base64.b64encode(v)).decode()
|
| 1157 |
+
else:
|
| 1158 |
+
out[k] = v
|
| 1159 |
+
with fsspec.open(url, "wb", **storage_options) as f:
|
| 1160 |
+
f.write(json.dumps({"version": 1, "refs": out}).encode())
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module contains SMBFileSystem class responsible for handling access to
|
| 3 |
+
Windows Samba network shares by using package smbprotocol
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import datetime
|
| 7 |
+
import uuid
|
| 8 |
+
from stat import S_ISDIR, S_ISLNK
|
| 9 |
+
|
| 10 |
+
import smbclient
|
| 11 |
+
|
| 12 |
+
from .. import AbstractFileSystem
|
| 13 |
+
from ..utils import infer_storage_options
|
| 14 |
+
|
| 15 |
+
# ! pylint: disable=bad-continuation
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
    optional.

    .. note::

        For working this source require `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        # Optional staging directory (within a share) used for uncommitted
        # writes; consumed by _open when autocommit=False.
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        # Register the SMB session eagerly so connection errors surface at
        # construction time rather than on first file operation.
        self._connect()

    @property
    def _port(self):
        # Default to the standard SMB port when none was given.
        return 445 if self.port is None else self.port

    def _connect(self):
        """Register a (cached) smbclient session for this host/credentials."""
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        # Keep only the path component; host/credentials are handled via
        # _get_kwargs_from_urls and the constructor.
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory, optionally creating missing parents.

        NOTE(review): with create_parents=True an existing directory raises,
        because makedirs is called with exist_ok=False — confirm intended.
        """
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Recursively create directories below a share.

        Paths that name only the share root are silently skipped: shares
        themselves cannot be created through this API.
        """
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        """Remove a directory; share roots are silently skipped."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        """Return an fsspec-style info dict (name/size/type/uid/gid/times)."""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            # Directories get a trailing slash in their reported name.
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        """List entries under ``path``; info dicts when ``detail`` is True."""
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            # One stat round-trip per entry.
            dirs = [self.info(d) for d in dirs]
        return dirs

    # pylint: disable=too-many-arguments
    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        # Normalise block_size: None / negative both mean "default buffering".
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            # Deferred-commit writes go to a temp file, moved in place on
            # commit (see SMBFileOpener).
            temp = _as_temp_path(self.host, path, self.temppath)
            return SMBFileOpener(
                wpath, temp, mode, port=self._port, block_size=bls, **kwargs
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        """Remove a single file or (empty) directory; share roots skipped."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        """Rename/move ``path1`` to ``path2``.

        ``recursive`` and ``maxdepth`` are accepted for interface
        compatibility but are not used by the underlying rename.
        """
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def _as_unc_path(host, path):
|
| 257 |
+
rpath = path.replace("/", "\\")
|
| 258 |
+
unc = f"\\\\{host}{rpath}"
|
| 259 |
+
return unc
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def _as_temp_path(host, path, temppath):
|
| 263 |
+
share = path.split("/")[1]
|
| 264 |
+
temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
|
| 265 |
+
unc = _as_unc_path(host, temp_file)
|
| 266 |
+
return unc
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def _share_has_path(path):
|
| 270 |
+
parts = path.count("/")
|
| 271 |
+
if path.endswith("/"):
|
| 272 |
+
return parts > 2
|
| 273 |
+
return parts > 1
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
class SMBFileOpener:
    """writes to remote temporary file, move on commit"""

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        # path: final destination (UNC); temp: staging file written first.
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        # Extra kwargs are forwarded verbatim to smbclient.open_file.
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        # Open the staging file immediately so write errors surface early.
        self._open()

    def _open(self):
        """(Re)open the temp file if it is not already open."""
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        # TODO: use transaction support in SMB protocol
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        # os.fspath() reports the *destination* path, not the temp file.
        return self.path

    def __iter__(self):
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        # Delegate everything else (read/write/seek/...) to the open handle.
        return getattr(self.smbfile, item)

    def __enter__(self):
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import zipfile
|
| 2 |
+
|
| 3 |
+
import fsspec
|
| 4 |
+
from fsspec.archive import AbstractArchiveFileSystem
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class ZipFileSystem(AbstractArchiveFileSystem):
    """Read/Write contents of ZIP archive as a file-system

    Keeps file object open while instance lives.

    This class is pickleable, but not necessarily thread-safe
    """

    root_marker = ""
    protocol = "zip"
    cachable = False

    def __init__(
        self,
        fo="",
        mode="r",
        target_protocol=None,
        target_options=None,
        compression=zipfile.ZIP_STORED,
        allowZip64=True,
        compresslevel=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        fo: str or file-like
            Contains ZIP, and must exist. If a str, will fetch file using
            :meth:`~fsspec.open_files`, which must return one file exactly.
        mode: str
            Accept: "r", "w", "a"
        target_protocol: str (optional)
            If ``fo`` is a string, this value can be used to override the
            FS protocol inferred from a URL
        target_options: dict (optional)
            Kwargs passed when instantiating the target FS, if ``fo`` is
            a string.
        compression, allowZip64, compresslevel: passed to ZipFile
            Only relevant when creating a ZIP

        Raises
        ------
        ValueError
            If ``mode`` is not one of "r", "w", "a".
        """
        # Fix: previously called super().__init__(self, **kwargs), passing
        # ``self`` as a spurious extra positional argument.
        super().__init__(**kwargs)
        if mode not in set("rwa"):
            # Fix: error message typo ("no understood" -> "not understood").
            raise ValueError(f"mode '{mode}' not understood")
        self.mode = mode
        if isinstance(fo, str):
            # "a" needs a readable+writable handle on the existing archive.
            if mode == "a":
                m = "r+b"
            else:
                m = mode + "b"
            fo = fsspec.open(
                fo, mode=m, protocol=target_protocol, **(target_options or {})
            )
        self.of = fo
        self.fo = fo.__enter__()  # the whole instance is a context
        self.zip = zipfile.ZipFile(
            self.fo,
            mode=mode,
            compression=compression,
            allowZip64=allowZip64,
            compresslevel=compresslevel,
        )
        # Lazily populated by _get_dirs(); maps path -> info dict.
        self.dir_cache = None

    @classmethod
    def _strip_protocol(cls, path):
        # zip file paths are always relative to the archive root
        return super()._strip_protocol(path).lstrip("/")

    def __del__(self):
        # Flush pending writes when the instance is garbage-collected.
        if hasattr(self, "zip"):
            self.close()
            del self.zip

    def close(self):
        """Commits any write changes to the file. Done on ``del`` too."""
        self.zip.close()

    def _get_dirs(self):
        """Populate ``self.dir_cache`` from the archive's member list."""
        if self.dir_cache is None or self.mode in set("wa"):
            # when writing, dir_cache is always in the ZipFile's attributes,
            # not read from the file.
            files = self.zip.infolist()
            # Synthesize entries for implied (intermediate) directories.
            self.dir_cache = {
                dirname.rstrip("/"): {
                    "name": dirname.rstrip("/"),
                    "size": 0,
                    "type": "directory",
                }
                for dirname in self._all_dirnames(self.zip.namelist())
            }
            for z in files:
                # Start from raw ZipInfo attributes, then normalise the
                # fsspec-standard keys on top.
                f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
                f.update(
                    {
                        "name": z.filename.rstrip("/"),
                        "size": z.file_size,
                        "type": ("directory" if z.is_dir() else "file"),
                    }
                )
                self.dir_cache[f["name"]] = f

    def pipe_file(self, path, value, **kwargs):
        # override upstream, because we know the exact file size in this case
        self.zip.writestr(path, value, **kwargs)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Open an archive member; a ZipFS instance is read-only OR write-only."""
        path = self._strip_protocol(path)
        if "r" in mode and self.mode in set("wa"):
            if self.exists(path):
                raise OSError("ZipFS can only be open for reading or writing, not both")
            raise FileNotFoundError(path)
        if "r" in self.mode and "w" in mode:
            raise OSError("ZipFS can only be open for reading or writing, not both")
        out = self.zip.open(path, mode.strip("b"))
        if "r" in mode:
            # Attach size/name so fsspec file utilities can use them.
            info = self.info(path)
            out.size = info["size"]
            out.name = info["name"]
        return out
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import array
|
| 2 |
+
import posixpath
|
| 3 |
+
import warnings
|
| 4 |
+
from collections.abc import MutableMapping
|
| 5 |
+
from functools import cached_property
|
| 6 |
+
|
| 7 |
+
from .core import url_to_fs
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable wrapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=True)
        performs a touch at the location, to check for write access.

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        # Canonical root without trailing slash; used for find()/strip.
        self.root = fs._strip_protocol(root).rstrip("/")
        # Root with exactly one trailing separator, derived by joining a
        # dummy component and chopping it off — robust across protocols.
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        # Exception types translated into KeyError on read.
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            # Write-access probe: create then remove a scratch file.
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping"""
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except:  # noqa: E722
            # Best-effort: some backends cannot remove/recreate the root.
            pass

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            They keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        # For "omit" we still ask the backend to return exceptions, then
        # filter them out below.
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # Single-key fast path: cat returned bytes, not a dict.
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2]
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            # Lists are normalised to tuples so their str() form is stable.
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}"

    def _str_to_key(self, s):
        """Strip path of to leave key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data"""
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions:
            if default is not None:
                return default
            raise KeyError(key)
        return result

    def pop(self, key, default=None):
        """Pop data"""
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            # Key vanished between read and delete; value already in hand.
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        # Ensure parent directory exists for filesystems that need it.
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key"""
        try:
            self.fs.rm(self._key_to_str(key))
        except:  # noqa: E722
            # Any backend failure is reported as a missing key.
            raise KeyError

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.exists(path) and self.fs.isfile(path)

    def __reduce__(self):
        # Pickle support: re-create without check/create side effects.
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def maybe_convert(value):
    """Coerce array-like values into raw ``bytes``; pass others through.

    ``array.array`` instances and objects exposing ``__array__`` (e.g. numpy
    arrays) are serialized via the buffer protocol.  datetime64/timedelta64
    arrays are reinterpreted as int64 first, since the buffer interface does
    not accept them directly.
    """
    arraylike = isinstance(value, array.array) or hasattr(value, "__array__")
    if not arraylike:
        return value
    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        value = value.view("int64")
    return bytes(memoryview(value))
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Whether to attempt to read from the location before instantiation, to
        check that the mapping does exist
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # Resolve the filesystem and root path from the (possibly compound) URL,
    # preferring an explicit override of the root when one is supplied.
    fs, urlpath = url_to_fs(url, **kwargs)
    root = urlpath if alternate_root is None else alternate_root
    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py
ADDED
|
@@ -0,0 +1,742 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import logging
|
| 5 |
+
import math
|
| 6 |
+
import os
|
| 7 |
+
import pathlib
|
| 8 |
+
import re
|
| 9 |
+
import sys
|
| 10 |
+
import tempfile
|
| 11 |
+
from functools import partial
|
| 12 |
+
from hashlib import md5
|
| 13 |
+
from importlib.metadata import version
|
| 14 |
+
from typing import (
|
| 15 |
+
IO,
|
| 16 |
+
TYPE_CHECKING,
|
| 17 |
+
Any,
|
| 18 |
+
Callable,
|
| 19 |
+
Iterable,
|
| 20 |
+
Iterator,
|
| 21 |
+
Sequence,
|
| 22 |
+
TypeVar,
|
| 23 |
+
)
|
| 24 |
+
from urllib.parse import urlsplit
|
| 25 |
+
|
| 26 |
+
if TYPE_CHECKING:
|
| 27 |
+
from typing_extensions import TypeGuard
|
| 28 |
+
|
| 29 |
+
from fsspec.spec import AbstractFileSystem
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
| 33 |
+
|
| 34 |
+
T = TypeVar("T")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Derive storage options from a URL and merge in any inherited options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Windows drive-letter paths ("C:\...") and anything without a scheme
    # are plain local paths.
    is_windows_drive = re.match(r"^[a-zA-Z]:[\\/]", urlpath)
    has_scheme = re.match(r"^[a-zA-Z0-9]+://", urlpath) is not None
    if is_windows_drive or not has_scheme:
        return {"protocol": "file", "path": urlpath}

    parsed = urlsplit(urlpath)
    scheme = parsed.scheme or "file"
    if parsed.fragment:
        path = "#".join([parsed.path, parsed.fragment])
    else:
        path = parsed.path

    if scheme == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        drive_match = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if drive_match:
            path = "%s:%s" % drive_match.groups()

    if scheme in ("http", "https"):
        # HTTP(S) URLs pass through untouched; requests parses them anyway.
        return {"protocol": scheme, "path": urlpath}

    opts: dict[str, Any] = {"protocol": scheme, "path": path}

    if parsed.netloc:
        # Parse `hostname` from netloc by hand because `parsed.hostname`
        # lowercases the hostname, which is not always desirable (e.g. S3):
        # https://github.com/dask/dask/issues/1417
        host = parsed.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
        opts["host"] = host
        if scheme in ("s3", "s3a", "gcs", "gs"):
            # For object stores the "host" is the bucket name and belongs at
            # the front of the path.
            opts["path"] = host + opts["path"]
        if parsed.port:
            opts["port"] = parsed.port
        if parsed.username:
            opts["username"] = parsed.username
        if parsed.password:
            opts["password"] = parsed.password

    if parsed.query:
        opts["url_query"] = parsed.query
    if parsed.fragment:
        opts["url_fragment"] = parsed.fragment

    if inherit_storage_options:
        update_storage_options(opts, inherit_storage_options)

    return opts
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge ``inherited`` into ``options`` in place.

    Raises
    ------
    KeyError
        When the same key appears in both dicts with differing values.
    """
    inherited = inherited or {}
    for key in set(options) & set(inherited):
        if options.get(key) != inherited.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage option:\n{key}"
            )
    options.update(inherited)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Registry mapping filename extension (no dot, lowercase) -> compression
# name; populated via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Infer a registered compression name from *filename*'s extension.

    This includes builtin (gz, bz2, zip) compressions, as well as optional
    ones; see fsspec.compression.register_compression.  Returns None when the
    extension is not registered.
    """
    ext = os.path.splitext(filename)[-1].strip(".").lower()
    return compressions.get(ext)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Return a function that zero-pads an integer to the width of ``max_int``.

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # Nudge upward so max_int == 0 and exact powers of 10 get the right width.
    padded_max = max_int + 1e-8
    width = int(math.ceil(math.log10(padded_max)))

    def _pad(value: int) -> str:
        return str(value).zfill(width)

    return _pad
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        # Prepend the tail of the previous chunk so a delimiter straddling a
        # block boundary is still detected.
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                # Rewind from the current position past the unconsumed part
                # of `full`, leaving the cursor just after the delimiter.
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            # e.g. a str/bytes mismatch with a text-mode file: treat as
            # "not found in this chunk" and keep scanning.
            pass
        # Keep only enough trailing bytes to catch a boundary-spanning match.
        last = full[-len(delimiter) :]
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        # Snap the start forward to the first delimiter at or after `offset`
        # (offset 0 counts as a valid start; see seek_delimiter).
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # Shrink the requested length by the bytes skipped above.
        length -= start - offset

        # Snap the end forward to the next delimiter after `start + length`.
        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter iff seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Produce a deterministic md5 token for the given arguments.

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        args += (kwargs,)
    payload = str(args).encode()
    try:
        digest = md5(payload)
    except ValueError:
        # FIPS-enabled builds refuse plain md5; mark it as non-security use.
        # https://github.com/fsspec/filesystem_spec/issues/380
        digest = md5(payload, usedforsecurity=False)
    return digest.hexdigest()
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced via ``__fspath__``;
    ``pathlib.Path`` objects are specially coerced for backwards
    compatibility with older Python versions.  Anything else — bytes,
    buffers, objects that aren't path-like — is passed through unchanged.
    """
    if isinstance(filepath, str):
        return filepath
    if hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    if isinstance(filepath, pathlib.Path):
        # Unreachable on modern Python (Path implements __fspath__);
        # retained for backwards compatibility.
        return str(filepath)
    if hasattr(filepath, "path"):
        return filepath.path
    # Not path-like: hand back unchanged.
    return filepath  # type: ignore[return-value]
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Construct ``cls(*args, **kwargs)`` and call its ``_determine_worker`` hook.

    Module-level helper so the construction can be referenced as a plain
    function (e.g. by serialization machinery); the instance is expected to
    provide ``_determine_worker``.
    """
    obj = cls(*args, **kwargs)
    obj._determine_worker()  # type: ignore[attr-defined]
    return obj
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def common_prefix(paths: Iterable[str]) -> str:
    """For a list of "/"-separated paths, return the longest shared prefix
    measured in whole path components."""
    split = [p.split("/") for p in paths]
    shortest = min(len(s) for s in split)
    matched = 0
    for idx in range(shortest):
        segment = split[0][idx]
        if any(s[idx] != segment for s in split):
            break
        matched += 1
    return "/".join(split[0][:matched])
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination that already exists (and is a dir), files should
        end up inside it.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            # Discard source directory structure: keep only each basename
            # under the destination root.
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                # Destination already exists: drop the last common component
                # so the sources' top directory lands inside it.
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                # No common prefix among relative paths: nest each full
                # relative path under the destination root.
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                # Re-root each path by swapping the common prefix for path2.
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        assert len(paths) == len(path2)
    return path2
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def is_exception(obj: Any) -> bool:
    """Return True when *obj* is an exception instance (any BaseException)."""
    return isinstance(obj, BaseException)
|
| 429 |
+
|
| 430 |
+
|
| 431 |
+
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
|
| 432 |
+
for attr in ["read", "close", "tell"]:
|
| 433 |
+
if not hasattr(f, attr):
|
| 434 |
+
return False
|
| 435 |
+
return True
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def get_protocol(url: str) -> str:
    """Return the protocol portion of *url*, or ``"file"`` when none is present.

    Splits on the first occurrence of either the chained-filesystem
    separator ``::`` or the standard ``://``.
    """
    url = stringify_path(url)
    # maxsplit must be passed by keyword: positional use is deprecated in
    # Python 3.13 and removed later.  Also dropped the needless \: and \/
    # escapes (":" and "/" are not regex metacharacters).
    parts = re.split(r"(::|://)", url, maxsplit=1)
    if len(parts) > 1:
        return parts[0]
    return "file"
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        # Filesystem classes that yield genuine local paths advertise a
        # truthy ``local_file`` class attribute.
        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it.

    Checks an already-imported module's ``__version__`` first, then installed
    package metadata; importing the module is the last resort, so an import
    *might* still happen.  Returns the version string, or None if neither the
    package nor its version could be found.
    """
    # Already imported: trust the module's own version attribute.
    mod = sys.modules.get(name)
    if mod is not None and hasattr(mod, "__version__"):
        return mod.__version__
    # Installed metadata — no import required.
    try:
        return version(name)
    except:  # noqa: E722
        pass
    # Last resort: actually import the module.
    try:
        import importlib

        return importlib.import_module(name).__version__
    except (ImportError, AttributeError):
        return None
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    """Attach a stream handler to a logger and set its level.

    Exactly one of ``logger`` / ``logger_name`` must identify the target;
    with ``clear=True`` any previously attached handlers are removed first.
    Returns the configured logger.
    """
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    target = logger or logging.getLogger(logger_name)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
        )
    )
    if clear:
        target.handlers.clear()
    target.addHandler(handler)
    target.setLevel(level)
    return target
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    """Re-attach ``fs``'s protocol prefix to the bare path *name*."""
    # NOTE(review): thin delegation wrapper; presumably exists so the
    # operation can be passed around as a module-level function (e.g. for
    # serialization) — confirm against callers.
    return fs.unstrip_protocol(name)
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Class decorator: expose each name in *methods* as a read-only property
    forwarding to the same attribute on ``self.<origin_name>``."""

    def _forward(attr: str, self: Any) -> Any:
        # Look up the origin object at access time, then the mirrored attr.
        return getattr(getattr(self, origin_name), attr)

    def _decorate(cls: type[T]) -> type[T]:
        for attr in methods:
            setattr(cls, attr, property(partial(_forward, attr)))
        return cls

    return _decorate
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    # No-op context manager: yields *obj* unchanged and does nothing on exit.
    # NOTE(review): mirrors contextlib.nullcontext(enter_result=obj);
    # presumably kept local for compatibility — confirm before replacing.
    yield obj
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        # Scalar start: broadcast to one entry per path.
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        # Scalar end: broadcast likewise.
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    # A falsy (None/0) start means "from the beginning of the file".
    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                # A None end already spans to end-of-file for this path:
                # the current range is subsumed.
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
def file_size(filelike: IO[bytes]) -> int:
    """Length in bytes of an open, seekable, read-mode file-like object.

    The current position is saved and restored around the measurement.
    """
    mark = filelike.tell()
    try:
        # Seeking to the end returns the absolute offset, i.e. the size.
        return filelike.seek(0, 2)
    finally:
        filelike.seek(mark)
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    Open a temporary sibling of *path* for writing and, when the ``with``
    body completes successfully, move it over *path* so the update is
    atomic.  On error the temporary file is removed and *path* is left
    untouched.
    """
    handle, tmp_name = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        with open(handle, mode) as stream:
            yield stream
    except BaseException:
        # Remove the partial file; it may already have been cleaned up.
        with contextlib.suppress(FileNotFoundError):
            os.unlink(tmp_name)
        raise
    else:
        os.replace(tmp_name, path)
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
def _translate(pat, STAR, QUESTION_MARK):
    """Translate one fnmatch-style segment into a list of regex fragments.

    ``STAR`` and ``QUESTION_MARK`` are the regex replacements for ``*`` and
    ``?`` supplied by the caller (glob_translate passes segment-local forms
    that do not cross path separators).
    """
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            # Scan ahead for the matching "]", honouring a leading "!" and a
            # literal "]" immediately after it.
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # Unterminated class: treat the "[" as a literal character.
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    # Split the class on range-forming hyphens so escaping
                    # can be applied per-chunk below.
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Trailing hyphen: it is a literal, not a range.
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res
|
| 705 |
+
|
| 706 |
+
|
| 707 |
+
def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    # Regex building blocks: one path segment with/without its trailing
    # separator, and "any number of segments" for the ** wildcard.
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == "*":
            # "*" matches exactly one segment; non-final parts also consume
            # their separator.
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            # "**" matches any number of segments, including zero.
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            # Ordinary segment: translate fnmatch-style wildcards within it.
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    # (?s: ...) lets "." match newlines too; \Z anchors at end of string.
    return rf"(?s:{res})\Z"
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file has moved to under torch/_functorch. It is not public API.
|
| 2 |
+
# If you are not a PyTorch developer and you are relying on the following
|
| 3 |
+
# imports, please file an issue.
|
| 4 |
+
from torch._functorch.aot_autograd import (
|
| 5 |
+
aot_autograd_decompositions,
|
| 6 |
+
KNOWN_TYPES,
|
| 7 |
+
PytreeThunk,
|
| 8 |
+
)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (403 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file has moved to under torch/_functorch. It is not public API.
|
| 2 |
+
# If you are not a PyTorch developer and you are relying on the following
|
| 3 |
+
# imports, please file an issue.
|
| 4 |
+
from torch._functorch.make_functional import _swap_state
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (315 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file has moved to under torch/_functorch. It is not public API.
|
| 2 |
+
# If you are not a PyTorch developer and you are relying on the following
|
| 3 |
+
# imports, please file an issue.
|
| 4 |
+
from torch._functorch.vmap import (
|
| 5 |
+
_add_batch_dim,
|
| 6 |
+
_broadcast_to_and_flatten,
|
| 7 |
+
_create_batched_inputs,
|
| 8 |
+
_get_name,
|
| 9 |
+
_process_batched_inputs,
|
| 10 |
+
_remove_batch_dim,
|
| 11 |
+
_unwrap_batched,
|
| 12 |
+
_validate_and_get_batch_size,
|
| 13 |
+
Tensor,
|
| 14 |
+
tree_flatten,
|
| 15 |
+
tree_unflatten,
|
| 16 |
+
)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (691 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch._functorch import config
|
| 2 |
+
from torch._functorch.aot_autograd import (
|
| 3 |
+
aot_function,
|
| 4 |
+
aot_module,
|
| 5 |
+
aot_module_simplified,
|
| 6 |
+
compiled_function,
|
| 7 |
+
compiled_module,
|
| 8 |
+
get_aot_compilation_context,
|
| 9 |
+
get_aot_graph_name,
|
| 10 |
+
get_graph_being_compiled,
|
| 11 |
+
make_boxed_compiler,
|
| 12 |
+
make_boxed_func,
|
| 13 |
+
)
|
| 14 |
+
from torch._functorch.compilers import (
|
| 15 |
+
debug_compile,
|
| 16 |
+
default_decompositions,
|
| 17 |
+
draw_graph_compile,
|
| 18 |
+
memory_efficient_fusion,
|
| 19 |
+
nnc_jit,
|
| 20 |
+
nop,
|
| 21 |
+
print_compile,
|
| 22 |
+
ts_compile,
|
| 23 |
+
)
|
| 24 |
+
from torch._functorch.fx_minifier import minifier
|
| 25 |
+
from torch._functorch.partitioners import (
|
| 26 |
+
default_partition,
|
| 27 |
+
draw_graph,
|
| 28 |
+
draw_joint_graph,
|
| 29 |
+
min_cut_rematerialization_partition,
|
| 30 |
+
)
|
| 31 |
+
from torch._functorch.python_key import pythonkey_decompose
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (7.88 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc
ADDED
|
Binary file (32.4 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Adapted from https://github.com/arogozhnikov/einops/blob/36c7bb16e57d6e57f8f3050f9e07abdf3f00469f/einops/parsing.py.
|
| 2 |
+
|
| 3 |
+
MIT License
|
| 4 |
+
|
| 5 |
+
Copyright (c) 2018 Alex Rogozhnikov
|
| 6 |
+
|
| 7 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 8 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 9 |
+
in the Software without restriction, including without limitation the rights
|
| 10 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 11 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 12 |
+
furnished to do so, subject to the following conditions:
|
| 13 |
+
|
| 14 |
+
The above copyright notice and this permission notice shall be included in all
|
| 15 |
+
copies or substantial portions of the Software.
|
| 16 |
+
|
| 17 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 18 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 19 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 20 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 21 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 22 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 23 |
+
SOFTWARE.
|
| 24 |
+
"""
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import keyword
|
| 28 |
+
import warnings
|
| 29 |
+
from typing import Collection, List, Mapping, Optional, Set, Tuple, Union
|
| 30 |
+
|
| 31 |
+
_ellipsis: str = "…" # NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class AnonymousAxis:
    """Represents a sized (> 1) axis that has no identifier of its own.

    Equality is identity-based: two ``AnonymousAxis`` objects built from the
    same value are still distinct from each other.
    """

    def __init__(self, value: str) -> None:
        # Parse the textual size once, then validate it.
        parsed = int(value)
        self.value = parsed
        if parsed < 1:
            raise ValueError(
                f"Anonymous axis should have positive length, not {parsed}"
            )

    def __repr__(self) -> str:
        # e.g. AnonymousAxis("3") renders as "3-axis"
        return f"{self.value}-axis"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ParsedExpression:
    """Structure containing information about one side of an `einops`-style pattern (e.g. 'b c (h w)')."""

    def __init__(
        self,
        expression: str,
        *,
        allow_underscore: bool = False,
        allow_duplicates: bool = False,
    ) -> None:
        """Parse the expression and store relevant metadata.

        Args:
            expression (str): the `einops`-pattern to parse
            allow_underscore (bool): whether to allow axis identifier names to begin with an underscore
            allow_duplicates (bool): whether to allow an identifier to appear more than once in the expression

        Raises:
            ValueError: if the expression is malformed (misused dots/ellipsis, unbalanced
                parentheses, invalid identifiers, or disallowed duplicates)
        """
        self.has_ellipsis: bool = False
        self.has_ellipsis_parenthesized: Optional[bool] = None
        self.identifiers: Set[Union[str, AnonymousAxis]] = set()
        # that's axes like 2, 3, 4 or 5. Axes with size 1 are exceptional and replaced with empty composition
        self.has_non_unitary_anonymous_axes: bool = False
        # composition keeps structure of composite axes, see how different corner cases are handled in tests
        self.composition: List[Union[List[Union[str, AnonymousAxis]], str]] = []
        if "." in expression:
            if "..." not in expression:
                raise ValueError(
                    "Expression may contain dots only inside ellipsis (...)"
                )
            if str.count(expression, "...") != 1 or str.count(expression, ".") != 3:
                raise ValueError(
                    "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor "
                )
            # normalize "..." to a single unicode char so the char-by-char scan below treats it as one token
            expression = expression.replace("...", _ellipsis)
            self.has_ellipsis = True

        # axes of the parenthesized group currently being scanned; None when outside parentheses
        bracket_group: Optional[List[Union[str, AnonymousAxis]]] = None

        def add_axis_name(x: str) -> None:
            # Validate one completed axis token and record it into identifiers/composition.
            if x in self.identifiers:
                if not (allow_underscore and x == "_") and not allow_duplicates:
                    raise ValueError(
                        f"Indexing expression contains duplicate dimension '{x}'"
                    )
            if x == _ellipsis:
                self.identifiers.add(_ellipsis)
                if bracket_group is None:
                    self.composition.append(_ellipsis)
                    self.has_ellipsis_parenthesized = False
                else:
                    bracket_group.append(_ellipsis)
                    self.has_ellipsis_parenthesized = True
            else:
                is_number = str.isdecimal(x)
                if is_number and int(x) == 1:
                    # handling the case of anonymous axis of length 1
                    if bracket_group is None:
                        self.composition.append([])
                    else:
                        pass  # no need to think about 1s inside parenthesis
                    return
                is_axis_name, reason = self.check_axis_name_return_reason(
                    x, allow_underscore=allow_underscore
                )
                if not (is_number or is_axis_name):
                    raise ValueError(f"Invalid axis identifier: {x}\n{reason}")
                axis_name: Union[str, AnonymousAxis] = (
                    AnonymousAxis(x) if is_number else x
                )
                self.identifiers.add(axis_name)
                if is_number:
                    self.has_non_unitary_anonymous_axes = True
                if bracket_group is None:
                    self.composition.append([axis_name])
                else:
                    bracket_group.append(axis_name)

        current_identifier = None
        for char in expression:
            if char in "() ":
                # delimiter reached: flush the token accumulated so far, if any
                if current_identifier is not None:
                    add_axis_name(current_identifier)
                current_identifier = None
                if char == "(":
                    if bracket_group is not None:
                        raise ValueError(
                            "Axis composition is one-level (brackets inside brackets not allowed)"
                        )
                    bracket_group = []
                elif char == ")":
                    if bracket_group is None:
                        raise ValueError("Brackets are not balanced")
                    self.composition.append(bracket_group)
                    bracket_group = None
            elif str.isalnum(char) or char in ["_", _ellipsis]:
                # grow the current axis token one character at a time
                if current_identifier is None:
                    current_identifier = char
                else:
                    current_identifier += char
            else:
                raise ValueError(f"Unknown character '{char}'")

        if bracket_group is not None:
            raise ValueError(f"Imbalanced parentheses in expression: '{expression}'")
        if current_identifier is not None:
            add_axis_name(current_identifier)

    @staticmethod
    def check_axis_name_return_reason(
        name: str, allow_underscore: bool = False
    ) -> Tuple[bool, str]:
        """Check if the given axis name is valid, and a message explaining why if not.

        Valid axes names are python identifiers except keywords, and should not start or end with an underscore.

        Args:
            name (str): the axis name to check
            allow_underscore (bool): whether axis names are allowed to start with an underscore

        Returns:
            Tuple[bool, str]: whether the axis name is valid, a message explaining why if not
        """
        if not str.isidentifier(name):
            return False, "not a valid python identifier"
        elif name[0] == "_" or name[-1] == "_":
            if name == "_" and allow_underscore:
                return True, ""
            # fixed: message previously duplicated the word "should"
            return False, "axis name should not start or end with underscore"
        else:
            if keyword.iskeyword(name):
                warnings.warn(
                    f"It is discouraged to use axes names that are keywords: {name}",
                    RuntimeWarning,
                )
            if name in ["axis"]:
                warnings.warn(
                    "It is discouraged to use 'axis' as an axis name and will raise an error in future",
                    FutureWarning,
                )
            return True, ""

    @staticmethod
    def check_axis_name(name: str) -> bool:
        """Check if the name is a valid axis name.

        Args:
            name (str): the axis name to check

        Returns:
            bool: whether the axis name is valid
        """
        is_valid, _ = ParsedExpression.check_axis_name_return_reason(name)
        return is_valid
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def parse_pattern(
    pattern: str, axes_lengths: Mapping[str, int]
) -> Tuple[ParsedExpression, ParsedExpression]:
    """Parse an `einops`-style pattern into a left-hand side and right-hand side `ParsedExpression` object.

    Args:
        pattern (str): the `einops`-style rearrangement pattern
        axes_lengths (Mapping[str, int]): any additional length specifications for dimensions

    Returns:
        Tuple[ParsedExpression, ParsedExpression]: a tuple containing the left-hand side and right-hand side expressions

    Raises:
        ValueError: if the pattern lacks a single '->' separator, names the ellipsis
            in ``axes_lengths``, or uses the ellipsis inconsistently between sides
    """
    # adapted from einops.einops._prepare_transformation_recipe
    # https://github.com/arogozhnikov/einops/blob/230ac1526c1f42c9e1f7373912c7f8047496df11/einops/einops.py
    try:
        left_str, right_str = pattern.split("->")
    except ValueError:
        raise ValueError("Pattern must contain a single '->' separator") from None

    if _ellipsis in axes_lengths:
        raise ValueError(f"'{_ellipsis}' is not an allowed axis identifier")

    left = ParsedExpression(left_str)
    right = ParsedExpression(right_str)

    # an ellipsis on the right must be bound by one on the left
    if not left.has_ellipsis and right.has_ellipsis:
        raise ValueError(
            f"Ellipsis found in right side, but not left side of a pattern {pattern}"
        )
    # fixed garbled wording of this message ("Ellipsis is parenthesis ...")
    if left.has_ellipsis and left.has_ellipsis_parenthesized:
        raise ValueError(
            f"Ellipsis inside parenthesis on the left side is not allowed: {pattern}"
        )

    return left, right
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def validate_rearrange_expressions(
    left: ParsedExpression, right: ParsedExpression, axes_lengths: Mapping[str, int]
) -> None:
    """Perform expression validations that are specific to the `rearrange` operation.

    Args:
        left (ParsedExpression): left-hand side expression
        right (ParsedExpression): right-hand side expression
        axes_lengths (Mapping[str, int]): any additional length specifications for dimensions
    """
    # every explicitly provided axis length must be a plain int
    for size in axes_lengths.values():
        size_type = type(size)
        if size_type is not int:
            raise TypeError(
                f"rearrange axis lengths must be integers, got: {size_type}"
            )

    if left.has_non_unitary_anonymous_axes or right.has_non_unitary_anonymous_axes:
        raise ValueError("rearrange only supports unnamed axes of size 1")

    # rearrange is a pure permutation/reshape: both sides must name the same axes
    one_sided = left.identifiers ^ right.identifiers
    if one_sided:
        raise ValueError(
            f"Identifiers only on one side of rearrange expression (should be on both): {one_sided}"
        )

    # every supplied length must correspond to an axis in the pattern
    missing = axes_lengths.keys() - left.identifiers
    if missing:
        raise ValueError(
            f"Identifiers not found in rearrange expression: {missing}"
        )
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def comma_separate(collection: Collection[Union[str, Collection[str]]]) -> str:
    """Render a collection of strings representing first class dims as one comma-separated string.

    Plain strings are emitted as-is; nested collections are rendered recursively
    and wrapped in parentheses, with a trailing comma for single-element groups
    (mirroring Python tuple syntax).

    Args:
        collection (Collection[Union[str, Collection[str]]]): the collection of strings to convert

    Returns:
        str: the comma-separated string

    Examples:
        >>> comma_separate(('d0',))
        'd0'

        >>> comma_separate(('d0', 'd1', 'd2', 'd3'))
        'd0, d1, d2, d3'

        >>> comma_separate([('d1', 'd4')])
        '(d1, d4)'

        >>> comma_separate([('d0',), (), ('d1',), ('d2',), ('d3', 'd4')])
        '(d0,), (), (d1,), (d2,), (d3, d4)'
    """
    rendered = []
    for entry in collection:
        if isinstance(entry, str):
            rendered.append(entry)
        else:
            trailing = "," if len(entry) == 1 else ""
            rendered.append(f"({comma_separate(entry)}{trailing})")
    return ", ".join(rendered)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PyTorch forward-mode is not mature yet
|
| 2 |
+
from torch._functorch.apis import chunk_vmap
|
| 3 |
+
from torch._functorch.batch_norm_replacement import replace_all_batch_norm_modules_
|
| 4 |
+
from torch._functorch.eager_transforms import hessian, jacfwd, jvp
|
| 5 |
+
|
| 6 |
+
from functorch import functionalize
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 2 |
+
nvidia/__pycache__/__init__.cpython-311.pyc,,
|
| 3 |
+
nvidia/cufft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 4 |
+
nvidia/cufft/__pycache__/__init__.cpython-311.pyc,,
|
| 5 |
+
nvidia/cufft/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 6 |
+
nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc,,
|
| 7 |
+
nvidia/cufft/include/cudalibxt.h,sha256=9GDuRiOzJuO61zRDhIpWpF7XHp8FXSOIlHJNoIMwOZQ,4105
|
| 8 |
+
nvidia/cufft/include/cufft.h,sha256=Ui7ajKuYZcP-2bm9mpH96YN1igLKeDLgrttyc4jMQJE,12570
|
| 9 |
+
nvidia/cufft/include/cufftXt.h,sha256=LfRdibvAlaNQ35vYqI4n8YcMpPYROrIjpZu2L0tISi4,11463
|
| 10 |
+
nvidia/cufft/include/cufftw.h,sha256=DBrJQf-dnCWD-OYgdhnEzn8OiAX0U3xdteEaNdhs7mU,19412
|
| 11 |
+
nvidia/cufft/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 12 |
+
nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc,,
|
| 13 |
+
nvidia/cufft/lib/libcufft.so.10,sha256=ylxvgdWEkGtNMrmErYcE3WW_db2rQzTtIs5-73UBqVo,279161544
|
| 14 |
+
nvidia/cufft/lib/libcufftw.so.10,sha256=GlkqWy81mpB3VQ7h_a3VjrLPnMC_q4_jl6N0-5SdoUM,1618440
|
| 15 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 16 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
|
| 17 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/METADATA,sha256=XITT6bPOjdOxPQa-kAVw4XjFf4_iU-JoLUXrOwPJ4JA,1503
|
| 18 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD,,
|
| 19 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/WHEEL,sha256=-kQi_VMfvRQozZJT7HUPMfY-5vLo0LVTmAylNJ3Ft98,106
|
| 20 |
+
nvidia_cufft_cu11-10.9.0.58.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt
ADDED
|
@@ -0,0 +1,1568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
End User License Agreement
|
| 2 |
+
--------------------------
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
Preface
|
| 6 |
+
-------
|
| 7 |
+
|
| 8 |
+
The Software License Agreement in Chapter 1 and the Supplement
|
| 9 |
+
in Chapter 2 contain license terms and conditions that govern
|
| 10 |
+
the use of NVIDIA software. By accepting this agreement, you
|
| 11 |
+
agree to comply with all the terms and conditions applicable
|
| 12 |
+
to the product(s) included herein.
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
NVIDIA Driver
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
Description
|
| 19 |
+
|
| 20 |
+
This package contains the operating system driver and
|
| 21 |
+
fundamental system software components for NVIDIA GPUs.
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
NVIDIA CUDA Toolkit
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
Description
|
| 28 |
+
|
| 29 |
+
The NVIDIA CUDA Toolkit provides command-line and graphical
|
| 30 |
+
tools for building, debugging and optimizing the performance
|
| 31 |
+
of applications accelerated by NVIDIA GPUs, runtime and math
|
| 32 |
+
libraries, and documentation including programming guides,
|
| 33 |
+
user manuals, and API references.
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
Default Install Location of CUDA Toolkit
|
| 37 |
+
|
| 38 |
+
Windows platform:
|
| 39 |
+
|
| 40 |
+
%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
|
| 41 |
+
|
| 42 |
+
Linux platform:
|
| 43 |
+
|
| 44 |
+
/usr/local/cuda-#.#
|
| 45 |
+
|
| 46 |
+
Mac platform:
|
| 47 |
+
|
| 48 |
+
/Developer/NVIDIA/CUDA-#.#
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
NVIDIA CUDA Samples
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
Description
|
| 55 |
+
|
| 56 |
+
This package includes over 100+ CUDA examples that demonstrate
|
| 57 |
+
various CUDA programming principles, and efficient CUDA
|
| 58 |
+
implementation of algorithms in specific application domains.
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
Default Install Location of CUDA Samples
|
| 62 |
+
|
| 63 |
+
Windows platform:
|
| 64 |
+
|
| 65 |
+
%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
|
| 66 |
+
|
| 67 |
+
Linux platform:
|
| 68 |
+
|
| 69 |
+
/usr/local/cuda-#.#/samples
|
| 70 |
+
|
| 71 |
+
and
|
| 72 |
+
|
| 73 |
+
$HOME/NVIDIA_CUDA-#.#_Samples
|
| 74 |
+
|
| 75 |
+
Mac platform:
|
| 76 |
+
|
| 77 |
+
/Developer/NVIDIA/CUDA-#.#/samples
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
NVIDIA Nsight Visual Studio Edition (Windows only)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
Description
|
| 84 |
+
|
| 85 |
+
NVIDIA Nsight Development Platform, Visual Studio Edition is a
|
| 86 |
+
development environment integrated into Microsoft Visual
|
| 87 |
+
Studio that provides tools for debugging, profiling, analyzing
|
| 88 |
+
and optimizing your GPU computing and graphics applications.
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
Default Install Location of Nsight Visual Studio Edition
|
| 92 |
+
|
| 93 |
+
Windows platform:
|
| 94 |
+
|
| 95 |
+
%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
1. License Agreement for NVIDIA Software Development Kits
|
| 99 |
+
---------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
Release Date: July 26, 2018
|
| 103 |
+
---------------------------
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
Important Notice — Read before downloading,
|
| 107 |
+
copying or using the licensed software:
|
| 108 |
+
-------------------------------------------------------
|
| 109 |
+
|
| 110 |
+
This license agreement, including exhibits attached
|
| 111 |
+
("Agreement") is a legal agreement between you and NVIDIA
|
| 112 |
+
Corporation ("NVIDIA") and governs your use of a NVIDIA
|
| 113 |
+
software development kit (“SDK”).
|
| 114 |
+
|
| 115 |
+
Each SDK has its own set of software and materials, but here
|
| 116 |
+
is a description of the types of items that may be included in
|
| 117 |
+
a SDK: source code, header files, APIs, data sets and assets
|
| 118 |
+
(examples include images, textures, models, scenes, videos,
|
| 119 |
+
native API input/output files), binary software, sample code,
|
| 120 |
+
libraries, utility programs, programming code and
|
| 121 |
+
documentation.
|
| 122 |
+
|
| 123 |
+
This Agreement can be accepted only by an adult of legal age
|
| 124 |
+
of majority in the country in which the SDK is used.
|
| 125 |
+
|
| 126 |
+
If you are entering into this Agreement on behalf of a company
|
| 127 |
+
or other legal entity, you represent that you have the legal
|
| 128 |
+
authority to bind the entity to this Agreement, in which case
|
| 129 |
+
“you” will mean the entity you represent.
|
| 130 |
+
|
| 131 |
+
If you don’t have the required age or authority to accept
|
| 132 |
+
this Agreement, or if you don’t accept all the terms and
|
| 133 |
+
conditions of this Agreement, do not download, install or use
|
| 134 |
+
the SDK.
|
| 135 |
+
|
| 136 |
+
You agree to use the SDK only for purposes that are permitted
|
| 137 |
+
by (a) this Agreement, and (b) any applicable law, regulation
|
| 138 |
+
or generally accepted practices or guidelines in the relevant
|
| 139 |
+
jurisdictions.
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
1.1. License
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
1.1.1. License Grant
|
| 146 |
+
|
| 147 |
+
Subject to the terms of this Agreement, NVIDIA hereby grants
|
| 148 |
+
you a non-exclusive, non-transferable license, without the
|
| 149 |
+
right to sublicense (except as expressly provided in this
|
| 150 |
+
Agreement) to:
|
| 151 |
+
|
| 152 |
+
1. Install and use the SDK,
|
| 153 |
+
|
| 154 |
+
2. Modify and create derivative works of sample source code
|
| 155 |
+
delivered in the SDK, and
|
| 156 |
+
|
| 157 |
+
3. Distribute those portions of the SDK that are identified
|
| 158 |
+
in this Agreement as distributable, as incorporated in
|
| 159 |
+
object code format into a software application that meets
|
| 160 |
+
the distribution requirements indicated in this Agreement.
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
1.1.2. Distribution Requirements
|
| 164 |
+
|
| 165 |
+
These are the distribution requirements for you to exercise
|
| 166 |
+
the distribution grant:
|
| 167 |
+
|
| 168 |
+
1. Your application must have material additional
|
| 169 |
+
functionality, beyond the included portions of the SDK.
|
| 170 |
+
|
| 171 |
+
2. The distributable portions of the SDK shall only be
|
| 172 |
+
accessed by your application.
|
| 173 |
+
|
| 174 |
+
3. The following notice shall be included in modifications
|
| 175 |
+
and derivative works of sample source code distributed:
|
| 176 |
+
“This software contains source code provided by NVIDIA
|
| 177 |
+
Corporation.”
|
| 178 |
+
|
| 179 |
+
4. Unless a developer tool is identified in this Agreement
|
| 180 |
+
as distributable, it is delivered for your internal use
|
| 181 |
+
only.
|
| 182 |
+
|
| 183 |
+
5. The terms under which you distribute your application
|
| 184 |
+
must be consistent with the terms of this Agreement,
|
| 185 |
+
including (without limitation) terms relating to the
|
| 186 |
+
license grant and license restrictions and protection of
|
| 187 |
+
NVIDIA’s intellectual property rights. Additionally, you
|
| 188 |
+
agree that you will protect the privacy, security and
|
| 189 |
+
legal rights of your application users.
|
| 190 |
+
|
| 191 |
+
6. You agree to notify NVIDIA in writing of any known or
|
| 192 |
+
suspected distribution or use of the SDK not in compliance
|
| 193 |
+
with the requirements of this Agreement, and to enforce
|
| 194 |
+
the terms of your agreements with respect to distributed
|
| 195 |
+
SDK.
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
1.1.3. Authorized Users
|
| 199 |
+
|
| 200 |
+
You may allow employees and contractors of your entity or of
|
| 201 |
+
your subsidiary(ies) to access and use the SDK from your
|
| 202 |
+
secure network to perform work on your behalf.
|
| 203 |
+
|
| 204 |
+
If you are an academic institution you may allow users
|
| 205 |
+
enrolled or employed by the academic institution to access and
|
| 206 |
+
use the SDK from your secure network.
|
| 207 |
+
|
| 208 |
+
You are responsible for the compliance with the terms of this
|
| 209 |
+
Agreement by your authorized users. If you become aware that
|
| 210 |
+
your authorized users didn’t follow the terms of this
|
| 211 |
+
Agreement, you agree to take reasonable steps to resolve the
|
| 212 |
+
non-compliance and prevent new occurrences.
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
1.1.4. Pre-Release SDK
|
| 216 |
+
|
| 217 |
+
The SDK versions identified as alpha, beta, preview or
|
| 218 |
+
otherwise as pre-release, may not be fully functional, may
|
| 219 |
+
contain errors or design flaws, and may have reduced or
|
| 220 |
+
different security, privacy, accessibility, availability, and
|
| 221 |
+
reliability standards relative to commercial versions of
|
| 222 |
+
NVIDIA software and materials. Use of a pre-release SDK may
|
| 223 |
+
result in unexpected results, loss of data, project delays or
|
| 224 |
+
other unpredictable damage or loss.
|
| 225 |
+
|
| 226 |
+
You may use a pre-release SDK at your own risk, understanding
|
| 227 |
+
that pre-release SDKs are not intended for use in production
|
| 228 |
+
or business-critical systems.
|
| 229 |
+
|
| 230 |
+
NVIDIA may choose not to make available a commercial version
|
| 231 |
+
of any pre-release SDK. NVIDIA may also choose to abandon
|
| 232 |
+
development and terminate the availability of a pre-release
|
| 233 |
+
SDK at any time without liability.
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
1.1.5. Updates
|
| 237 |
+
|
| 238 |
+
NVIDIA may, at its option, make available patches, workarounds
|
| 239 |
+
or other updates to this SDK. Unless the updates are provided
|
| 240 |
+
with their separate governing terms, they are deemed part of
|
| 241 |
+
the SDK licensed to you as provided in this Agreement. You
|
| 242 |
+
agree that the form and content of the SDK that NVIDIA
|
| 243 |
+
provides may change without prior notice to you. While NVIDIA
|
| 244 |
+
generally maintains compatibility between versions, NVIDIA may
|
| 245 |
+
in some cases make changes that introduce incompatibilities in
|
| 246 |
+
future versions of the SDK.
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
1.1.6. Third Party Licenses
|
| 250 |
+
|
| 251 |
+
The SDK may come bundled with, or otherwise include or be
|
| 252 |
+
distributed with, third party software licensed by a NVIDIA
|
| 253 |
+
supplier and/or open source software provided under an open
|
| 254 |
+
source license. Use of third party software is subject to the
|
| 255 |
+
third-party license terms, or in the absence of third party
|
| 256 |
+
terms, the terms of this Agreement. Copyright to third party
|
| 257 |
+
software is held by the copyright holders indicated in the
|
| 258 |
+
third-party software or license.
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
1.1.7. Reservation of Rights
|
| 262 |
+
|
| 263 |
+
NVIDIA reserves all rights, title, and interest in and to the
|
| 264 |
+
SDK, not expressly granted to you under this Agreement.
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
1.2. Limitations
|
| 268 |
+
|
| 269 |
+
The following license limitations apply to your use of the
|
| 270 |
+
SDK:
|
| 271 |
+
|
| 272 |
+
1. You may not reverse engineer, decompile or disassemble,
|
| 273 |
+
or remove copyright or other proprietary notices from any
|
| 274 |
+
portion of the SDK or copies of the SDK.
|
| 275 |
+
|
| 276 |
+
2. Except as expressly provided in this Agreement, you may
|
| 277 |
+
not copy, sell, rent, sublicense, transfer, distribute,
|
| 278 |
+
modify, or create derivative works of any portion of the
|
| 279 |
+
SDK. For clarity, you may not distribute or sublicense the
|
| 280 |
+
SDK as a stand-alone product.
|
| 281 |
+
|
| 282 |
+
3. Unless you have an agreement with NVIDIA for this
|
| 283 |
+
purpose, you may not indicate that an application created
|
| 284 |
+
with the SDK is sponsored or endorsed by NVIDIA.
|
| 285 |
+
|
| 286 |
+
4. You may not bypass, disable, or circumvent any
|
| 287 |
+
encryption, security, digital rights management or
|
| 288 |
+
authentication mechanism in the SDK.
|
| 289 |
+
|
| 290 |
+
5. You may not use the SDK in any manner that would cause it
|
| 291 |
+
to become subject to an open source software license. As
|
| 292 |
+
examples, licenses that require as a condition of use,
|
| 293 |
+
modification, and/or distribution that the SDK be:
|
| 294 |
+
|
| 295 |
+
a. Disclosed or distributed in source code form;
|
| 296 |
+
|
| 297 |
+
b. Licensed for the purpose of making derivative works;
|
| 298 |
+
or
|
| 299 |
+
|
| 300 |
+
c. Redistributable at no charge.
|
| 301 |
+
|
| 302 |
+
6. Unless you have an agreement with NVIDIA for this
|
| 303 |
+
purpose, you may not use the SDK with any system or
|
| 304 |
+
application where the use or failure of the system or
|
| 305 |
+
application can reasonably be expected to threaten or
|
| 306 |
+
result in personal injury, death, or catastrophic loss.
|
| 307 |
+
Examples include use in avionics, navigation, military,
|
| 308 |
+
medical, life support or other life critical applications.
|
| 309 |
+
NVIDIA does not design, test or manufacture the SDK for
|
| 310 |
+
these critical uses and NVIDIA shall not be liable to you
|
| 311 |
+
or any third party, in whole or in part, for any claims or
|
| 312 |
+
damages arising from such uses.
|
| 313 |
+
|
| 314 |
+
7. You agree to defend, indemnify and hold harmless NVIDIA
|
| 315 |
+
and its affiliates, and their respective employees,
|
| 316 |
+
contractors, agents, officers and directors, from and
|
| 317 |
+
against any and all claims, damages, obligations, losses,
|
| 318 |
+
liabilities, costs or debt, fines, restitutions and
|
| 319 |
+
expenses (including but not limited to attorney’s fees
|
| 320 |
+
and costs incident to establishing the right of
|
| 321 |
+
indemnification) arising out of or related to your use of
|
| 322 |
+
the SDK outside of the scope of this Agreement, or not in
|
| 323 |
+
compliance with its terms.
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
1.3. Ownership
|
| 327 |
+
|
| 328 |
+
1. NVIDIA or its licensors hold all rights, title and
|
| 329 |
+
interest in and to the SDK and its modifications and
|
| 330 |
+
derivative works, including their respective intellectual
|
| 331 |
+
property rights, subject to your rights described in this
|
| 332 |
+
section. This SDK may include software and materials from
|
| 333 |
+
NVIDIA’s licensors, and these licensors are intended
|
| 334 |
+
third party beneficiaries that may enforce this Agreement
|
| 335 |
+
with respect to their intellectual property rights.
|
| 336 |
+
|
| 337 |
+
2. You hold all rights, title and interest in and to your
|
| 338 |
+
applications and your derivative works of the sample
|
| 339 |
+
source code delivered in the SDK, including their
|
| 340 |
+
respective intellectual property rights, subject to
|
| 341 |
+
NVIDIA’s rights described in this section.
|
| 342 |
+
|
| 343 |
+
3. You may, but don’t have to, provide to NVIDIA
|
| 344 |
+
suggestions, feature requests or other feedback regarding
|
| 345 |
+
the SDK, including possible enhancements or modifications
|
| 346 |
+
to the SDK. For any feedback that you voluntarily provide,
|
| 347 |
+
you hereby grant NVIDIA and its affiliates a perpetual,
|
| 348 |
+
non-exclusive, worldwide, irrevocable license to use,
|
| 349 |
+
reproduce, modify, license, sublicense (through multiple
|
| 350 |
+
tiers of sublicensees), and distribute (through multiple
|
| 351 |
+
tiers of distributors) it without the payment of any
|
| 352 |
+
royalties or fees to you. NVIDIA will use feedback at its
|
| 353 |
+
choice. NVIDIA is constantly looking for ways to improve
|
| 354 |
+
its products, so you may send feedback to NVIDIA through
|
| 355 |
+
the developer portal at https://developer.nvidia.com.
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
1.4. No Warranties
|
| 359 |
+
|
| 360 |
+
THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
|
| 361 |
+
FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
|
| 362 |
+
ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
|
| 363 |
+
OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
|
| 364 |
+
BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
|
| 365 |
+
FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
|
| 366 |
+
ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
|
| 367 |
+
WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
|
| 368 |
+
DEALING OR COURSE OF TRADE.
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
1.5. Limitation of Liability
|
| 372 |
+
|
| 373 |
+
TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
|
| 374 |
+
AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
|
| 375 |
+
PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
|
| 376 |
+
OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
|
| 377 |
+
PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
|
| 378 |
+
WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
|
| 379 |
+
WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
|
| 380 |
+
OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
|
| 381 |
+
PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
|
| 382 |
+
LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
|
| 383 |
+
TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
|
| 384 |
+
AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
|
| 385 |
+
NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
|
| 386 |
+
LIMIT.
|
| 387 |
+
|
| 388 |
+
These exclusions and limitations of liability shall apply
|
| 389 |
+
regardless if NVIDIA or its affiliates have been advised of
|
| 390 |
+
the possibility of such damages, and regardless of whether a
|
| 391 |
+
remedy fails its essential purpose. These exclusions and
|
| 392 |
+
limitations of liability form an essential basis of the
|
| 393 |
+
bargain between the parties, and, absent any of these
|
| 394 |
+
exclusions or limitations of liability, the provisions of this
|
| 395 |
+
Agreement, including, without limitation, the economic terms,
|
| 396 |
+
would be substantially different.
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
1.6. Termination
|
| 400 |
+
|
| 401 |
+
1. This Agreement will continue to apply until terminated by
|
| 402 |
+
either you or NVIDIA as described below.
|
| 403 |
+
|
| 404 |
+
2. If you want to terminate this Agreement, you may do so by
|
| 405 |
+
stopping to use the SDK.
|
| 406 |
+
|
| 407 |
+
3. NVIDIA may, at any time, terminate this Agreement if:
|
| 408 |
+
|
| 409 |
+
a. (i) you fail to comply with any term of this
|
| 410 |
+
Agreement and the non-compliance is not fixed within
|
| 411 |
+
thirty (30) days following notice from NVIDIA (or
|
| 412 |
+
immediately if you violate NVIDIA’s intellectual
|
| 413 |
+
property rights);
|
| 414 |
+
|
| 415 |
+
b. (ii) you commence or participate in any legal
|
| 416 |
+
proceeding against NVIDIA with respect to the SDK; or
|
| 417 |
+
|
| 418 |
+
c. (iii) NVIDIA decides to no longer provide the SDK in
|
| 419 |
+
a country or, in NVIDIA’s sole discretion, the
|
| 420 |
+
continued use of it is no longer commercially viable.
|
| 421 |
+
|
| 422 |
+
4. Upon any termination of this Agreement, you agree to
|
| 423 |
+
promptly discontinue use of the SDK and destroy all copies
|
| 424 |
+
in your possession or control. Your prior distributions in
|
| 425 |
+
accordance with this Agreement are not affected by the
|
| 426 |
+
termination of this Agreement. Upon written request, you
|
| 427 |
+
will certify in writing that you have complied with your
|
| 428 |
+
commitments under this section. Upon any termination of
|
| 429 |
+
this Agreement all provisions survive except for the
|
| 430 |
+
license grant provisions.
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
1.7. General
|
| 434 |
+
|
| 435 |
+
If you wish to assign this Agreement or your rights and
|
| 436 |
+
obligations, including by merger, consolidation, dissolution
|
| 437 |
+
or operation of law, contact NVIDIA to ask for permission. Any
|
| 438 |
+
attempted assignment not approved by NVIDIA in writing shall
|
| 439 |
+
be void and of no effect. NVIDIA may assign, delegate or
|
| 440 |
+
transfer this Agreement and its rights and obligations, and if
|
| 441 |
+
to a non-affiliate you will be notified.
|
| 442 |
+
|
| 443 |
+
You agree to cooperate with NVIDIA and provide reasonably
|
| 444 |
+
requested information to verify your compliance with this
|
| 445 |
+
Agreement.
|
| 446 |
+
|
| 447 |
+
This Agreement will be governed in all respects by the laws of
|
| 448 |
+
the United States and of the State of Delaware as those laws
|
| 449 |
+
are applied to contracts entered into and performed entirely
|
| 450 |
+
within Delaware by Delaware residents, without regard to the
|
| 451 |
+
conflicts of laws principles. The United Nations Convention on
|
| 452 |
+
Contracts for the International Sale of Goods is specifically
|
| 453 |
+
disclaimed. You agree to all terms of this Agreement in the
|
| 454 |
+
English language.
|
| 455 |
+
|
| 456 |
+
The state or federal courts residing in Santa Clara County,
|
| 457 |
+
California shall have exclusive jurisdiction over any dispute
|
| 458 |
+
or claim arising out of this Agreement. Notwithstanding this,
|
| 459 |
+
you agree that NVIDIA shall still be allowed to apply for
|
| 460 |
+
injunctive remedies or an equivalent type of urgent legal
|
| 461 |
+
relief in any jurisdiction.
|
| 462 |
+
|
| 463 |
+
If any court of competent jurisdiction determines that any
|
| 464 |
+
provision of this Agreement is illegal, invalid or
|
| 465 |
+
unenforceable, such provision will be construed as limited to
|
| 466 |
+
the extent necessary to be consistent with and fully
|
| 467 |
+
enforceable under the law and the remaining provisions will
|
| 468 |
+
remain in full force and effect. Unless otherwise specified,
|
| 469 |
+
remedies are cumulative.
|
| 470 |
+
|
| 471 |
+
Each party acknowledges and agrees that the other is an
|
| 472 |
+
independent contractor in the performance of this Agreement.
|
| 473 |
+
|
| 474 |
+
The SDK has been developed entirely at private expense and is
|
| 475 |
+
“commercial items” consisting of “commercial computer
|
| 476 |
+
software” and “commercial computer software
|
| 477 |
+
documentation” provided with RESTRICTED RIGHTS. Use,
|
| 478 |
+
duplication or disclosure by the U.S. Government or a U.S.
|
| 479 |
+
Government subcontractor is subject to the restrictions in
|
| 480 |
+
this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
|
| 481 |
+
in subparagraphs (c)(1) and (2) of the Commercial Computer
|
| 482 |
+
Software - Restricted Rights clause at FAR 52.227-19, as
|
| 483 |
+
applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
|
| 484 |
+
Expressway, Santa Clara, CA 95051.
|
| 485 |
+
|
| 486 |
+
The SDK is subject to United States export laws and
|
| 487 |
+
regulations. You agree that you will not ship, transfer or
|
| 488 |
+
export the SDK into any country, or use the SDK in any manner,
|
| 489 |
+
prohibited by the United States Bureau of Industry and
|
| 490 |
+
Security or economic sanctions regulations administered by the
|
| 491 |
+
U.S. Department of Treasury’s Office of Foreign Assets
|
| 492 |
+
Control (OFAC), or any applicable export laws, restrictions or
|
| 493 |
+
regulations. These laws include restrictions on destinations,
|
| 494 |
+
end users and end use. By accepting this Agreement, you
|
| 495 |
+
confirm that you are not a resident or citizen of any country
|
| 496 |
+
currently embargoed by the U.S. and that you are not otherwise
|
| 497 |
+
prohibited from receiving the SDK.
|
| 498 |
+
|
| 499 |
+
Any notice delivered by NVIDIA to you under this Agreement
|
| 500 |
+
will be delivered via mail, email or fax. You agree that any
|
| 501 |
+
notices that NVIDIA sends you electronically will satisfy any
|
| 502 |
+
legal communication requirements. Please direct your legal
|
| 503 |
+
notices or other correspondence to NVIDIA Corporation, 2788
|
| 504 |
+
San Tomas Expressway, Santa Clara, California 95051, United
|
| 505 |
+
States of America, Attention: Legal Department.
|
| 506 |
+
|
| 507 |
+
This Agreement and any exhibits incorporated into this
|
| 508 |
+
Agreement constitute the entire agreement of the parties with
|
| 509 |
+
respect to the subject matter of this Agreement and supersede
|
| 510 |
+
all prior negotiations or documentation exchanged between the
|
| 511 |
+
parties relating to this SDK license. Any additional and/or
|
| 512 |
+
conflicting terms on documents issued by you are null, void,
|
| 513 |
+
and invalid. Any amendment or waiver under this Agreement
|
| 514 |
+
shall be in writing and signed by representatives of both
|
| 515 |
+
parties.
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
2. CUDA Toolkit Supplement to Software License Agreement for
|
| 519 |
+
NVIDIA Software Development Kits
|
| 520 |
+
------------------------------------------------------------
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
Release date: August 16, 2018
|
| 524 |
+
-----------------------------
|
| 525 |
+
|
| 526 |
+
The terms in this supplement govern your use of the NVIDIA
|
| 527 |
+
CUDA Toolkit SDK under the terms of your license agreement
|
| 528 |
+
(“Agreement”) as modified by this supplement. Capitalized
|
| 529 |
+
terms used but not defined below have the meaning assigned to
|
| 530 |
+
them in the Agreement.
|
| 531 |
+
|
| 532 |
+
This supplement is an exhibit to the Agreement and is
|
| 533 |
+
incorporated as an integral part of the Agreement. In the
|
| 534 |
+
event of conflict between the terms in this supplement and the
|
| 535 |
+
terms in the Agreement, the terms in this supplement govern.
|
| 536 |
+
|
| 537 |
+
|
| 538 |
+
2.1. License Scope
|
| 539 |
+
|
| 540 |
+
The SDK is licensed for you to develop applications only for
|
| 541 |
+
use in systems with NVIDIA GPUs.
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
2.2. Distribution
|
| 545 |
+
|
| 546 |
+
The portions of the SDK that are distributable under the
|
| 547 |
+
Agreement are listed in Attachment A.
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
2.3. Operating Systems
|
| 551 |
+
|
| 552 |
+
Those portions of the SDK designed exclusively for use on the
|
| 553 |
+
Linux or FreeBSD operating systems, or other operating systems
|
| 554 |
+
derived from the source code to these operating systems, may
|
| 555 |
+
be copied and redistributed for use in accordance with this
|
| 556 |
+
Agreement, provided that the object code files are not
|
| 557 |
+
modified in any way (except for unzipping of compressed
|
| 558 |
+
files).
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
2.4. Audio and Video Encoders and Decoders
|
| 562 |
+
|
| 563 |
+
You acknowledge and agree that it is your sole responsibility
|
| 564 |
+
to obtain any additional third-party licenses required to
|
| 565 |
+
make, have made, use, have used, sell, import, and offer for
|
| 566 |
+
sale your products or services that include or incorporate any
|
| 567 |
+
third-party software and content relating to audio and/or
|
| 568 |
+
video encoders and decoders from, including but not limited
|
| 569 |
+
to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
|
| 570 |
+
MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
|
| 571 |
+
under this Agreement any necessary patent or other rights with
|
| 572 |
+
respect to any audio and/or video encoders and decoders.
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
2.5. Licensing
|
| 576 |
+
|
| 577 |
+
If the distribution terms in this Agreement are not suitable
|
| 578 |
+
for your organization, or for any questions regarding this
|
| 579 |
+
Agreement, please contact NVIDIA at
|
| 580 |
+
nvidia-compute-license-questions@nvidia.com.
|
| 581 |
+
|
| 582 |
+
|
| 583 |
+
2.6. Attachment A
|
| 584 |
+
|
| 585 |
+
The following portions of the SDK are distributable under the
|
| 586 |
+
Agreement:
|
| 587 |
+
|
| 588 |
+
Component
|
| 589 |
+
|
| 590 |
+
CUDA Runtime
|
| 591 |
+
|
| 592 |
+
Windows
|
| 593 |
+
|
| 594 |
+
cudart.dll, cudart_static.lib, cudadevrt.lib
|
| 595 |
+
|
| 596 |
+
Mac OSX
|
| 597 |
+
|
| 598 |
+
libcudart.dylib, libcudart_static.a, libcudadevrt.a
|
| 599 |
+
|
| 600 |
+
Linux
|
| 601 |
+
|
| 602 |
+
libcudart.so, libcudart_static.a, libcudadevrt.a
|
| 603 |
+
|
| 604 |
+
Android
|
| 605 |
+
|
| 606 |
+
libcudart.so, libcudart_static.a, libcudadevrt.a
|
| 607 |
+
|
| 608 |
+
Component
|
| 609 |
+
|
| 610 |
+
CUDA FFT Library
|
| 611 |
+
|
| 612 |
+
Windows
|
| 613 |
+
|
| 614 |
+
cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
|
| 615 |
+
|
| 616 |
+
Mac OSX
|
| 617 |
+
|
| 618 |
+
libcufft.dylib, libcufft_static.a, libcufftw.dylib,
|
| 619 |
+
libcufftw_static.a
|
| 620 |
+
|
| 621 |
+
Linux
|
| 622 |
+
|
| 623 |
+
libcufft.so, libcufft_static.a, libcufftw.so,
|
| 624 |
+
libcufftw_static.a
|
| 625 |
+
|
| 626 |
+
Android
|
| 627 |
+
|
| 628 |
+
libcufft.so, libcufft_static.a, libcufftw.so,
|
| 629 |
+
libcufftw_static.a
|
| 630 |
+
|
| 631 |
+
Component
|
| 632 |
+
|
| 633 |
+
CUDA BLAS Library
|
| 634 |
+
|
| 635 |
+
Windows
|
| 636 |
+
|
| 637 |
+
cublas.dll, cublasLt.dll
|
| 638 |
+
|
| 639 |
+
Mac OSX
|
| 640 |
+
|
| 641 |
+
libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
|
| 642 |
+
libcublasLt_static.a
|
| 643 |
+
|
| 644 |
+
Linux
|
| 645 |
+
|
| 646 |
+
libcublas.so, libcublasLt.so, libcublas_static.a,
|
| 647 |
+
libcublasLt_static.a
|
| 648 |
+
|
| 649 |
+
Android
|
| 650 |
+
|
| 651 |
+
libcublas.so, libcublasLt.so, libcublas_static.a,
|
| 652 |
+
libcublasLt_static.a
|
| 653 |
+
|
| 654 |
+
Component
|
| 655 |
+
|
| 656 |
+
NVIDIA "Drop-in" BLAS Library
|
| 657 |
+
|
| 658 |
+
Windows
|
| 659 |
+
|
| 660 |
+
nvblas.dll
|
| 661 |
+
|
| 662 |
+
Mac OSX
|
| 663 |
+
|
| 664 |
+
libnvblas.dylib
|
| 665 |
+
|
| 666 |
+
Linux
|
| 667 |
+
|
| 668 |
+
libnvblas.so
|
| 669 |
+
|
| 670 |
+
Component
|
| 671 |
+
|
| 672 |
+
CUDA Sparse Matrix Library
|
| 673 |
+
|
| 674 |
+
Windows
|
| 675 |
+
|
| 676 |
+
cusparse.dll, cusparse.lib
|
| 677 |
+
|
| 678 |
+
Mac OSX
|
| 679 |
+
|
| 680 |
+
libcusparse.dylib, libcusparse_static.a
|
| 681 |
+
|
| 682 |
+
Linux
|
| 683 |
+
|
| 684 |
+
libcusparse.so, libcusparse_static.a
|
| 685 |
+
|
| 686 |
+
Android
|
| 687 |
+
|
| 688 |
+
libcusparse.so, libcusparse_static.a
|
| 689 |
+
|
| 690 |
+
Component
|
| 691 |
+
|
| 692 |
+
CUDA Linear Solver Library
|
| 693 |
+
|
| 694 |
+
Windows
|
| 695 |
+
|
| 696 |
+
cusolver.dll, cusolver.lib
|
| 697 |
+
|
| 698 |
+
Mac OSX
|
| 699 |
+
|
| 700 |
+
libcusolver.dylib, libcusolver_static.a
|
| 701 |
+
|
| 702 |
+
Linux
|
| 703 |
+
|
| 704 |
+
libcusolver.so, libcusolver_static.a
|
| 705 |
+
|
| 706 |
+
Android
|
| 707 |
+
|
| 708 |
+
libcusolver.so, libcusolver_static.a
|
| 709 |
+
|
| 710 |
+
Component
|
| 711 |
+
|
| 712 |
+
CUDA Random Number Generation Library
|
| 713 |
+
|
| 714 |
+
Windows
|
| 715 |
+
|
| 716 |
+
curand.dll, curand.lib
|
| 717 |
+
|
| 718 |
+
Mac OSX
|
| 719 |
+
|
| 720 |
+
libcurand.dylib, libcurand_static.a
|
| 721 |
+
|
| 722 |
+
Linux
|
| 723 |
+
|
| 724 |
+
libcurand.so, libcurand_static.a
|
| 725 |
+
|
| 726 |
+
Android
|
| 727 |
+
|
| 728 |
+
libcurand.so, libcurand_static.a
|
| 729 |
+
|
| 730 |
+
Component
|
| 731 |
+
|
| 732 |
+
CUDA Accelerated Graph Library
|
| 733 |
+
|
| 734 |
+
Component
|
| 735 |
+
|
| 736 |
+
NVIDIA Performance Primitives Library
|
| 737 |
+
|
| 738 |
+
Windows
|
| 739 |
+
|
| 740 |
+
nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
|
| 741 |
+
nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
|
| 742 |
+
nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
|
| 743 |
+
nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
|
| 744 |
+
nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
|
| 745 |
+
|
| 746 |
+
Mac OSX
|
| 747 |
+
|
| 748 |
+
libnppc.dylib, libnppc_static.a, libnppial.dylib,
|
| 749 |
+
libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
|
| 750 |
+
libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
|
| 751 |
+
libnppidei_static.a, libnppif.dylib, libnppif_static.a,
|
| 752 |
+
libnppig.dylib, libnppig_static.a, libnppim.dylib,
|
| 753 |
+
libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
|
| 754 |
+
libnpps.dylib, libnpps_static.a
|
| 755 |
+
|
| 756 |
+
Linux
|
| 757 |
+
|
| 758 |
+
libnppc.so, libnppc_static.a, libnppial.so,
|
| 759 |
+
libnppial_static.a, libnppicc.so, libnppicc_static.a,
|
| 760 |
+
libnppicom.so, libnppicom_static.a, libnppidei.so,
|
| 761 |
+
libnppidei_static.a, libnppif.so, libnppif_static.a
|
| 762 |
+
libnppig.so, libnppig_static.a, libnppim.so,
|
| 763 |
+
libnppim_static.a, libnppist.so, libnppist_static.a,
|
| 764 |
+
libnppisu.so, libnppisu_static.a, libnppitc.so
|
| 765 |
+
libnppitc_static.a, libnpps.so, libnpps_static.a
|
| 766 |
+
|
| 767 |
+
Android
|
| 768 |
+
|
| 769 |
+
libnppc.so, libnppc_static.a, libnppial.so,
|
| 770 |
+
libnppial_static.a, libnppicc.so, libnppicc_static.a,
|
| 771 |
+
libnppicom.so, libnppicom_static.a, libnppidei.so,
|
| 772 |
+
libnppidei_static.a, libnppif.so, libnppif_static.a
|
| 773 |
+
libnppig.so, libnppig_static.a, libnppim.so,
|
| 774 |
+
libnppim_static.a, libnppist.so, libnppist_static.a,
|
| 775 |
+
libnppisu.so, libnppisu_static.a, libnppitc.so
|
| 776 |
+
libnppitc_static.a, libnpps.so, libnpps_static.a
|
| 777 |
+
|
| 778 |
+
Component
|
| 779 |
+
|
| 780 |
+
NVIDIA JPEG Library
|
| 781 |
+
|
| 782 |
+
Linux
|
| 783 |
+
|
| 784 |
+
libnvjpeg.so, libnvjpeg_static.a
|
| 785 |
+
|
| 786 |
+
Component
|
| 787 |
+
|
| 788 |
+
Internal common library required for statically linking to
|
| 789 |
+
cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
|
| 790 |
+
|
| 791 |
+
Mac OSX
|
| 792 |
+
|
| 793 |
+
libculibos.a
|
| 794 |
+
|
| 795 |
+
Linux
|
| 796 |
+
|
| 797 |
+
libculibos.a
|
| 798 |
+
|
| 799 |
+
Component
|
| 800 |
+
|
| 801 |
+
NVIDIA Runtime Compilation Library and Header
|
| 802 |
+
|
| 803 |
+
All
|
| 804 |
+
|
| 805 |
+
nvrtc.h
|
| 806 |
+
|
| 807 |
+
Windows
|
| 808 |
+
|
| 809 |
+
nvrtc.dll, nvrtc-builtins.dll
|
| 810 |
+
|
| 811 |
+
Mac OSX
|
| 812 |
+
|
| 813 |
+
libnvrtc.dylib, libnvrtc-builtins.dylib
|
| 814 |
+
|
| 815 |
+
Linux
|
| 816 |
+
|
| 817 |
+
libnvrtc.so, libnvrtc-builtins.so
|
| 818 |
+
|
| 819 |
+
Component
|
| 820 |
+
|
| 821 |
+
NVIDIA Optimizing Compiler Library
|
| 822 |
+
|
| 823 |
+
Windows
|
| 824 |
+
|
| 825 |
+
nvvm.dll
|
| 826 |
+
|
| 827 |
+
Mac OSX
|
| 828 |
+
|
| 829 |
+
libnvvm.dylib
|
| 830 |
+
|
| 831 |
+
Linux
|
| 832 |
+
|
| 833 |
+
libnvvm.so
|
| 834 |
+
|
| 835 |
+
Component
|
| 836 |
+
|
| 837 |
+
NVIDIA Common Device Math Functions Library
|
| 838 |
+
|
| 839 |
+
Windows
|
| 840 |
+
|
| 841 |
+
libdevice.10.bc
|
| 842 |
+
|
| 843 |
+
Mac OSX
|
| 844 |
+
|
| 845 |
+
libdevice.10.bc
|
| 846 |
+
|
| 847 |
+
Linux
|
| 848 |
+
|
| 849 |
+
libdevice.10.bc
|
| 850 |
+
|
| 851 |
+
Component
|
| 852 |
+
|
| 853 |
+
CUDA Occupancy Calculation Header Library
|
| 854 |
+
|
| 855 |
+
All
|
| 856 |
+
|
| 857 |
+
cuda_occupancy.h
|
| 858 |
+
|
| 859 |
+
Component
|
| 860 |
+
|
| 861 |
+
CUDA Half Precision Headers
|
| 862 |
+
|
| 863 |
+
All
|
| 864 |
+
|
| 865 |
+
cuda_fp16.h, cuda_fp16.hpp
|
| 866 |
+
|
| 867 |
+
Component
|
| 868 |
+
|
| 869 |
+
CUDA Profiling Tools Interface (CUPTI) Library
|
| 870 |
+
|
| 871 |
+
Windows
|
| 872 |
+
|
| 873 |
+
cupti.dll
|
| 874 |
+
|
| 875 |
+
Mac OSX
|
| 876 |
+
|
| 877 |
+
libcupti.dylib
|
| 878 |
+
|
| 879 |
+
Linux
|
| 880 |
+
|
| 881 |
+
libcupti.so
|
| 882 |
+
|
| 883 |
+
Component
|
| 884 |
+
|
| 885 |
+
NVIDIA Tools Extension Library
|
| 886 |
+
|
| 887 |
+
Windows
|
| 888 |
+
|
| 889 |
+
nvToolsExt.dll, nvToolsExt.lib
|
| 890 |
+
|
| 891 |
+
Mac OSX
|
| 892 |
+
|
| 893 |
+
libnvToolsExt.dylib
|
| 894 |
+
|
| 895 |
+
Linux
|
| 896 |
+
|
| 897 |
+
libnvToolsExt.so
|
| 898 |
+
|
| 899 |
+
Component
|
| 900 |
+
|
| 901 |
+
NVIDIA CUDA Driver Libraries
|
| 902 |
+
|
| 903 |
+
Linux
|
| 904 |
+
|
| 905 |
+
libcuda.so, libnvidia-fatbinaryloader.so,
|
| 906 |
+
libnvidia-ptxjitcompiler.so
|
| 907 |
+
|
| 908 |
+
The NVIDIA CUDA Driver Libraries are only distributable in
|
| 909 |
+
applications that meet this criteria:
|
| 910 |
+
|
| 911 |
+
1. The application was developed starting from a NVIDIA CUDA
|
| 912 |
+
container obtained from Docker Hub or the NVIDIA GPU
|
| 913 |
+
Cloud, and
|
| 914 |
+
|
| 915 |
+
2. The resulting application is packaged as a Docker
|
| 916 |
+
container and distributed to users on Docker Hub or the
|
| 917 |
+
NVIDIA GPU Cloud only.
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
2.7. Attachment B
|
| 921 |
+
|
| 922 |
+
|
| 923 |
+
Additional Licensing Obligations
|
| 924 |
+
|
| 925 |
+
The following third party components included in the SOFTWARE
|
| 926 |
+
are licensed to Licensee pursuant to the following terms and
|
| 927 |
+
conditions:
|
| 928 |
+
|
| 929 |
+
1. Licensee's use of the GDB third party component is
|
| 930 |
+
subject to the terms and conditions of GNU GPL v3:
|
| 931 |
+
|
| 932 |
+
This product includes copyrighted third-party software licensed
|
| 933 |
+
under the terms of the GNU General Public License v3 ("GPL v3").
|
| 934 |
+
All third-party software packages are copyright by their respective
|
| 935 |
+
authors. GPL v3 terms and conditions are hereby incorporated into
|
| 936 |
+
the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
|
| 937 |
+
|
| 938 |
+
Consistent with these licensing requirements, the software
|
| 939 |
+
listed below is provided under the terms of the specified
|
| 940 |
+
open source software licenses. To obtain source code for
|
| 941 |
+
software provided under licenses that require
|
| 942 |
+
redistribution of source code, including the GNU General
|
| 943 |
+
Public License (GPL) and GNU Lesser General Public License
|
| 944 |
+
(LGPL), contact oss-requests@nvidia.com. This offer is
|
| 945 |
+
valid for a period of three (3) years from the date of the
|
| 946 |
+
distribution of this product by NVIDIA CORPORATION.
|
| 947 |
+
|
| 948 |
+
Component License
|
| 949 |
+
CUDA-GDB GPL v3
|
| 950 |
+
|
| 951 |
+
2. Licensee represents and warrants that any and all third
|
| 952 |
+
party licensing and/or royalty payment obligations in
|
| 953 |
+
connection with Licensee's use of the H.264 video codecs
|
| 954 |
+
are solely the responsibility of Licensee.
|
| 955 |
+
|
| 956 |
+
3. Licensee's use of the Thrust library is subject to the
|
| 957 |
+
terms and conditions of the Apache License Version 2.0.
|
| 958 |
+
All third-party software packages are copyright by their
|
| 959 |
+
respective authors. Apache License Version 2.0 terms and
|
| 960 |
+
conditions are hereby incorporated into the Agreement by
|
| 961 |
+
this reference.
|
| 962 |
+
http://www.apache.org/licenses/LICENSE-2.0.html
|
| 963 |
+
|
| 964 |
+
In addition, Licensee acknowledges the following notice:
|
| 965 |
+
Thrust includes source code from the Boost Iterator,
|
| 966 |
+
Tuple, System, and Random Number libraries.
|
| 967 |
+
|
| 968 |
+
Boost Software License - Version 1.0 - August 17th, 2003
|
| 969 |
+
. . . .
|
| 970 |
+
|
| 971 |
+
Permission is hereby granted, free of charge, to any person or
|
| 972 |
+
organization obtaining a copy of the software and accompanying
|
| 973 |
+
documentation covered by this license (the "Software") to use,
|
| 974 |
+
reproduce, display, distribute, execute, and transmit the Software,
|
| 975 |
+
and to prepare derivative works of the Software, and to permit
|
| 976 |
+
third-parties to whom the Software is furnished to do so, all
|
| 977 |
+
subject to the following:
|
| 978 |
+
|
| 979 |
+
The copyright notices in the Software and this entire statement,
|
| 980 |
+
including the above license grant, this restriction and the following
|
| 981 |
+
disclaimer, must be included in all copies of the Software, in whole
|
| 982 |
+
or in part, and all derivative works of the Software, unless such
|
| 983 |
+
copies or derivative works are solely in the form of machine-executable
|
| 984 |
+
object code generated by a source language processor.
|
| 985 |
+
|
| 986 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 987 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 988 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
|
| 989 |
+
NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
| 990 |
+
ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
|
| 991 |
+
OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
|
| 992 |
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
| 993 |
+
OTHER DEALINGS IN THE SOFTWARE.
|
| 994 |
+
|
| 995 |
+
4. Licensee's use of the LLVM third party component is
|
| 996 |
+
subject to the following terms and conditions:
|
| 997 |
+
|
| 998 |
+
======================================================
|
| 999 |
+
LLVM Release License
|
| 1000 |
+
======================================================
|
| 1001 |
+
University of Illinois/NCSA
|
| 1002 |
+
Open Source License
|
| 1003 |
+
|
| 1004 |
+
Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
|
| 1005 |
+
All rights reserved.
|
| 1006 |
+
|
| 1007 |
+
Developed by:
|
| 1008 |
+
|
| 1009 |
+
LLVM Team
|
| 1010 |
+
|
| 1011 |
+
University of Illinois at Urbana-Champaign
|
| 1012 |
+
|
| 1013 |
+
http://llvm.org
|
| 1014 |
+
|
| 1015 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 1016 |
+
of this software and associated documentation files (the "Software"), to
|
| 1017 |
+
deal with the Software without restriction, including without limitation the
|
| 1018 |
+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
| 1019 |
+
sell copies of the Software, and to permit persons to whom the Software is
|
| 1020 |
+
furnished to do so, subject to the following conditions:
|
| 1021 |
+
|
| 1022 |
+
* Redistributions of source code must retain the above copyright notice,
|
| 1023 |
+
this list of conditions and the following disclaimers.
|
| 1024 |
+
|
| 1025 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 1026 |
+
notice, this list of conditions and the following disclaimers in the
|
| 1027 |
+
documentation and/or other materials provided with the distribution.
|
| 1028 |
+
|
| 1029 |
+
* Neither the names of the LLVM Team, University of Illinois at Urbana-
|
| 1030 |
+
Champaign, nor the names of its contributors may be used to endorse or
|
| 1031 |
+
promote products derived from this Software without specific prior
|
| 1032 |
+
written permission.
|
| 1033 |
+
|
| 1034 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 1035 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 1036 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 1037 |
+
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
| 1038 |
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
| 1039 |
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 1040 |
+
DEALINGS WITH THE SOFTWARE.
|
| 1041 |
+
|
| 1042 |
+
5. Licensee's use (e.g. nvprof) of the PCRE third party
|
| 1043 |
+
component is subject to the following terms and
|
| 1044 |
+
conditions:
|
| 1045 |
+
|
| 1046 |
+
------------
|
| 1047 |
+
PCRE LICENCE
|
| 1048 |
+
------------
|
| 1049 |
+
PCRE is a library of functions to support regular expressions whose syntax
|
| 1050 |
+
and semantics are as close as possible to those of the Perl 5 language.
|
| 1051 |
+
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
|
| 1052 |
+
specified below. The documentation for PCRE, supplied in the "doc"
|
| 1053 |
+
directory, is distributed under the same terms as the software itself. The
|
| 1054 |
+
basic library functions are written in C and are freestanding. Also
|
| 1055 |
+
included in the distribution is a set of C++ wrapper functions, and a just-
|
| 1056 |
+
in-time compiler that can be used to optimize pattern matching. These are
|
| 1057 |
+
both optional features that can be omitted when the library is built.
|
| 1058 |
+
|
| 1059 |
+
THE BASIC LIBRARY FUNCTIONS
|
| 1060 |
+
---------------------------
|
| 1061 |
+
Written by: Philip Hazel
|
| 1062 |
+
Email local part: ph10
|
| 1063 |
+
Email domain: cam.ac.uk
|
| 1064 |
+
University of Cambridge Computing Service,
|
| 1065 |
+
Cambridge, England.
|
| 1066 |
+
Copyright (c) 1997-2012 University of Cambridge
|
| 1067 |
+
All rights reserved.
|
| 1068 |
+
|
| 1069 |
+
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
| 1070 |
+
-------------------------------------
|
| 1071 |
+
Written by: Zoltan Herczeg
|
| 1072 |
+
Email local part: hzmester
|
| 1073 |
+
Emain domain: freemail.hu
|
| 1074 |
+
Copyright(c) 2010-2012 Zoltan Herczeg
|
| 1075 |
+
All rights reserved.
|
| 1076 |
+
|
| 1077 |
+
STACK-LESS JUST-IN-TIME COMPILER
|
| 1078 |
+
--------------------------------
|
| 1079 |
+
Written by: Zoltan Herczeg
|
| 1080 |
+
Email local part: hzmester
|
| 1081 |
+
Emain domain: freemail.hu
|
| 1082 |
+
Copyright(c) 2009-2012 Zoltan Herczeg
|
| 1083 |
+
All rights reserved.
|
| 1084 |
+
|
| 1085 |
+
THE C++ WRAPPER FUNCTIONS
|
| 1086 |
+
-------------------------
|
| 1087 |
+
Contributed by: Google Inc.
|
| 1088 |
+
Copyright (c) 2007-2012, Google Inc.
|
| 1089 |
+
All rights reserved.
|
| 1090 |
+
|
| 1091 |
+
THE "BSD" LICENCE
|
| 1092 |
+
-----------------
|
| 1093 |
+
Redistribution and use in source and binary forms, with or without
|
| 1094 |
+
modification, are permitted provided that the following conditions are met:
|
| 1095 |
+
|
| 1096 |
+
* Redistributions of source code must retain the above copyright notice,
|
| 1097 |
+
this list of conditions and the following disclaimer.
|
| 1098 |
+
|
| 1099 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 1100 |
+
notice, this list of conditions and the following disclaimer in the
|
| 1101 |
+
documentation and/or other materials provided with the distribution.
|
| 1102 |
+
|
| 1103 |
+
* Neither the name of the University of Cambridge nor the name of Google
|
| 1104 |
+
Inc. nor the names of their contributors may be used to endorse or
|
| 1105 |
+
promote products derived from this software without specific prior
|
| 1106 |
+
written permission.
|
| 1107 |
+
|
| 1108 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 1109 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 1110 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| 1111 |
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
| 1112 |
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 1113 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 1114 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| 1115 |
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| 1116 |
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| 1117 |
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 1118 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 1119 |
+
|
| 1120 |
+
6. Some of the cuBLAS library routines were written by or
|
| 1121 |
+
derived from code written by Vasily Volkov and are subject
|
| 1122 |
+
to the Modified Berkeley Software Distribution License as
|
| 1123 |
+
follows:
|
| 1124 |
+
|
| 1125 |
+
Copyright (c) 2007-2009, Regents of the University of California
|
| 1126 |
+
|
| 1127 |
+
All rights reserved.
|
| 1128 |
+
|
| 1129 |
+
Redistribution and use in source and binary forms, with or without
|
| 1130 |
+
modification, are permitted provided that the following conditions are
|
| 1131 |
+
met:
|
| 1132 |
+
* Redistributions of source code must retain the above copyright
|
| 1133 |
+
notice, this list of conditions and the following disclaimer.
|
| 1134 |
+
* Redistributions in binary form must reproduce the above
|
| 1135 |
+
copyright notice, this list of conditions and the following
|
| 1136 |
+
disclaimer in the documentation and/or other materials provided
|
| 1137 |
+
with the distribution.
|
| 1138 |
+
* Neither the name of the University of California, Berkeley nor
|
| 1139 |
+
the names of its contributors may be used to endorse or promote
|
| 1140 |
+
products derived from this software without specific prior
|
| 1141 |
+
written permission.
|
| 1142 |
+
|
| 1143 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
| 1144 |
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 1145 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 1146 |
+
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
| 1147 |
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
| 1148 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 1149 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
| 1150 |
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
| 1151 |
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
| 1152 |
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 1153 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 1154 |
+
|
| 1155 |
+
7. Some of the cuBLAS library routines were written by or
|
| 1156 |
+
derived from code written by Davide Barbieri and are
|
| 1157 |
+
subject to the Modified Berkeley Software Distribution
|
| 1158 |
+
License as follows:
|
| 1159 |
+
|
| 1160 |
+
Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
|
| 1161 |
+
|
| 1162 |
+
All rights reserved.
|
| 1163 |
+
|
| 1164 |
+
Redistribution and use in source and binary forms, with or without
|
| 1165 |
+
modification, are permitted provided that the following conditions are
|
| 1166 |
+
met:
|
| 1167 |
+
* Redistributions of source code must retain the above copyright
|
| 1168 |
+
notice, this list of conditions and the following disclaimer.
|
| 1169 |
+
* Redistributions in binary form must reproduce the above
|
| 1170 |
+
copyright notice, this list of conditions and the following
|
| 1171 |
+
disclaimer in the documentation and/or other materials provided
|
| 1172 |
+
with the distribution.
|
| 1173 |
+
* The name of the author may not be used to endorse or promote
|
| 1174 |
+
products derived from this software without specific prior
|
| 1175 |
+
written permission.
|
| 1176 |
+
|
| 1177 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
| 1178 |
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 1179 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 1180 |
+
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
| 1181 |
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
| 1182 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 1183 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
| 1184 |
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
| 1185 |
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
| 1186 |
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 1187 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 1188 |
+
|
| 1189 |
+
8. Some of the cuBLAS library routines were derived from
|
| 1190 |
+
code developed by the University of Tennessee and are
|
| 1191 |
+
subject to the Modified Berkeley Software Distribution
|
| 1192 |
+
License as follows:
|
| 1193 |
+
|
| 1194 |
+
Copyright (c) 2010 The University of Tennessee.
|
| 1195 |
+
|
| 1196 |
+
All rights reserved.
|
| 1197 |
+
|
| 1198 |
+
Redistribution and use in source and binary forms, with or without
|
| 1199 |
+
modification, are permitted provided that the following conditions are
|
| 1200 |
+
met:
|
| 1201 |
+
* Redistributions of source code must retain the above copyright
|
| 1202 |
+
notice, this list of conditions and the following disclaimer.
|
| 1203 |
+
* Redistributions in binary form must reproduce the above
|
| 1204 |
+
copyright notice, this list of conditions and the following
|
| 1205 |
+
disclaimer listed in this license in the documentation and/or
|
| 1206 |
+
other materials provided with the distribution.
|
| 1207 |
+
* Neither the name of the copyright holders nor the names of its
|
| 1208 |
+
contributors may be used to endorse or promote products derived
|
| 1209 |
+
from this software without specific prior written permission.
|
| 1210 |
+
|
| 1211 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1212 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1213 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1214 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1215 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1216 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1217 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1218 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1219 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1220 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1221 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1222 |
+
|
| 1223 |
+
9. Some of the cuBLAS library routines were written by or
|
| 1224 |
+
derived from code written by Jonathan Hogg and are subject
|
| 1225 |
+
to the Modified Berkeley Software Distribution License as
|
| 1226 |
+
follows:
|
| 1227 |
+
|
| 1228 |
+
Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
|
| 1229 |
+
|
| 1230 |
+
All rights reserved.
|
| 1231 |
+
|
| 1232 |
+
Redistribution and use in source and binary forms, with or without
|
| 1233 |
+
modification, are permitted provided that the following conditions are
|
| 1234 |
+
met:
|
| 1235 |
+
* Redistributions of source code must retain the above copyright
|
| 1236 |
+
notice, this list of conditions and the following disclaimer.
|
| 1237 |
+
* Redistributions in binary form must reproduce the above
|
| 1238 |
+
copyright notice, this list of conditions and the following
|
| 1239 |
+
disclaimer in the documentation and/or other materials provided
|
| 1240 |
+
with the distribution.
|
| 1241 |
+
* Neither the name of the STFC nor the names of its contributors
|
| 1242 |
+
may be used to endorse or promote products derived from this
|
| 1243 |
+
software without specific prior written permission.
|
| 1244 |
+
|
| 1245 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1246 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1247 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1248 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
|
| 1249 |
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 1250 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 1251 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
| 1252 |
+
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
| 1253 |
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
| 1254 |
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
| 1255 |
+
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1256 |
+
|
| 1257 |
+
10. Some of the cuBLAS library routines were written by or
|
| 1258 |
+
derived from code written by Ahmad M. Abdelfattah, David
|
| 1259 |
+
Keyes, and Hatem Ltaief, and are subject to the Apache
|
| 1260 |
+
License, Version 2.0, as follows:
|
| 1261 |
+
|
| 1262 |
+
-- (C) Copyright 2013 King Abdullah University of Science and Technology
|
| 1263 |
+
Authors:
|
| 1264 |
+
Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
|
| 1265 |
+
David Keyes (david.keyes@kaust.edu.sa)
|
| 1266 |
+
Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
|
| 1267 |
+
|
| 1268 |
+
Redistribution and use in source and binary forms, with or without
|
| 1269 |
+
modification, are permitted provided that the following conditions
|
| 1270 |
+
are met:
|
| 1271 |
+
|
| 1272 |
+
* Redistributions of source code must retain the above copyright
|
| 1273 |
+
notice, this list of conditions and the following disclaimer.
|
| 1274 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 1275 |
+
notice, this list of conditions and the following disclaimer in the
|
| 1276 |
+
documentation and/or other materials provided with the distribution.
|
| 1277 |
+
* Neither the name of the King Abdullah University of Science and
|
| 1278 |
+
Technology nor the names of its contributors may be used to endorse
|
| 1279 |
+
or promote products derived from this software without specific prior
|
| 1280 |
+
written permission.
|
| 1281 |
+
|
| 1282 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1283 |
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1284 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1285 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1286 |
+
HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1287 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1288 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1289 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1290 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1291 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1292 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
|
| 1293 |
+
|
| 1294 |
+
11. Some of the cuSPARSE library routines were written by or
|
| 1295 |
+
derived from code written by Li-Wen Chang and are subject
|
| 1296 |
+
to the NCSA Open Source License as follows:
|
| 1297 |
+
|
| 1298 |
+
Copyright (c) 2012, University of Illinois.
|
| 1299 |
+
|
| 1300 |
+
All rights reserved.
|
| 1301 |
+
|
| 1302 |
+
Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
|
| 1303 |
+
|
| 1304 |
+
Permission is hereby granted, free of charge, to any person obtaining
|
| 1305 |
+
a copy of this software and associated documentation files (the
|
| 1306 |
+
"Software"), to deal with the Software without restriction, including
|
| 1307 |
+
without limitation the rights to use, copy, modify, merge, publish,
|
| 1308 |
+
distribute, sublicense, and/or sell copies of the Software, and to
|
| 1309 |
+
permit persons to whom the Software is furnished to do so, subject to
|
| 1310 |
+
the following conditions:
|
| 1311 |
+
* Redistributions of source code must retain the above copyright
|
| 1312 |
+
notice, this list of conditions and the following disclaimer.
|
| 1313 |
+
* Redistributions in binary form must reproduce the above
|
| 1314 |
+
copyright notice, this list of conditions and the following
|
| 1315 |
+
disclaimers in the documentation and/or other materials provided
|
| 1316 |
+
with the distribution.
|
| 1317 |
+
* Neither the names of IMPACT Group, University of Illinois, nor
|
| 1318 |
+
the names of its contributors may be used to endorse or promote
|
| 1319 |
+
products derived from this Software without specific prior
|
| 1320 |
+
written permission.
|
| 1321 |
+
|
| 1322 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 1323 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 1324 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
| 1325 |
+
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
| 1326 |
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
| 1327 |
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
| 1328 |
+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
| 1329 |
+
SOFTWARE.
|
| 1330 |
+
|
| 1331 |
+
12. Some of the cuRAND library routines were written by or
|
| 1332 |
+
derived from code written by Mutsuo Saito and Makoto
|
| 1333 |
+
Matsumoto and are subject to the following license:
|
| 1334 |
+
|
| 1335 |
+
Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
|
| 1336 |
+
University. All rights reserved.
|
| 1337 |
+
|
| 1338 |
+
Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
|
| 1339 |
+
University and University of Tokyo. All rights reserved.
|
| 1340 |
+
|
| 1341 |
+
Redistribution and use in source and binary forms, with or without
|
| 1342 |
+
modification, are permitted provided that the following conditions are
|
| 1343 |
+
met:
|
| 1344 |
+
* Redistributions of source code must retain the above copyright
|
| 1345 |
+
notice, this list of conditions and the following disclaimer.
|
| 1346 |
+
* Redistributions in binary form must reproduce the above
|
| 1347 |
+
copyright notice, this list of conditions and the following
|
| 1348 |
+
disclaimer in the documentation and/or other materials provided
|
| 1349 |
+
with the distribution.
|
| 1350 |
+
* Neither the name of the Hiroshima University nor the names of
|
| 1351 |
+
its contributors may be used to endorse or promote products
|
| 1352 |
+
derived from this software without specific prior written
|
| 1353 |
+
permission.
|
| 1354 |
+
|
| 1355 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1356 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1357 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1358 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1359 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1360 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1361 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1362 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1363 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1364 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1365 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1366 |
+
|
| 1367 |
+
13. Some of the cuRAND library routines were derived from
|
| 1368 |
+
code developed by D. E. Shaw Research and are subject to
|
| 1369 |
+
the following license:
|
| 1370 |
+
|
| 1371 |
+
Copyright 2010-2011, D. E. Shaw Research.
|
| 1372 |
+
|
| 1373 |
+
All rights reserved.
|
| 1374 |
+
|
| 1375 |
+
Redistribution and use in source and binary forms, with or without
|
| 1376 |
+
modification, are permitted provided that the following conditions are
|
| 1377 |
+
met:
|
| 1378 |
+
* Redistributions of source code must retain the above copyright
|
| 1379 |
+
notice, this list of conditions, and the following disclaimer.
|
| 1380 |
+
* Redistributions in binary form must reproduce the above
|
| 1381 |
+
copyright notice, this list of conditions, and the following
|
| 1382 |
+
disclaimer in the documentation and/or other materials provided
|
| 1383 |
+
with the distribution.
|
| 1384 |
+
* Neither the name of D. E. Shaw Research nor the names of its
|
| 1385 |
+
contributors may be used to endorse or promote products derived
|
| 1386 |
+
from this software without specific prior written permission.
|
| 1387 |
+
|
| 1388 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1389 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1390 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1391 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1392 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1393 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1394 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1395 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1396 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1397 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1398 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1399 |
+
|
| 1400 |
+
14. Some of the Math library routines were written by or
|
| 1401 |
+
derived from code developed by Norbert Juffa and are
|
| 1402 |
+
subject to the following license:
|
| 1403 |
+
|
| 1404 |
+
Copyright (c) 2015-2017, Norbert Juffa
|
| 1405 |
+
All rights reserved.
|
| 1406 |
+
|
| 1407 |
+
Redistribution and use in source and binary forms, with or without
|
| 1408 |
+
modification, are permitted provided that the following conditions
|
| 1409 |
+
are met:
|
| 1410 |
+
|
| 1411 |
+
1. Redistributions of source code must retain the above copyright
|
| 1412 |
+
notice, this list of conditions and the following disclaimer.
|
| 1413 |
+
|
| 1414 |
+
2. Redistributions in binary form must reproduce the above copyright
|
| 1415 |
+
notice, this list of conditions and the following disclaimer in the
|
| 1416 |
+
documentation and/or other materials provided with the distribution.
|
| 1417 |
+
|
| 1418 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1419 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1420 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1421 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1422 |
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1423 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1424 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1425 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1426 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1427 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1428 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1429 |
+
|
| 1430 |
+
15. Licensee's use of the lz4 third party component is
|
| 1431 |
+
subject to the following terms and conditions:
|
| 1432 |
+
|
| 1433 |
+
Copyright (C) 2011-2013, Yann Collet.
|
| 1434 |
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
| 1435 |
+
|
| 1436 |
+
Redistribution and use in source and binary forms, with or without
|
| 1437 |
+
modification, are permitted provided that the following conditions are
|
| 1438 |
+
met:
|
| 1439 |
+
|
| 1440 |
+
* Redistributions of source code must retain the above copyright
|
| 1441 |
+
notice, this list of conditions and the following disclaimer.
|
| 1442 |
+
* Redistributions in binary form must reproduce the above
|
| 1443 |
+
copyright notice, this list of conditions and the following disclaimer
|
| 1444 |
+
in the documentation and/or other materials provided with the
|
| 1445 |
+
distribution.
|
| 1446 |
+
|
| 1447 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1448 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1449 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1450 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1451 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1452 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1453 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1454 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1455 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1456 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1457 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1458 |
+
|
| 1459 |
+
16. The NPP library uses code from the Boost Math Toolkit,
|
| 1460 |
+
and is subject to the following license:
|
| 1461 |
+
|
| 1462 |
+
Boost Software License - Version 1.0 - August 17th, 2003
|
| 1463 |
+
. . . .
|
| 1464 |
+
|
| 1465 |
+
Permission is hereby granted, free of charge, to any person or
|
| 1466 |
+
organization obtaining a copy of the software and accompanying
|
| 1467 |
+
documentation covered by this license (the "Software") to use,
|
| 1468 |
+
reproduce, display, distribute, execute, and transmit the Software,
|
| 1469 |
+
and to prepare derivative works of the Software, and to permit
|
| 1470 |
+
third-parties to whom the Software is furnished to do so, all
|
| 1471 |
+
subject to the following:
|
| 1472 |
+
|
| 1473 |
+
The copyright notices in the Software and this entire statement,
|
| 1474 |
+
including the above license grant, this restriction and the following
|
| 1475 |
+
disclaimer, must be included in all copies of the Software, in whole
|
| 1476 |
+
or in part, and all derivative works of the Software, unless such
|
| 1477 |
+
copies or derivative works are solely in the form of machine-executable
|
| 1478 |
+
object code generated by a source language processor.
|
| 1479 |
+
|
| 1480 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 1481 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 1482 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
|
| 1483 |
+
NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
| 1484 |
+
ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
|
| 1485 |
+
OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
|
| 1486 |
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
| 1487 |
+
OTHER DEALINGS IN THE SOFTWARE.
|
| 1488 |
+
|
| 1489 |
+
17. Portions of the Nsight Eclipse Edition is subject to the
|
| 1490 |
+
following license:
|
| 1491 |
+
|
| 1492 |
+
The Eclipse Foundation makes available all content in this plug-in
|
| 1493 |
+
("Content"). Unless otherwise indicated below, the Content is provided
|
| 1494 |
+
to you under the terms and conditions of the Eclipse Public License
|
| 1495 |
+
Version 1.0 ("EPL"). A copy of the EPL is available at http://
|
| 1496 |
+
www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
|
| 1497 |
+
will mean the Content.
|
| 1498 |
+
|
| 1499 |
+
If you did not receive this Content directly from the Eclipse
|
| 1500 |
+
Foundation, the Content is being redistributed by another party
|
| 1501 |
+
("Redistributor") and different terms and conditions may apply to your
|
| 1502 |
+
use of any object code in the Content. Check the Redistributor's
|
| 1503 |
+
license that was provided with the Content. If no such license exists,
|
| 1504 |
+
contact the Redistributor. Unless otherwise indicated below, the terms
|
| 1505 |
+
and conditions of the EPL still apply to any source code in the
|
| 1506 |
+
Content and such source code may be obtained at http://www.eclipse.org.
|
| 1507 |
+
|
| 1508 |
+
18. Some of the cuBLAS library routines uses code from
|
| 1509 |
+
OpenAI, which is subject to the following license:
|
| 1510 |
+
|
| 1511 |
+
License URL
|
| 1512 |
+
https://github.com/openai/openai-gemm/blob/master/LICENSE
|
| 1513 |
+
|
| 1514 |
+
License Text
|
| 1515 |
+
The MIT License
|
| 1516 |
+
|
| 1517 |
+
Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
|
| 1518 |
+
|
| 1519 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 1520 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 1521 |
+
in the Software without restriction, including without limitation the rights
|
| 1522 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 1523 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 1524 |
+
furnished to do so, subject to the following conditions:
|
| 1525 |
+
|
| 1526 |
+
The above copyright notice and this permission notice shall be included in
|
| 1527 |
+
all copies or substantial portions of the Software.
|
| 1528 |
+
|
| 1529 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 1530 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 1531 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 1532 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 1533 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 1534 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
| 1535 |
+
THE SOFTWARE.
|
| 1536 |
+
|
| 1537 |
+
19. Licensee's use of the Visual Studio Setup Configuration
|
| 1538 |
+
Samples is subject to the following license:
|
| 1539 |
+
|
| 1540 |
+
The MIT License (MIT)
|
| 1541 |
+
Copyright (C) Microsoft Corporation. All rights reserved.
|
| 1542 |
+
|
| 1543 |
+
Permission is hereby granted, free of charge, to any person
|
| 1544 |
+
obtaining a copy of this software and associated documentation
|
| 1545 |
+
files (the "Software"), to deal in the Software without restriction,
|
| 1546 |
+
including without limitation the rights to use, copy, modify, merge,
|
| 1547 |
+
publish, distribute, sublicense, and/or sell copies of the Software,
|
| 1548 |
+
and to permit persons to whom the Software is furnished to do so,
|
| 1549 |
+
subject to the following conditions:
|
| 1550 |
+
|
| 1551 |
+
The above copyright notice and this permission notice shall be included
|
| 1552 |
+
in all copies or substantial portions of the Software.
|
| 1553 |
+
|
| 1554 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| 1555 |
+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 1556 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 1557 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 1558 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 1559 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 1560 |
+
|
| 1561 |
+
20. Licensee's use of linmath.h header for CPU functions for
|
| 1562 |
+
GL vector/matrix operations from lunarG is subject to the
|
| 1563 |
+
Apache License Version 2.0.
|
| 1564 |
+
|
| 1565 |
+
21. The DX12-CUDA sample uses the d3dx12.h header, which is
|
| 1566 |
+
subject to the MIT license .
|
| 1567 |
+
|
| 1568 |
+
-----------------
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc
ADDED
|
Binary file (7.68 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (265 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (17.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import functools
|
| 2 |
+
import os
|
| 3 |
+
import site
|
| 4 |
+
import sys
|
| 5 |
+
import sysconfig
|
| 6 |
+
import typing
|
| 7 |
+
|
| 8 |
+
from pip._internal.exceptions import InstallationError
|
| 9 |
+
from pip._internal.utils import appdirs
|
| 10 |
+
from pip._internal.utils.virtualenv import running_under_virtualenv
|
| 11 |
+
|
| 12 |
+
# Application Directories
|
| 13 |
+
USER_CACHE_DIR = appdirs.user_cache_dir("pip")
|
| 14 |
+
|
| 15 |
+
# FIXME doesn't account for venv linked to global site-packages
|
| 16 |
+
site_packages: str = sysconfig.get_path("purelib")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_major_minor_version() -> str:
|
| 20 |
+
"""
|
| 21 |
+
Return the major-minor version of the current Python as a string, e.g.
|
| 22 |
+
"3.7" or "3.10".
|
| 23 |
+
"""
|
| 24 |
+
return "{}.{}".format(*sys.version_info)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def change_root(new_root: str, pathname: str) -> str:
|
| 28 |
+
"""Return 'pathname' with 'new_root' prepended.
|
| 29 |
+
|
| 30 |
+
If 'pathname' is relative, this is equivalent to os.path.join(new_root, pathname).
|
| 31 |
+
Otherwise, it requires making 'pathname' relative and then joining the
|
| 32 |
+
two, which is tricky on DOS/Windows and Mac OS.
|
| 33 |
+
|
| 34 |
+
This is borrowed from Python's standard library's distutils module.
|
| 35 |
+
"""
|
| 36 |
+
if os.name == "posix":
|
| 37 |
+
if not os.path.isabs(pathname):
|
| 38 |
+
return os.path.join(new_root, pathname)
|
| 39 |
+
else:
|
| 40 |
+
return os.path.join(new_root, pathname[1:])
|
| 41 |
+
|
| 42 |
+
elif os.name == "nt":
|
| 43 |
+
(drive, path) = os.path.splitdrive(pathname)
|
| 44 |
+
if path[0] == "\\":
|
| 45 |
+
path = path[1:]
|
| 46 |
+
return os.path.join(new_root, path)
|
| 47 |
+
|
| 48 |
+
else:
|
| 49 |
+
raise InstallationError(
|
| 50 |
+
f"Unknown platform: {os.name}\n"
|
| 51 |
+
"Can not change root path prefix on unknown platform."
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_src_prefix() -> str:
|
| 56 |
+
if running_under_virtualenv():
|
| 57 |
+
src_prefix = os.path.join(sys.prefix, "src")
|
| 58 |
+
else:
|
| 59 |
+
# FIXME: keep src in cwd for now (it is not a temporary folder)
|
| 60 |
+
try:
|
| 61 |
+
src_prefix = os.path.join(os.getcwd(), "src")
|
| 62 |
+
except OSError:
|
| 63 |
+
# In case the current working directory has been renamed or deleted
|
| 64 |
+
sys.exit("The folder you are executing pip from can no longer be found.")
|
| 65 |
+
|
| 66 |
+
# under macOS + virtualenv sys.prefix is not properly resolved
|
| 67 |
+
# it is something like /path/to/python/bin/..
|
| 68 |
+
return os.path.abspath(src_prefix)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
try:
|
| 72 |
+
# Use getusersitepackages if this is present, as it ensures that the
|
| 73 |
+
# value is initialised properly.
|
| 74 |
+
user_site: typing.Optional[str] = site.getusersitepackages()
|
| 75 |
+
except AttributeError:
|
| 76 |
+
user_site = site.USER_SITE
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@functools.lru_cache(maxsize=None)
|
| 80 |
+
def is_osx_framework() -> bool:
|
| 81 |
+
return bool(sysconfig.get_config_var("PYTHONFRAMEWORK"))
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc
ADDED
|
Binary file (11.7 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py
ADDED
|
File without changes
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Metadata generation logic for legacy source distributions.
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
from pip._internal.build_env import BuildEnvironment
|
| 8 |
+
from pip._internal.cli.spinners import open_spinner
|
| 9 |
+
from pip._internal.exceptions import (
|
| 10 |
+
InstallationError,
|
| 11 |
+
InstallationSubprocessError,
|
| 12 |
+
MetadataGenerationFailed,
|
| 13 |
+
)
|
| 14 |
+
from pip._internal.utils.setuptools_build import make_setuptools_egg_info_args
|
| 15 |
+
from pip._internal.utils.subprocess import call_subprocess
|
| 16 |
+
from pip._internal.utils.temp_dir import TempDirectory
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _find_egg_info(directory: str) -> str:
|
| 22 |
+
"""Find an .egg-info subdirectory in `directory`."""
|
| 23 |
+
filenames = [f for f in os.listdir(directory) if f.endswith(".egg-info")]
|
| 24 |
+
|
| 25 |
+
if not filenames:
|
| 26 |
+
raise InstallationError(f"No .egg-info directory found in {directory}")
|
| 27 |
+
|
| 28 |
+
if len(filenames) > 1:
|
| 29 |
+
raise InstallationError(
|
| 30 |
+
f"More than one .egg-info directory found in {directory}"
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
return os.path.join(directory, filenames[0])
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def generate_metadata(
|
| 37 |
+
build_env: BuildEnvironment,
|
| 38 |
+
setup_py_path: str,
|
| 39 |
+
source_dir: str,
|
| 40 |
+
isolated: bool,
|
| 41 |
+
details: str,
|
| 42 |
+
) -> str:
|
| 43 |
+
"""Generate metadata using setup.py-based defacto mechanisms.
|
| 44 |
+
|
| 45 |
+
Returns the generated metadata directory.
|
| 46 |
+
"""
|
| 47 |
+
logger.debug(
|
| 48 |
+
"Running setup.py (path:%s) egg_info for package %s",
|
| 49 |
+
setup_py_path,
|
| 50 |
+
details,
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
egg_info_dir = TempDirectory(kind="pip-egg-info", globally_managed=True).path
|
| 54 |
+
|
| 55 |
+
args = make_setuptools_egg_info_args(
|
| 56 |
+
setup_py_path,
|
| 57 |
+
egg_info_dir=egg_info_dir,
|
| 58 |
+
no_user_config=isolated,
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
with build_env:
|
| 62 |
+
with open_spinner("Preparing metadata (setup.py)") as spinner:
|
| 63 |
+
try:
|
| 64 |
+
call_subprocess(
|
| 65 |
+
args,
|
| 66 |
+
cwd=source_dir,
|
| 67 |
+
command_desc="python setup.py egg_info",
|
| 68 |
+
spinner=spinner,
|
| 69 |
+
)
|
| 70 |
+
except InstallationSubprocessError as error:
|
| 71 |
+
raise MetadataGenerationFailed(package_details=details) from error
|
| 72 |
+
|
| 73 |
+
# Return the .egg-info directory.
|
| 74 |
+
return _find_egg_info(egg_info_dir)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Validation of dependencies of packages
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
from contextlib import suppress
|
| 6 |
+
from email.parser import Parser
|
| 7 |
+
from functools import reduce
|
| 8 |
+
from typing import (
|
| 9 |
+
Callable,
|
| 10 |
+
Dict,
|
| 11 |
+
FrozenSet,
|
| 12 |
+
Generator,
|
| 13 |
+
Iterable,
|
| 14 |
+
List,
|
| 15 |
+
NamedTuple,
|
| 16 |
+
Optional,
|
| 17 |
+
Set,
|
| 18 |
+
Tuple,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
from pip._vendor.packaging.requirements import Requirement
|
| 22 |
+
from pip._vendor.packaging.tags import Tag, parse_tag
|
| 23 |
+
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
|
| 24 |
+
from pip._vendor.packaging.version import Version
|
| 25 |
+
|
| 26 |
+
from pip._internal.distributions import make_distribution_for_install_requirement
|
| 27 |
+
from pip._internal.metadata import get_default_environment
|
| 28 |
+
from pip._internal.metadata.base import BaseDistribution
|
| 29 |
+
from pip._internal.req.req_install import InstallRequirement
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class PackageDetails(NamedTuple):
|
| 35 |
+
version: Version
|
| 36 |
+
dependencies: List[Requirement]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# Shorthands
|
| 40 |
+
PackageSet = Dict[NormalizedName, PackageDetails]
|
| 41 |
+
Missing = Tuple[NormalizedName, Requirement]
|
| 42 |
+
Conflicting = Tuple[NormalizedName, Version, Requirement]
|
| 43 |
+
|
| 44 |
+
MissingDict = Dict[NormalizedName, List[Missing]]
|
| 45 |
+
ConflictingDict = Dict[NormalizedName, List[Conflicting]]
|
| 46 |
+
CheckResult = Tuple[MissingDict, ConflictingDict]
|
| 47 |
+
ConflictDetails = Tuple[PackageSet, CheckResult]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def create_package_set_from_installed() -> Tuple[PackageSet, bool]:
|
| 51 |
+
"""Converts a list of distributions into a PackageSet."""
|
| 52 |
+
package_set = {}
|
| 53 |
+
problems = False
|
| 54 |
+
env = get_default_environment()
|
| 55 |
+
for dist in env.iter_installed_distributions(local_only=False, skip=()):
|
| 56 |
+
name = dist.canonical_name
|
| 57 |
+
try:
|
| 58 |
+
dependencies = list(dist.iter_dependencies())
|
| 59 |
+
package_set[name] = PackageDetails(dist.version, dependencies)
|
| 60 |
+
except (OSError, ValueError) as e:
|
| 61 |
+
# Don't crash on unreadable or broken metadata.
|
| 62 |
+
logger.warning("Error parsing dependencies of %s: %s", name, e)
|
| 63 |
+
problems = True
|
| 64 |
+
return package_set, problems
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def check_package_set(
|
| 68 |
+
package_set: PackageSet, should_ignore: Optional[Callable[[str], bool]] = None
|
| 69 |
+
) -> CheckResult:
|
| 70 |
+
"""Check if a package set is consistent
|
| 71 |
+
|
| 72 |
+
If should_ignore is passed, it should be a callable that takes a
|
| 73 |
+
package name and returns a boolean.
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
missing = {}
|
| 77 |
+
conflicting = {}
|
| 78 |
+
|
| 79 |
+
for package_name, package_detail in package_set.items():
|
| 80 |
+
# Info about dependencies of package_name
|
| 81 |
+
missing_deps: Set[Missing] = set()
|
| 82 |
+
conflicting_deps: Set[Conflicting] = set()
|
| 83 |
+
|
| 84 |
+
if should_ignore and should_ignore(package_name):
|
| 85 |
+
continue
|
| 86 |
+
|
| 87 |
+
for req in package_detail.dependencies:
|
| 88 |
+
name = canonicalize_name(req.name)
|
| 89 |
+
|
| 90 |
+
# Check if it's missing
|
| 91 |
+
if name not in package_set:
|
| 92 |
+
missed = True
|
| 93 |
+
if req.marker is not None:
|
| 94 |
+
missed = req.marker.evaluate({"extra": ""})
|
| 95 |
+
if missed:
|
| 96 |
+
missing_deps.add((name, req))
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
# Check if there's a conflict
|
| 100 |
+
version = package_set[name].version
|
| 101 |
+
if not req.specifier.contains(version, prereleases=True):
|
| 102 |
+
conflicting_deps.add((name, version, req))
|
| 103 |
+
|
| 104 |
+
if missing_deps:
|
| 105 |
+
missing[package_name] = sorted(missing_deps, key=str)
|
| 106 |
+
if conflicting_deps:
|
| 107 |
+
conflicting[package_name] = sorted(conflicting_deps, key=str)
|
| 108 |
+
|
| 109 |
+
return missing, conflicting
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def check_install_conflicts(to_install: List[InstallRequirement]) -> ConflictDetails:
|
| 113 |
+
"""For checking if the dependency graph would be consistent after \
|
| 114 |
+
installing given requirements
|
| 115 |
+
"""
|
| 116 |
+
# Start from the current state
|
| 117 |
+
package_set, _ = create_package_set_from_installed()
|
| 118 |
+
# Install packages
|
| 119 |
+
would_be_installed = _simulate_installation_of(to_install, package_set)
|
| 120 |
+
|
| 121 |
+
# Only warn about directly-dependent packages; create a whitelist of them
|
| 122 |
+
whitelist = _create_whitelist(would_be_installed, package_set)
|
| 123 |
+
|
| 124 |
+
return (
|
| 125 |
+
package_set,
|
| 126 |
+
check_package_set(
|
| 127 |
+
package_set, should_ignore=lambda name: name not in whitelist
|
| 128 |
+
),
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def check_unsupported(
|
| 133 |
+
packages: Iterable[BaseDistribution],
|
| 134 |
+
supported_tags: Iterable[Tag],
|
| 135 |
+
) -> Generator[BaseDistribution, None, None]:
|
| 136 |
+
for p in packages:
|
| 137 |
+
with suppress(FileNotFoundError):
|
| 138 |
+
wheel_file = p.read_text("WHEEL")
|
| 139 |
+
wheel_tags: FrozenSet[Tag] = reduce(
|
| 140 |
+
frozenset.union,
|
| 141 |
+
map(parse_tag, Parser().parsestr(wheel_file).get_all("Tag", [])),
|
| 142 |
+
frozenset(),
|
| 143 |
+
)
|
| 144 |
+
if wheel_tags.isdisjoint(supported_tags):
|
| 145 |
+
yield p
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _simulate_installation_of(
|
| 149 |
+
to_install: List[InstallRequirement], package_set: PackageSet
|
| 150 |
+
) -> Set[NormalizedName]:
|
| 151 |
+
"""Computes the version of packages after installing to_install."""
|
| 152 |
+
# Keep track of packages that were installed
|
| 153 |
+
installed = set()
|
| 154 |
+
|
| 155 |
+
# Modify it as installing requirement_set would (assuming no errors)
|
| 156 |
+
for inst_req in to_install:
|
| 157 |
+
abstract_dist = make_distribution_for_install_requirement(inst_req)
|
| 158 |
+
dist = abstract_dist.get_metadata_distribution()
|
| 159 |
+
name = dist.canonical_name
|
| 160 |
+
package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies()))
|
| 161 |
+
|
| 162 |
+
installed.add(name)
|
| 163 |
+
|
| 164 |
+
return installed
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def _create_whitelist(
|
| 168 |
+
would_be_installed: Set[NormalizedName], package_set: PackageSet
|
| 169 |
+
) -> Set[NormalizedName]:
|
| 170 |
+
packages_affected = set(would_be_installed)
|
| 171 |
+
|
| 172 |
+
for package_name in package_set:
|
| 173 |
+
if package_name in packages_affected:
|
| 174 |
+
continue
|
| 175 |
+
|
| 176 |
+
for req in package_set[package_name].dependencies:
|
| 177 |
+
if canonicalize_name(req.name) in packages_affected:
|
| 178 |
+
packages_affected.add(package_name)
|
| 179 |
+
break
|
| 180 |
+
|
| 181 |
+
return packages_affected
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import collections
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from typing import Container, Dict, Generator, Iterable, List, NamedTuple, Optional, Set
|
| 5 |
+
|
| 6 |
+
from pip._vendor.packaging.utils import canonicalize_name
|
| 7 |
+
from pip._vendor.packaging.version import InvalidVersion
|
| 8 |
+
|
| 9 |
+
from pip._internal.exceptions import BadCommand, InstallationError
|
| 10 |
+
from pip._internal.metadata import BaseDistribution, get_environment
|
| 11 |
+
from pip._internal.req.constructors import (
|
| 12 |
+
install_req_from_editable,
|
| 13 |
+
install_req_from_line,
|
| 14 |
+
)
|
| 15 |
+
from pip._internal.req.req_file import COMMENT_RE
|
| 16 |
+
from pip._internal.utils.direct_url_helpers import direct_url_as_pep440_direct_reference
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class _EditableInfo(NamedTuple):
|
| 22 |
+
requirement: str
|
| 23 |
+
comments: List[str]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def freeze(
|
| 27 |
+
requirement: Optional[List[str]] = None,
|
| 28 |
+
local_only: bool = False,
|
| 29 |
+
user_only: bool = False,
|
| 30 |
+
paths: Optional[List[str]] = None,
|
| 31 |
+
isolated: bool = False,
|
| 32 |
+
exclude_editable: bool = False,
|
| 33 |
+
skip: Container[str] = (),
|
| 34 |
+
) -> Generator[str, None, None]:
|
| 35 |
+
installations: Dict[str, FrozenRequirement] = {}
|
| 36 |
+
|
| 37 |
+
dists = get_environment(paths).iter_installed_distributions(
|
| 38 |
+
local_only=local_only,
|
| 39 |
+
skip=(),
|
| 40 |
+
user_only=user_only,
|
| 41 |
+
)
|
| 42 |
+
for dist in dists:
|
| 43 |
+
req = FrozenRequirement.from_dist(dist)
|
| 44 |
+
if exclude_editable and req.editable:
|
| 45 |
+
continue
|
| 46 |
+
installations[req.canonical_name] = req
|
| 47 |
+
|
| 48 |
+
if requirement:
|
| 49 |
+
# the options that don't get turned into an InstallRequirement
|
| 50 |
+
# should only be emitted once, even if the same option is in multiple
|
| 51 |
+
# requirements files, so we need to keep track of what has been emitted
|
| 52 |
+
# so that we don't emit it again if it's seen again
|
| 53 |
+
emitted_options: Set[str] = set()
|
| 54 |
+
# keep track of which files a requirement is in so that we can
|
| 55 |
+
# give an accurate warning if a requirement appears multiple times.
|
| 56 |
+
req_files: Dict[str, List[str]] = collections.defaultdict(list)
|
| 57 |
+
for req_file_path in requirement:
|
| 58 |
+
with open(req_file_path) as req_file:
|
| 59 |
+
for line in req_file:
|
| 60 |
+
if (
|
| 61 |
+
not line.strip()
|
| 62 |
+
or line.strip().startswith("#")
|
| 63 |
+
or line.startswith(
|
| 64 |
+
(
|
| 65 |
+
"-r",
|
| 66 |
+
"--requirement",
|
| 67 |
+
"-f",
|
| 68 |
+
"--find-links",
|
| 69 |
+
"-i",
|
| 70 |
+
"--index-url",
|
| 71 |
+
"--pre",
|
| 72 |
+
"--trusted-host",
|
| 73 |
+
"--process-dependency-links",
|
| 74 |
+
"--extra-index-url",
|
| 75 |
+
"--use-feature",
|
| 76 |
+
)
|
| 77 |
+
)
|
| 78 |
+
):
|
| 79 |
+
line = line.rstrip()
|
| 80 |
+
if line not in emitted_options:
|
| 81 |
+
emitted_options.add(line)
|
| 82 |
+
yield line
|
| 83 |
+
continue
|
| 84 |
+
|
| 85 |
+
if line.startswith("-e") or line.startswith("--editable"):
|
| 86 |
+
if line.startswith("-e"):
|
| 87 |
+
line = line[2:].strip()
|
| 88 |
+
else:
|
| 89 |
+
line = line[len("--editable") :].strip().lstrip("=")
|
| 90 |
+
line_req = install_req_from_editable(
|
| 91 |
+
line,
|
| 92 |
+
isolated=isolated,
|
| 93 |
+
)
|
| 94 |
+
else:
|
| 95 |
+
line_req = install_req_from_line(
|
| 96 |
+
COMMENT_RE.sub("", line).strip(),
|
| 97 |
+
isolated=isolated,
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
if not line_req.name:
|
| 101 |
+
logger.info(
|
| 102 |
+
"Skipping line in requirement file [%s] because "
|
| 103 |
+
"it's not clear what it would install: %s",
|
| 104 |
+
req_file_path,
|
| 105 |
+
line.strip(),
|
| 106 |
+
)
|
| 107 |
+
logger.info(
|
| 108 |
+
" (add #egg=PackageName to the URL to avoid"
|
| 109 |
+
" this warning)"
|
| 110 |
+
)
|
| 111 |
+
else:
|
| 112 |
+
line_req_canonical_name = canonicalize_name(line_req.name)
|
| 113 |
+
if line_req_canonical_name not in installations:
|
| 114 |
+
# either it's not installed, or it is installed
|
| 115 |
+
# but has been processed already
|
| 116 |
+
if not req_files[line_req.name]:
|
| 117 |
+
logger.warning(
|
| 118 |
+
"Requirement file [%s] contains %s, but "
|
| 119 |
+
"package %r is not installed",
|
| 120 |
+
req_file_path,
|
| 121 |
+
COMMENT_RE.sub("", line).strip(),
|
| 122 |
+
line_req.name,
|
| 123 |
+
)
|
| 124 |
+
else:
|
| 125 |
+
req_files[line_req.name].append(req_file_path)
|
| 126 |
+
else:
|
| 127 |
+
yield str(installations[line_req_canonical_name]).rstrip()
|
| 128 |
+
del installations[line_req_canonical_name]
|
| 129 |
+
req_files[line_req.name].append(req_file_path)
|
| 130 |
+
|
| 131 |
+
# Warn about requirements that were included multiple times (in a
|
| 132 |
+
# single requirements file or in different requirements files).
|
| 133 |
+
for name, files in req_files.items():
|
| 134 |
+
if len(files) > 1:
|
| 135 |
+
logger.warning(
|
| 136 |
+
"Requirement %s included multiple times [%s]",
|
| 137 |
+
name,
|
| 138 |
+
", ".join(sorted(set(files))),
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
yield ("## The following requirements were added by pip freeze:")
|
| 142 |
+
for installation in sorted(installations.values(), key=lambda x: x.name.lower()):
|
| 143 |
+
if installation.canonical_name not in skip:
|
| 144 |
+
yield str(installation).rstrip()
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _format_as_name_version(dist: BaseDistribution) -> str:
|
| 148 |
+
try:
|
| 149 |
+
dist_version = dist.version
|
| 150 |
+
except InvalidVersion:
|
| 151 |
+
# legacy version
|
| 152 |
+
return f"{dist.raw_name}==={dist.raw_version}"
|
| 153 |
+
else:
|
| 154 |
+
return f"{dist.raw_name}=={dist_version}"
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _get_editable_info(dist: BaseDistribution) -> _EditableInfo:
|
| 158 |
+
"""
|
| 159 |
+
Compute and return values (req, comments) for use in
|
| 160 |
+
FrozenRequirement.from_dist().
|
| 161 |
+
"""
|
| 162 |
+
editable_project_location = dist.editable_project_location
|
| 163 |
+
assert editable_project_location
|
| 164 |
+
location = os.path.normcase(os.path.abspath(editable_project_location))
|
| 165 |
+
|
| 166 |
+
from pip._internal.vcs import RemoteNotFoundError, RemoteNotValidError, vcs
|
| 167 |
+
|
| 168 |
+
vcs_backend = vcs.get_backend_for_dir(location)
|
| 169 |
+
|
| 170 |
+
if vcs_backend is None:
|
| 171 |
+
display = _format_as_name_version(dist)
|
| 172 |
+
logger.debug(
|
| 173 |
+
'No VCS found for editable requirement "%s" in: %r',
|
| 174 |
+
display,
|
| 175 |
+
location,
|
| 176 |
+
)
|
| 177 |
+
return _EditableInfo(
|
| 178 |
+
requirement=location,
|
| 179 |
+
comments=[f"# Editable install with no version control ({display})"],
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
vcs_name = type(vcs_backend).__name__
|
| 183 |
+
|
| 184 |
+
try:
|
| 185 |
+
req = vcs_backend.get_src_requirement(location, dist.raw_name)
|
| 186 |
+
except RemoteNotFoundError:
|
| 187 |
+
display = _format_as_name_version(dist)
|
| 188 |
+
return _EditableInfo(
|
| 189 |
+
requirement=location,
|
| 190 |
+
comments=[f"# Editable {vcs_name} install with no remote ({display})"],
|
| 191 |
+
)
|
| 192 |
+
except RemoteNotValidError as ex:
|
| 193 |
+
display = _format_as_name_version(dist)
|
| 194 |
+
return _EditableInfo(
|
| 195 |
+
requirement=location,
|
| 196 |
+
comments=[
|
| 197 |
+
f"# Editable {vcs_name} install ({display}) with either a deleted "
|
| 198 |
+
f"local remote or invalid URI:",
|
| 199 |
+
f"# '{ex.url}'",
|
| 200 |
+
],
|
| 201 |
+
)
|
| 202 |
+
except BadCommand:
|
| 203 |
+
logger.warning(
|
| 204 |
+
"cannot determine version of editable source in %s "
|
| 205 |
+
"(%s command not found in path)",
|
| 206 |
+
location,
|
| 207 |
+
vcs_backend.name,
|
| 208 |
+
)
|
| 209 |
+
return _EditableInfo(requirement=location, comments=[])
|
| 210 |
+
except InstallationError as exc:
|
| 211 |
+
logger.warning("Error when trying to get requirement for VCS system %s", exc)
|
| 212 |
+
else:
|
| 213 |
+
return _EditableInfo(requirement=req, comments=[])
|
| 214 |
+
|
| 215 |
+
logger.warning("Could not determine repository location of %s", location)
|
| 216 |
+
|
| 217 |
+
return _EditableInfo(
|
| 218 |
+
requirement=location,
|
| 219 |
+
comments=["## !! Could not determine repository location"],
|
| 220 |
+
)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
class FrozenRequirement:
|
| 224 |
+
def __init__(
|
| 225 |
+
self,
|
| 226 |
+
name: str,
|
| 227 |
+
req: str,
|
| 228 |
+
editable: bool,
|
| 229 |
+
comments: Iterable[str] = (),
|
| 230 |
+
) -> None:
|
| 231 |
+
self.name = name
|
| 232 |
+
self.canonical_name = canonicalize_name(name)
|
| 233 |
+
self.req = req
|
| 234 |
+
self.editable = editable
|
| 235 |
+
self.comments = comments
|
| 236 |
+
|
| 237 |
+
@classmethod
|
| 238 |
+
def from_dist(cls, dist: BaseDistribution) -> "FrozenRequirement":
|
| 239 |
+
editable = dist.editable
|
| 240 |
+
if editable:
|
| 241 |
+
req, comments = _get_editable_info(dist)
|
| 242 |
+
else:
|
| 243 |
+
comments = []
|
| 244 |
+
direct_url = dist.direct_url
|
| 245 |
+
if direct_url:
|
| 246 |
+
# if PEP 610 metadata is present, use it
|
| 247 |
+
req = direct_url_as_pep440_direct_reference(direct_url, dist.raw_name)
|
| 248 |
+
else:
|
| 249 |
+
# name==version requirement
|
| 250 |
+
req = _format_as_name_version(dist)
|
| 251 |
+
|
| 252 |
+
return cls(dist.raw_name, req, editable, comments=comments)
|
| 253 |
+
|
| 254 |
+
def __str__(self) -> str:
|
| 255 |
+
req = self.req
|
| 256 |
+
if self.editable:
|
| 257 |
+
req = f"-e {req}"
|
| 258 |
+
return "\n".join(list(self.comments) + [str(req)]) + "\n"
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""For modules related to installing packages.
|
| 2 |
+
"""
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc
ADDED
|
Binary file (2.21 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py
ADDED
|
@@ -0,0 +1,741 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Support for installing and building the "wheel" binary package format.
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
import collections
|
| 5 |
+
import compileall
|
| 6 |
+
import contextlib
|
| 7 |
+
import csv
|
| 8 |
+
import importlib
|
| 9 |
+
import logging
|
| 10 |
+
import os.path
|
| 11 |
+
import re
|
| 12 |
+
import shutil
|
| 13 |
+
import sys
|
| 14 |
+
import warnings
|
| 15 |
+
from base64 import urlsafe_b64encode
|
| 16 |
+
from email.message import Message
|
| 17 |
+
from itertools import chain, filterfalse, starmap
|
| 18 |
+
from typing import (
|
| 19 |
+
IO,
|
| 20 |
+
TYPE_CHECKING,
|
| 21 |
+
Any,
|
| 22 |
+
BinaryIO,
|
| 23 |
+
Callable,
|
| 24 |
+
Dict,
|
| 25 |
+
Generator,
|
| 26 |
+
Iterable,
|
| 27 |
+
Iterator,
|
| 28 |
+
List,
|
| 29 |
+
NewType,
|
| 30 |
+
Optional,
|
| 31 |
+
Protocol,
|
| 32 |
+
Sequence,
|
| 33 |
+
Set,
|
| 34 |
+
Tuple,
|
| 35 |
+
Union,
|
| 36 |
+
cast,
|
| 37 |
+
)
|
| 38 |
+
from zipfile import ZipFile, ZipInfo
|
| 39 |
+
|
| 40 |
+
from pip._vendor.distlib.scripts import ScriptMaker
|
| 41 |
+
from pip._vendor.distlib.util import get_export_entry
|
| 42 |
+
from pip._vendor.packaging.utils import canonicalize_name
|
| 43 |
+
|
| 44 |
+
from pip._internal.exceptions import InstallationError
|
| 45 |
+
from pip._internal.locations import get_major_minor_version
|
| 46 |
+
from pip._internal.metadata import (
|
| 47 |
+
BaseDistribution,
|
| 48 |
+
FilesystemWheel,
|
| 49 |
+
get_wheel_distribution,
|
| 50 |
+
)
|
| 51 |
+
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl
|
| 52 |
+
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
|
| 53 |
+
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
|
| 54 |
+
from pip._internal.utils.misc import StreamWrapper, ensure_dir, hash_file, partition
|
| 55 |
+
from pip._internal.utils.unpacking import (
|
| 56 |
+
current_umask,
|
| 57 |
+
is_within_directory,
|
| 58 |
+
set_extracted_file_to_default_mode_plus_executable,
|
| 59 |
+
zip_item_is_executable,
|
| 60 |
+
)
|
| 61 |
+
from pip._internal.utils.wheel import parse_wheel
|
| 62 |
+
|
| 63 |
+
if TYPE_CHECKING:

    class File(Protocol):
        """Typing protocol for a single file to be installed from a wheel."""

        # Path of the entry inside the wheel's RECORD ("/"-separated).
        src_record_path: "RecordPath"
        # Filesystem path the file will be written to.
        dest_path: str
        # True if the content was modified during installation (e.g. a
        # rewritten script shebang), so its hash must be recomputed.
        changed: bool

        def save(self) -> None:
            pass
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
logger = logging.getLogger(__name__)

# A path exactly as it appears in a wheel RECORD file ("/"-separated).
RecordPath = NewType("RecordPath", str)
# One RECORD row: (path, hash, size); size may arrive as an int or a string.
InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def rehash(path: str, blocksize: int = 1 << 20) -> Tuple[str, str]:
    """Hash *path* with sha256 and return ``(encoded_digest, length)``.

    The digest is formatted ``sha256=<urlsafe-b64, unpadded>``, the way
    RECORD files expect it; the length is returned as a string.
    """
    file_hash, size = hash_file(path, blocksize)
    encoded = urlsafe_b64encode(file_hash.digest()).decode("latin1")
    return ("sha256=" + encoded.rstrip("="), str(size))
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def csv_io_kwargs(mode: str) -> Dict[str, Any]:
    """Return the keyword arguments for opening a RECORD-style CSV file
    in the given *mode*: no newline translation and UTF-8 encoding.
    """
    return {
        "mode": mode,
        "newline": "",
        "encoding": "utf-8",
    }
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def fix_script(path: str) -> bool:
    """Rewrite a ``#!python`` shebang in *path* to point at sys.executable.

    Returns True if the file was changed, False if it had no ``#!python``
    shebang to fix.
    """
    # XXX RECORD hashes will need to be updated
    assert os.path.isfile(path)

    with open(path, "rb") as script:
        shebang = script.readline()
        if not shebang.startswith(b"#!python"):
            return False
        interpreter = sys.executable.encode(sys.getfilesystemencoding())
        shebang = b"#!" + interpreter + os.linesep.encode("ascii")
        body = script.read()
    with open(path, "wb") as script:
        script.write(shebang)
        script.write(body)
    return True
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def wheel_root_is_purelib(metadata: Message) -> bool:
    """True if the WHEEL metadata declares ``Root-Is-Purelib: true``."""
    value = metadata.get("Root-Is-Purelib", "")
    return value.lower() == "true"
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def get_entrypoints(dist: BaseDistribution) -> Tuple[Dict[str, str], Dict[str, str]]:
    """Split the distribution's entry points into (console, gui) script maps.

    Entry points in any other group are ignored.
    """
    console_scripts: Dict[str, str] = {}
    gui_scripts: Dict[str, str] = {}
    buckets = {"console_scripts": console_scripts, "gui_scripts": gui_scripts}
    for ep in dist.iter_entry_points():
        bucket = buckets.get(ep.group)
        if bucket is not None:
            bucket[ep.name] = ep.value
    return console_scripts, gui_scripts
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def message_about_scripts_not_on_PATH(scripts: Sequence[str]) -> Optional[str]:
    """Build a warning about installed scripts whose directory is not on PATH.

    Returns the formatted multi-line warning, or None when every script
    landed in a directory already on PATH (or next to sys.executable).
    """
    if not scripts:
        return None

    # Bucket the installed script names by their parent directory.
    dir_to_scripts: Dict[str, Set[str]] = collections.defaultdict(set)
    for script_path in scripts:
        parent = os.path.dirname(script_path)
        dir_to_scripts[parent].add(os.path.basename(script_path))

    # Directories already on PATH never warrant a warning.
    ignored_dirs = [
        os.path.normcase(os.path.normpath(entry)).rstrip(os.sep)
        for entry in os.environ.get("PATH", "").split(os.pathsep)
    ]
    # Neither does the directory holding sys.executable: this covers venv
    # invocations that did not activate the venv first.
    ignored_dirs.append(
        os.path.normcase(os.path.normpath(os.path.dirname(sys.executable)))
    )
    warn_for: Dict[str, Set[str]] = {
        directory: names
        for directory, names in dir_to_scripts.items()
        if os.path.normcase(os.path.normpath(directory)) not in ignored_dirs
    }
    if not warn_for:
        return None

    # One line per offending directory, listing its scripts.
    msg_lines = []
    for directory, names in warn_for.items():
        ordered: List[str] = sorted(names)
        if len(ordered) == 1:
            start_text = f"script {ordered[0]} is"
        else:
            start_text = "scripts {} are".format(
                ", ".join(ordered[:-1]) + " and " + ordered[-1]
            )
        msg_lines.append(
            f"The {start_text} installed in '{directory}' which is not on PATH."
        )

    last_line_fmt = (
        "Consider adding {} to PATH or, if you prefer "
        "to suppress this warning, use --no-warn-script-location."
    )
    if len(msg_lines) == 1:
        msg_lines.append(last_line_fmt.format("this directory"))
    else:
        msg_lines.append(last_line_fmt.format("these directories"))

    # Tilde entries in PATH may not be expanded by every application.
    has_tilde_entry = any(
        entry[0] == "~"
        for entry in os.environ.get("PATH", "").split(os.pathsep)
        if entry
    )
    if has_tilde_entry:
        msg_lines.append(
            "NOTE: The current PATH contains path(s) starting with `~`, "
            "which may not be expanded by all applications."
        )

    return "\n".join(msg_lines)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _normalized_outrows(
    outrows: Iterable[InstalledCSVRow],
) -> List[Tuple[str, str, str]]:
    """Normalize the given rows of a RECORD file.

    Items in each row are converted into str. Rows are then sorted to make
    the value more predictable for tests.

    Each row is a 3-tuple (path, hash, size) and corresponds to a record of
    a RECORD file (see PEP 376 and PEP 427 for details). For the rows
    passed to this function, the size can be an integer as an int or string,
    or the empty string.
    """
    # The third element may be an int or a str; stringifying it means a
    # (rare) duplicated path cannot make the sort raise TypeError while the
    # output stays deterministic. See https://github.com/pypa/pip/issues/5868
    stringified = (
        (record_path, file_hash, str(size))
        for record_path, file_hash, size in outrows
    )
    return sorted(stringified)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def _record_to_fs_path(record_path: RecordPath, lib_dir: str) -> str:
    """Resolve an archive RECORD path to a filesystem path under *lib_dir*."""
    fs_path = os.path.join(lib_dir, record_path)
    return fs_path
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def _fs_to_record_path(path: str, lib_dir: str) -> RecordPath:
    """Convert a filesystem path to the form used in a RECORD file.

    The path is made relative to *lib_dir* when both are on the same
    logical disk (always true outside Windows), and the OS separator is
    replaced by "/".
    """
    path_drive = os.path.splitdrive(path)[0].lower()
    lib_drive = os.path.splitdrive(lib_dir)[0].lower()
    # On Windows a path on a different logical disk cannot be made relative.
    if path_drive == lib_drive:
        path = os.path.relpath(path, lib_dir)
    return cast("RecordPath", path.replace(os.path.sep, "/"))
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def get_csv_rows_for_installed(
    old_csv_rows: List[List[str]],
    installed: Dict[RecordPath, RecordPath],
    changed: Set[RecordPath],
    generated: List[str],
    lib_dir: str,
) -> List[InstalledCSVRow]:
    """Combine the wheel's RECORD rows with the installation results.

    :param installed: A map from archive RECORD path to installation RECORD
        path.  Entries consumed here are popped; leftovers are appended at
        the end with empty hash/size fields.
    """
    rows: List[InstalledCSVRow] = []
    # Carry over (and, for modified files, re-hash) the wheel's RECORD rows.
    for row in old_csv_rows:
        if len(row) > 3:
            logger.warning("RECORD line has more than three elements: %s", row)
        old_path = cast("RecordPath", row[0])
        new_path = installed.pop(old_path, old_path)
        if new_path in changed:
            digest, length = rehash(_record_to_fs_path(new_path, lib_dir))
        else:
            digest = row[1] if len(row) > 1 else ""
            length = row[2] if len(row) > 2 else ""
        rows.append((new_path, digest, length))
    # Files pip generated itself (script wrappers etc.) get fresh hashes.
    for generated_path in generated:
        record_path = _fs_to_record_path(generated_path, lib_dir)
        digest, length = rehash(generated_path)
        rows.append((record_path, digest, length))
    # Anything left in `installed` never appeared in the old RECORD.
    rows.extend((leftover, "", "") for leftover in installed.values())
    return rows
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def get_console_script_specs(console: Dict[str, str]) -> List[str]:
    """
    Given the mapping from entrypoint name to callable, return the relevant
    console script specs.
    """
    # Don't mutate caller's version
    console = console.copy()

    scripts_to_generate: List[str] = []

    # Special case pip and setuptools to generate versioned wrappers
    #
    # The issue is that some projects (specifically, pip and setuptools) use
    # code in setup.py to create "versioned" entry points - pip2.7 on Python
    # 2.7, pip3.3 on Python 3.3, etc. But these entry points are baked into
    # the wheel metadata at build time, and so if the wheel is installed with
    # a *different* version of Python the entry points will be wrong. The
    # correct fix for this is to enhance the metadata to be able to describe
    # such versioned entry points.
    # Currently, projects using versioned entry points will either have
    # incorrect versioned entry points, or they will not be able to distribute
    # "universal" wheels (i.e., they will need a wheel per Python version).
    #
    # Because setuptools and pip are bundled with _ensurepip and virtualenv,
    # we need to use universal wheels. As a workaround, we
    # override the versioned entry points in the wheel and generate the
    # correct ones.
    #
    # To add the level of hack in this section of code, in order to support
    # ensurepip this code will look for an ``ENSUREPIP_OPTIONS`` environment
    # variable which will control which version scripts get installed.
    #
    # ENSUREPIP_OPTIONS=altinstall
    #   - Only pipX.Y and easy_install-X.Y will be generated and installed
    # ENSUREPIP_OPTIONS=install
    #   - pipX.Y, pipX, easy_install-X.Y will be generated and installed. Note
    #     that this option is technically if ENSUREPIP_OPTIONS is set and is
    #     not altinstall
    # DEFAULT
    #   - The default behavior is to install pip, pipX, pipX.Y, easy_install
    #     and easy_install-X.Y.
    pip_script = console.pop("pip", None)
    if pip_script:
        if "ENSUREPIP_OPTIONS" not in os.environ:
            scripts_to_generate.append("pip = " + pip_script)

        if os.environ.get("ENSUREPIP_OPTIONS", "") != "altinstall":
            scripts_to_generate.append(f"pip{sys.version_info[0]} = {pip_script}")

        scripts_to_generate.append(f"pip{get_major_minor_version()} = {pip_script}")
        # Delete any other versioned pip entry points
        pip_ep = [k for k in console if re.match(r"pip(\d+(\.\d+)?)?$", k)]
        for k in pip_ep:
            del console[k]
    easy_install_script = console.pop("easy_install", None)
    if easy_install_script:
        if "ENSUREPIP_OPTIONS" not in os.environ:
            scripts_to_generate.append("easy_install = " + easy_install_script)

        scripts_to_generate.append(
            f"easy_install-{get_major_minor_version()} = {easy_install_script}"
        )
        # Delete any other versioned easy_install entry points
        easy_install_ep = [
            k for k in console if re.match(r"easy_install(-\d+\.\d+)?$", k)
        ]
        for k in easy_install_ep:
            del console[k]

    # Generate the console entry points specified in the wheel
    scripts_to_generate.extend(starmap("{} = {}".format, console.items()))

    return scripts_to_generate
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
class ZipBackedFile:
    """A file to install whose content comes from an entry in a wheel zip."""

    def __init__(
        self, src_record_path: RecordPath, dest_path: str, zip_file: ZipFile
    ) -> None:
        self.src_record_path = src_record_path
        self.dest_path = dest_path
        self._zip_file = zip_file
        # Plain extraction never rewrites the content, so this stays False.
        self.changed = False

    def _getinfo(self) -> ZipInfo:
        """Return the ZipInfo for this file's archive entry."""
        return self._zip_file.getinfo(self.src_record_path)

    def save(self) -> None:
        """Extract the archive entry to ``dest_path``."""
        # Truncating an existing file in place can segfault the running
        # process if pip has mmap'd that file as a shared object (e.g. from
        # pyopenssl through its vendored urllib3).  Unlinking first lets the
        # process keep using the old mapping while the new file is written.
        if os.path.exists(self.dest_path):
            os.unlink(self.dest_path)

        info = self._getinfo()

        with open(self.dest_path, "wb") as dest:
            # open() already created the file; skip decompression entirely
            # when the entry is empty.
            if info.file_size > 0:
                with self._zip_file.open(info) as src:
                    chunk = min(info.file_size, 1024 * 1024)
                    shutil.copyfileobj(src, dest, chunk)

        if zip_item_is_executable(info):
            set_extracted_file_to_default_mode_plus_executable(self.dest_path)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
class ScriptFile:
    """Wraps another File, fixing its ``#!python`` shebang after saving."""

    def __init__(self, file: "File") -> None:
        self._file = file
        self.src_record_path = file.src_record_path
        self.dest_path = file.dest_path
        self.changed = False

    def save(self) -> None:
        self._file.save()
        # fix_script reports whether the shebang actually needed rewriting.
        self.changed = fix_script(self.dest_path)
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
class MissingCallableSuffix(InstallationError):
    """Raised for a script entry point spec that names a module but no
    ``:callable`` suffix, which pip cannot turn into a wrapper script."""

    def __init__(self, entry_point: str) -> None:
        super().__init__(
            f"Invalid script entry point: {entry_point} - A callable "
            "suffix is required. Cf https://packaging.python.org/"
            "specifications/entry-points/#use-for-scripts for more "
            "information."
        )
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def _raise_for_invalid_entrypoint(specification: str) -> None:
    """Raise MissingCallableSuffix if *specification* parses as an export
    entry but lacks a callable suffix."""
    entry = get_export_entry(specification)
    if entry is None:
        return
    if entry.suffix is None:
        raise MissingCallableSuffix(str(entry))
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
class PipScriptMaker(ScriptMaker):
    """ScriptMaker that rejects entry point specs without a callable suffix."""

    def make(
        self, specification: str, options: Optional[Dict[str, Any]] = None
    ) -> List[str]:
        # Validate before delegating to distlib, which would otherwise
        # happily generate a broken wrapper script.
        _raise_for_invalid_entrypoint(specification)
        return super().make(specification, options)
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
def _install_wheel( # noqa: C901, PLR0915 function is too long
|
| 423 |
+
name: str,
|
| 424 |
+
wheel_zip: ZipFile,
|
| 425 |
+
wheel_path: str,
|
| 426 |
+
scheme: Scheme,
|
| 427 |
+
pycompile: bool = True,
|
| 428 |
+
warn_script_location: bool = True,
|
| 429 |
+
direct_url: Optional[DirectUrl] = None,
|
| 430 |
+
requested: bool = False,
|
| 431 |
+
) -> None:
|
| 432 |
+
"""Install a wheel.
|
| 433 |
+
|
| 434 |
+
:param name: Name of the project to install
|
| 435 |
+
:param wheel_zip: open ZipFile for wheel being installed
|
| 436 |
+
:param scheme: Distutils scheme dictating the install directories
|
| 437 |
+
:param req_description: String used in place of the requirement, for
|
| 438 |
+
logging
|
| 439 |
+
:param pycompile: Whether to byte-compile installed Python files
|
| 440 |
+
:param warn_script_location: Whether to check that scripts are installed
|
| 441 |
+
into a directory on PATH
|
| 442 |
+
:raises UnsupportedWheel:
|
| 443 |
+
* when the directory holds an unpacked wheel with incompatible
|
| 444 |
+
Wheel-Version
|
| 445 |
+
* when the .dist-info dir does not match the wheel
|
| 446 |
+
"""
|
| 447 |
+
info_dir, metadata = parse_wheel(wheel_zip, name)
|
| 448 |
+
|
| 449 |
+
if wheel_root_is_purelib(metadata):
|
| 450 |
+
lib_dir = scheme.purelib
|
| 451 |
+
else:
|
| 452 |
+
lib_dir = scheme.platlib
|
| 453 |
+
|
| 454 |
+
# Record details of the files moved
|
| 455 |
+
# installed = files copied from the wheel to the destination
|
| 456 |
+
# changed = files changed while installing (scripts #! line typically)
|
| 457 |
+
# generated = files newly generated during the install (script wrappers)
|
| 458 |
+
installed: Dict[RecordPath, RecordPath] = {}
|
| 459 |
+
changed: Set[RecordPath] = set()
|
| 460 |
+
generated: List[str] = []
|
| 461 |
+
|
| 462 |
+
def record_installed(
|
| 463 |
+
srcfile: RecordPath, destfile: str, modified: bool = False
|
| 464 |
+
) -> None:
|
| 465 |
+
"""Map archive RECORD paths to installation RECORD paths."""
|
| 466 |
+
newpath = _fs_to_record_path(destfile, lib_dir)
|
| 467 |
+
installed[srcfile] = newpath
|
| 468 |
+
if modified:
|
| 469 |
+
changed.add(newpath)
|
| 470 |
+
|
| 471 |
+
def is_dir_path(path: RecordPath) -> bool:
|
| 472 |
+
return path.endswith("/")
|
| 473 |
+
|
| 474 |
+
def assert_no_path_traversal(dest_dir_path: str, target_path: str) -> None:
|
| 475 |
+
if not is_within_directory(dest_dir_path, target_path):
|
| 476 |
+
message = (
|
| 477 |
+
"The wheel {!r} has a file {!r} trying to install"
|
| 478 |
+
" outside the target directory {!r}"
|
| 479 |
+
)
|
| 480 |
+
raise InstallationError(
|
| 481 |
+
message.format(wheel_path, target_path, dest_dir_path)
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
def root_scheme_file_maker(
|
| 485 |
+
zip_file: ZipFile, dest: str
|
| 486 |
+
) -> Callable[[RecordPath], "File"]:
|
| 487 |
+
def make_root_scheme_file(record_path: RecordPath) -> "File":
|
| 488 |
+
normed_path = os.path.normpath(record_path)
|
| 489 |
+
dest_path = os.path.join(dest, normed_path)
|
| 490 |
+
assert_no_path_traversal(dest, dest_path)
|
| 491 |
+
return ZipBackedFile(record_path, dest_path, zip_file)
|
| 492 |
+
|
| 493 |
+
return make_root_scheme_file
|
| 494 |
+
|
| 495 |
+
def data_scheme_file_maker(
|
| 496 |
+
zip_file: ZipFile, scheme: Scheme
|
| 497 |
+
) -> Callable[[RecordPath], "File"]:
|
| 498 |
+
scheme_paths = {key: getattr(scheme, key) for key in SCHEME_KEYS}
|
| 499 |
+
|
| 500 |
+
def make_data_scheme_file(record_path: RecordPath) -> "File":
|
| 501 |
+
normed_path = os.path.normpath(record_path)
|
| 502 |
+
try:
|
| 503 |
+
_, scheme_key, dest_subpath = normed_path.split(os.path.sep, 2)
|
| 504 |
+
except ValueError:
|
| 505 |
+
message = (
|
| 506 |
+
f"Unexpected file in {wheel_path}: {record_path!r}. .data directory"
|
| 507 |
+
" contents should be named like: '<scheme key>/<path>'."
|
| 508 |
+
)
|
| 509 |
+
raise InstallationError(message)
|
| 510 |
+
|
| 511 |
+
try:
|
| 512 |
+
scheme_path = scheme_paths[scheme_key]
|
| 513 |
+
except KeyError:
|
| 514 |
+
valid_scheme_keys = ", ".join(sorted(scheme_paths))
|
| 515 |
+
message = (
|
| 516 |
+
f"Unknown scheme key used in {wheel_path}: {scheme_key} "
|
| 517 |
+
f"(for file {record_path!r}). .data directory contents "
|
| 518 |
+
f"should be in subdirectories named with a valid scheme "
|
| 519 |
+
f"key ({valid_scheme_keys})"
|
| 520 |
+
)
|
| 521 |
+
raise InstallationError(message)
|
| 522 |
+
|
| 523 |
+
dest_path = os.path.join(scheme_path, dest_subpath)
|
| 524 |
+
assert_no_path_traversal(scheme_path, dest_path)
|
| 525 |
+
return ZipBackedFile(record_path, dest_path, zip_file)
|
| 526 |
+
|
| 527 |
+
return make_data_scheme_file
|
| 528 |
+
|
| 529 |
+
def is_data_scheme_path(path: RecordPath) -> bool:
|
| 530 |
+
return path.split("/", 1)[0].endswith(".data")
|
| 531 |
+
|
| 532 |
+
paths = cast(List[RecordPath], wheel_zip.namelist())
|
| 533 |
+
file_paths = filterfalse(is_dir_path, paths)
|
| 534 |
+
root_scheme_paths, data_scheme_paths = partition(is_data_scheme_path, file_paths)
|
| 535 |
+
|
| 536 |
+
make_root_scheme_file = root_scheme_file_maker(wheel_zip, lib_dir)
|
| 537 |
+
files: Iterator[File] = map(make_root_scheme_file, root_scheme_paths)
|
| 538 |
+
|
| 539 |
+
def is_script_scheme_path(path: RecordPath) -> bool:
|
| 540 |
+
parts = path.split("/", 2)
|
| 541 |
+
return len(parts) > 2 and parts[0].endswith(".data") and parts[1] == "scripts"
|
| 542 |
+
|
| 543 |
+
other_scheme_paths, script_scheme_paths = partition(
|
| 544 |
+
is_script_scheme_path, data_scheme_paths
|
| 545 |
+
)
|
| 546 |
+
|
| 547 |
+
make_data_scheme_file = data_scheme_file_maker(wheel_zip, scheme)
|
| 548 |
+
other_scheme_files = map(make_data_scheme_file, other_scheme_paths)
|
| 549 |
+
files = chain(files, other_scheme_files)
|
| 550 |
+
|
| 551 |
+
# Get the defined entry points
|
| 552 |
+
distribution = get_wheel_distribution(
|
| 553 |
+
FilesystemWheel(wheel_path),
|
| 554 |
+
canonicalize_name(name),
|
| 555 |
+
)
|
| 556 |
+
console, gui = get_entrypoints(distribution)
|
| 557 |
+
|
| 558 |
+
def is_entrypoint_wrapper(file: "File") -> bool:
|
| 559 |
+
# EP, EP.exe and EP-script.py are scripts generated for
|
| 560 |
+
# entry point EP by setuptools
|
| 561 |
+
path = file.dest_path
|
| 562 |
+
name = os.path.basename(path)
|
| 563 |
+
if name.lower().endswith(".exe"):
|
| 564 |
+
matchname = name[:-4]
|
| 565 |
+
elif name.lower().endswith("-script.py"):
|
| 566 |
+
matchname = name[:-10]
|
| 567 |
+
elif name.lower().endswith(".pya"):
|
| 568 |
+
matchname = name[:-4]
|
| 569 |
+
else:
|
| 570 |
+
matchname = name
|
| 571 |
+
# Ignore setuptools-generated scripts
|
| 572 |
+
return matchname in console or matchname in gui
|
| 573 |
+
|
| 574 |
+
script_scheme_files: Iterator[File] = map(
|
| 575 |
+
make_data_scheme_file, script_scheme_paths
|
| 576 |
+
)
|
| 577 |
+
script_scheme_files = filterfalse(is_entrypoint_wrapper, script_scheme_files)
|
| 578 |
+
script_scheme_files = map(ScriptFile, script_scheme_files)
|
| 579 |
+
files = chain(files, script_scheme_files)
|
| 580 |
+
|
| 581 |
+
existing_parents = set()
|
| 582 |
+
for file in files:
|
| 583 |
+
# directory creation is lazy and after file filtering
|
| 584 |
+
# to ensure we don't install empty dirs; empty dirs can't be
|
| 585 |
+
# uninstalled.
|
| 586 |
+
parent_dir = os.path.dirname(file.dest_path)
|
| 587 |
+
if parent_dir not in existing_parents:
|
| 588 |
+
ensure_dir(parent_dir)
|
| 589 |
+
existing_parents.add(parent_dir)
|
| 590 |
+
file.save()
|
| 591 |
+
record_installed(file.src_record_path, file.dest_path, file.changed)
|
| 592 |
+
|
| 593 |
+
def pyc_source_file_paths() -> Generator[str, None, None]:
|
| 594 |
+
# We de-duplicate installation paths, since there can be overlap (e.g.
|
| 595 |
+
# file in .data maps to same location as file in wheel root).
|
| 596 |
+
# Sorting installation paths makes it easier to reproduce and debug
|
| 597 |
+
# issues related to permissions on existing files.
|
| 598 |
+
for installed_path in sorted(set(installed.values())):
|
| 599 |
+
full_installed_path = os.path.join(lib_dir, installed_path)
|
| 600 |
+
if not os.path.isfile(full_installed_path):
|
| 601 |
+
continue
|
| 602 |
+
if not full_installed_path.endswith(".py"):
|
| 603 |
+
continue
|
| 604 |
+
yield full_installed_path
|
| 605 |
+
|
| 606 |
+
def pyc_output_path(path: str) -> str:
|
| 607 |
+
"""Return the path the pyc file would have been written to."""
|
| 608 |
+
return importlib.util.cache_from_source(path)
|
| 609 |
+
|
| 610 |
+
# Compile all of the pyc files for the installed files
|
| 611 |
+
if pycompile:
|
| 612 |
+
with contextlib.redirect_stdout(
|
| 613 |
+
StreamWrapper.from_stream(sys.stdout)
|
| 614 |
+
) as stdout:
|
| 615 |
+
with warnings.catch_warnings():
|
| 616 |
+
warnings.filterwarnings("ignore")
|
| 617 |
+
for path in pyc_source_file_paths():
|
| 618 |
+
success = compileall.compile_file(path, force=True, quiet=True)
|
| 619 |
+
if success:
|
| 620 |
+
pyc_path = pyc_output_path(path)
|
| 621 |
+
assert os.path.exists(pyc_path)
|
| 622 |
+
pyc_record_path = cast(
|
| 623 |
+
"RecordPath", pyc_path.replace(os.path.sep, "/")
|
| 624 |
+
)
|
| 625 |
+
record_installed(pyc_record_path, pyc_path)
|
| 626 |
+
logger.debug(stdout.getvalue())
|
| 627 |
+
|
| 628 |
+
maker = PipScriptMaker(None, scheme.scripts)
|
| 629 |
+
|
| 630 |
+
# Ensure old scripts are overwritten.
|
| 631 |
+
# See https://github.com/pypa/pip/issues/1800
|
| 632 |
+
maker.clobber = True
|
| 633 |
+
|
| 634 |
+
# Ensure we don't generate any variants for scripts because this is almost
|
| 635 |
+
# never what somebody wants.
|
| 636 |
+
# See https://bitbucket.org/pypa/distlib/issue/35/
|
| 637 |
+
maker.variants = {""}
|
| 638 |
+
|
| 639 |
+
# This is required because otherwise distlib creates scripts that are not
|
| 640 |
+
# executable.
|
| 641 |
+
# See https://bitbucket.org/pypa/distlib/issue/32/
|
| 642 |
+
maker.set_mode = True
|
| 643 |
+
|
| 644 |
+
# Generate the console and GUI entry points specified in the wheel
|
| 645 |
+
scripts_to_generate = get_console_script_specs(console)
|
| 646 |
+
|
| 647 |
+
gui_scripts_to_generate = list(starmap("{} = {}".format, gui.items()))
|
| 648 |
+
|
| 649 |
+
generated_console_scripts = maker.make_multiple(scripts_to_generate)
|
| 650 |
+
generated.extend(generated_console_scripts)
|
| 651 |
+
|
| 652 |
+
generated.extend(maker.make_multiple(gui_scripts_to_generate, {"gui": True}))
|
| 653 |
+
|
| 654 |
+
if warn_script_location:
|
| 655 |
+
msg = message_about_scripts_not_on_PATH(generated_console_scripts)
|
| 656 |
+
if msg is not None:
|
| 657 |
+
logger.warning(msg)
|
| 658 |
+
|
| 659 |
+
generated_file_mode = 0o666 & ~current_umask()
|
| 660 |
+
|
| 661 |
+
@contextlib.contextmanager
|
| 662 |
+
def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]:
|
| 663 |
+
with adjacent_tmp_file(path, **kwargs) as f:
|
| 664 |
+
yield f
|
| 665 |
+
os.chmod(f.name, generated_file_mode)
|
| 666 |
+
replace(f.name, path)
|
| 667 |
+
|
| 668 |
+
dest_info_dir = os.path.join(lib_dir, info_dir)
|
| 669 |
+
|
| 670 |
+
# Record pip as the installer
|
| 671 |
+
installer_path = os.path.join(dest_info_dir, "INSTALLER")
|
| 672 |
+
with _generate_file(installer_path) as installer_file:
|
| 673 |
+
installer_file.write(b"pip\n")
|
| 674 |
+
generated.append(installer_path)
|
| 675 |
+
|
| 676 |
+
# Record the PEP 610 direct URL reference
|
| 677 |
+
if direct_url is not None:
|
| 678 |
+
direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME)
|
| 679 |
+
with _generate_file(direct_url_path) as direct_url_file:
|
| 680 |
+
direct_url_file.write(direct_url.to_json().encode("utf-8"))
|
| 681 |
+
generated.append(direct_url_path)
|
| 682 |
+
|
| 683 |
+
# Record the REQUESTED file
|
| 684 |
+
if requested:
|
| 685 |
+
requested_path = os.path.join(dest_info_dir, "REQUESTED")
|
| 686 |
+
with open(requested_path, "wb"):
|
| 687 |
+
pass
|
| 688 |
+
generated.append(requested_path)
|
| 689 |
+
|
| 690 |
+
record_text = distribution.read_text("RECORD")
|
| 691 |
+
record_rows = list(csv.reader(record_text.splitlines()))
|
| 692 |
+
|
| 693 |
+
rows = get_csv_rows_for_installed(
|
| 694 |
+
record_rows,
|
| 695 |
+
installed=installed,
|
| 696 |
+
changed=changed,
|
| 697 |
+
generated=generated,
|
| 698 |
+
lib_dir=lib_dir,
|
| 699 |
+
)
|
| 700 |
+
|
| 701 |
+
# Record details of all files installed
|
| 702 |
+
record_path = os.path.join(dest_info_dir, "RECORD")
|
| 703 |
+
|
| 704 |
+
with _generate_file(record_path, **csv_io_kwargs("w")) as record_file:
|
| 705 |
+
# Explicitly cast to typing.IO[str] as a workaround for the mypy error:
|
| 706 |
+
# "writer" has incompatible type "BinaryIO"; expected "_Writer"
|
| 707 |
+
writer = csv.writer(cast("IO[str]", record_file))
|
| 708 |
+
writer.writerows(_normalized_outrows(rows))
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
@contextlib.contextmanager
def req_error_context(req_description: str) -> Generator[None, None, None]:
    """Re-raise any InstallationError with the requirement description prepended."""
    try:
        yield
    except InstallationError as exc:
        annotated = f"For req: {req_description}. {exc.args[0]}"
        raise InstallationError(annotated) from exc
|
| 718 |
+
|
| 719 |
+
|
| 720 |
+
def install_wheel(
    name: str,
    wheel_path: str,
    scheme: Scheme,
    req_description: str,
    pycompile: bool = True,
    warn_script_location: bool = True,
    direct_url: Optional[DirectUrl] = None,
    requested: bool = False,
) -> None:
    """Open the wheel and install it, annotating any errors with the req.

    Thin public wrapper around ``_install_wheel``: opens the zip archive and
    wraps InstallationErrors with ``req_description`` for better messages.
    """
    # A single with-statement is equivalent to the nested form: the error
    # context wraps everything done while the archive is open.
    with ZipFile(wheel_path, allowZip64=True) as wheel_zip, req_error_context(
        req_description
    ):
        _install_wheel(
            name=name,
            wheel_zip=wheel_zip,
            wheel_path=wheel_path,
            scheme=scheme,
            pycompile=pycompile,
            warn_script_location=warn_script_location,
            direct_url=direct_url,
            requested=requested,
        )
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prepares a distribution for installation
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
# The following comment should be removed at some point in the future.
|
| 5 |
+
# mypy: strict-optional=False
|
| 6 |
+
|
| 7 |
+
import mimetypes
|
| 8 |
+
import os
|
| 9 |
+
import shutil
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Dict, Iterable, List, Optional
|
| 13 |
+
|
| 14 |
+
from pip._vendor.packaging.utils import canonicalize_name
|
| 15 |
+
|
| 16 |
+
from pip._internal.distributions import make_distribution_for_install_requirement
|
| 17 |
+
from pip._internal.distributions.installed import InstalledDistribution
|
| 18 |
+
from pip._internal.exceptions import (
|
| 19 |
+
DirectoryUrlHashUnsupported,
|
| 20 |
+
HashMismatch,
|
| 21 |
+
HashUnpinned,
|
| 22 |
+
InstallationError,
|
| 23 |
+
MetadataInconsistent,
|
| 24 |
+
NetworkConnectionError,
|
| 25 |
+
VcsHashUnsupported,
|
| 26 |
+
)
|
| 27 |
+
from pip._internal.index.package_finder import PackageFinder
|
| 28 |
+
from pip._internal.metadata import BaseDistribution, get_metadata_distribution
|
| 29 |
+
from pip._internal.models.direct_url import ArchiveInfo
|
| 30 |
+
from pip._internal.models.link import Link
|
| 31 |
+
from pip._internal.models.wheel import Wheel
|
| 32 |
+
from pip._internal.network.download import BatchDownloader, Downloader
|
| 33 |
+
from pip._internal.network.lazy_wheel import (
|
| 34 |
+
HTTPRangeRequestUnsupported,
|
| 35 |
+
dist_from_wheel_url,
|
| 36 |
+
)
|
| 37 |
+
from pip._internal.network.session import PipSession
|
| 38 |
+
from pip._internal.operations.build.build_tracker import BuildTracker
|
| 39 |
+
from pip._internal.req.req_install import InstallRequirement
|
| 40 |
+
from pip._internal.utils._log import getLogger
|
| 41 |
+
from pip._internal.utils.direct_url_helpers import (
|
| 42 |
+
direct_url_for_editable,
|
| 43 |
+
direct_url_from_link,
|
| 44 |
+
)
|
| 45 |
+
from pip._internal.utils.hashes import Hashes, MissingHashes
|
| 46 |
+
from pip._internal.utils.logging import indent_log
|
| 47 |
+
from pip._internal.utils.misc import (
|
| 48 |
+
display_path,
|
| 49 |
+
hash_file,
|
| 50 |
+
hide_url,
|
| 51 |
+
redact_auth_from_requirement,
|
| 52 |
+
)
|
| 53 |
+
from pip._internal.utils.temp_dir import TempDirectory
|
| 54 |
+
from pip._internal.utils.unpacking import unpack_file
|
| 55 |
+
from pip._internal.vcs import vcs
|
| 56 |
+
|
| 57 |
+
logger = getLogger(__name__)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _get_prepared_distribution(
    req: InstallRequirement,
    build_tracker: BuildTracker,
    finder: PackageFinder,
    build_isolation: bool,
    check_build_deps: bool,
) -> BaseDistribution:
    """Prepare a distribution for installation and return its metadata dist."""
    dist = make_distribution_for_install_requirement(req)
    tracker_id = dist.build_tracker_id
    # Only distributions that participate in build tracking (i.e. may need a
    # build) prepare metadata under the tracker.
    if tracker_id is not None:
        with build_tracker.track(req, tracker_id):
            dist.prepare_distribution_metadata(
                finder, build_isolation, check_build_deps
            )
    return dist.get_metadata_distribution()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
    """Check out the VCS ``link`` into ``location``."""
    backend = vcs.get_backend_for_scheme(link.scheme)
    assert backend is not None
    backend.unpack(location, url=hide_url(link.url), verbosity=verbosity)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@dataclass
class File:
    # A local file path plus its MIME content type (guessed from the
    # filename when not supplied explicitly).
    path: str
    content_type: Optional[str] = None

    def __post_init__(self) -> None:
        # Only guess when the caller did not provide a content type.
        if self.content_type is not None:
            return
        self.content_type = mimetypes.guess_type(self.path)[0]
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def get_http_url(
    link: Link,
    download: Downloader,
    download_dir: Optional[str] = None,
    hashes: Optional[Hashes] = None,
) -> File:
    """Download ``link``, reusing a valid copy from ``download_dir`` if present."""
    temp_dir = TempDirectory(kind="unpack", globally_managed=True)
    # Reuse a previously downloaded archive when it exists and its hash checks out.
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir, hashes)

    if already_downloaded_path:
        # Content type unknown for the cached copy; File guesses it from the name.
        return File(already_downloaded_path, None)

    # Otherwise download into the managed temporary directory and verify.
    from_path, content_type = download(link, temp_dir.path)
    if hashes:
        hashes.check_against_path(from_path)
    return File(from_path, content_type)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def get_file_url(
    link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
) -> File:
    """Get file and optionally check its hash."""
    # Prefer a previously downloaded, hash-valid copy in download_dir.
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir, hashes)

    from_path = already_downloaded_path or link.file_path

    # If --require-hashes is off, `hashes` is either empty, the
    # link's embedded hash, or MissingHashes; it is required to
    # match. If --require-hashes is on, we are satisfied by any
    # hash in `hashes` matching: a URL-based or an option-based
    # one; no internet-sourced hash will be in `hashes`.
    if hashes:
        hashes.check_against_path(from_path)
    return File(from_path, None)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def unpack_url(
    link: Link,
    location: str,
    download: Downloader,
    verbosity: int,
    download_dir: Optional[str] = None,
    hashes: Optional[Hashes] = None,
) -> Optional[File]:
    """Unpack link into location, downloading if required.

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # Non-editable VCS links are checked out directly; no archive to return.
    if link.is_vcs:
        unpack_vcs_link(link, location, verbosity=verbosity)
        return None

    assert not link.is_existing_dir()

    # Obtain the archive: locally for file: URLs, over HTTP otherwise.
    if link.is_file:
        file = get_file_url(link, download_dir, hashes=hashes)
    else:
        file = get_http_url(
            link,
            download,
            download_dir,
            hashes=hashes,
        )

    # Unpack to the build dir; even download-only runs must unpack archives
    # to parse dependencies -- wheels are the exception.
    if not link.is_wheel:
        unpack_file(file.path, location, file.content_type)

    return file
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _check_download_dir(
    link: Link,
    download_dir: str,
    hashes: Optional[Hashes],
    warn_on_hash_mismatch: bool = True,
) -> Optional[str]:
    """Check download_dir for a previously downloaded file with a correct hash.

    Return the file's path when a usable copy exists, else None.
    """
    download_path = os.path.join(download_dir, link.filename)

    if not os.path.exists(download_path):
        return None

    logger.info("File was already downloaded %s", download_path)
    if not hashes:
        return download_path

    try:
        hashes.check_against_path(download_path)
    except HashMismatch:
        # Stale or corrupt copy: discard it so it gets re-fetched.
        if warn_on_hash_mismatch:
            logger.warning(
                "Previously-downloaded file %s has bad hash. Re-downloading.",
                download_path,
            )
        os.unlink(download_path)
        return None
    return download_path
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
class RequirementPreparer:
|
| 216 |
+
"""Prepares a Requirement"""
|
| 217 |
+
|
| 218 |
+
    def __init__(
        self,
        build_dir: str,
        download_dir: Optional[str],
        src_dir: str,
        build_isolation: bool,
        check_build_deps: bool,
        build_tracker: BuildTracker,
        session: PipSession,
        progress_bar: str,
        finder: PackageFinder,
        require_hashes: bool,
        use_user_site: bool,
        lazy_wheel: bool,
        verbosity: int,
        legacy_resolver: bool,
    ) -> None:
        """Store the configuration controlling how requirements are prepared."""
        super().__init__()

        self.src_dir = src_dir
        self.build_dir = build_dir
        self.build_tracker = build_tracker
        self._session = session
        self._download = Downloader(session, progress_bar)
        self._batch_download = BatchDownloader(session, progress_bar)
        self.finder = finder

        # Where still-packed archives should be written to. If None, they are
        # not saved, and are deleted immediately after unpacking.
        self.download_dir = download_dir

        # Is build isolation allowed?
        self.build_isolation = build_isolation

        # Should check build dependencies?
        self.check_build_deps = check_build_deps

        # Should hash-checking be required?
        self.require_hashes = require_hashes

        # Should install in user site-packages?
        self.use_user_site = use_user_site

        # Should wheels be downloaded lazily?
        self.use_lazy_wheel = lazy_wheel

        # How verbose should underlying tooling be?
        self.verbosity = verbosity

        # Are we using the legacy resolver?
        self.legacy_resolver = legacy_resolver

        # Memoized downloaded files, as mapping of url: path.
        self._downloaded: Dict[str, str] = {}

        # Previous "header" printed for a link-based InstallRequirement
        # (message template, formatted detail); used by _log_preparing_link
        # to suppress consecutive duplicate log lines.
        self._previous_requirement_header = ("", "")
|
| 275 |
+
|
| 276 |
+
def _log_preparing_link(self, req: InstallRequirement) -> None:
|
| 277 |
+
"""Provide context for the requirement being prepared."""
|
| 278 |
+
if req.link.is_file and not req.is_wheel_from_cache:
|
| 279 |
+
message = "Processing %s"
|
| 280 |
+
information = str(display_path(req.link.file_path))
|
| 281 |
+
else:
|
| 282 |
+
message = "Collecting %s"
|
| 283 |
+
information = redact_auth_from_requirement(req.req) if req.req else str(req)
|
| 284 |
+
|
| 285 |
+
# If we used req.req, inject requirement source if available (this
|
| 286 |
+
# would already be included if we used req directly)
|
| 287 |
+
if req.req and req.comes_from:
|
| 288 |
+
if isinstance(req.comes_from, str):
|
| 289 |
+
comes_from: Optional[str] = req.comes_from
|
| 290 |
+
else:
|
| 291 |
+
comes_from = req.comes_from.from_path()
|
| 292 |
+
if comes_from:
|
| 293 |
+
information += f" (from {comes_from})"
|
| 294 |
+
|
| 295 |
+
if (message, information) != self._previous_requirement_header:
|
| 296 |
+
self._previous_requirement_header = (message, information)
|
| 297 |
+
logger.info(message, information)
|
| 298 |
+
|
| 299 |
+
if req.is_wheel_from_cache:
|
| 300 |
+
with indent_log():
|
| 301 |
+
logger.info("Using cached %s", req.link.filename)
|
| 302 |
+
|
| 303 |
+
def _ensure_link_req_src_dir(
|
| 304 |
+
self, req: InstallRequirement, parallel_builds: bool
|
| 305 |
+
) -> None:
|
| 306 |
+
"""Ensure source_dir of a linked InstallRequirement."""
|
| 307 |
+
# Since source_dir is only set for editable requirements.
|
| 308 |
+
if req.link.is_wheel:
|
| 309 |
+
# We don't need to unpack wheels, so no need for a source
|
| 310 |
+
# directory.
|
| 311 |
+
return
|
| 312 |
+
assert req.source_dir is None
|
| 313 |
+
if req.link.is_existing_dir():
|
| 314 |
+
# build local directories in-tree
|
| 315 |
+
req.source_dir = req.link.file_path
|
| 316 |
+
return
|
| 317 |
+
|
| 318 |
+
# We always delete unpacked sdists after pip runs.
|
| 319 |
+
req.ensure_has_source_dir(
|
| 320 |
+
self.build_dir,
|
| 321 |
+
autodelete=True,
|
| 322 |
+
parallel_builds=parallel_builds,
|
| 323 |
+
)
|
| 324 |
+
req.ensure_pristine_source_checkout()
|
| 325 |
+
|
| 326 |
+
def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
|
| 327 |
+
# By the time this is called, the requirement's link should have
|
| 328 |
+
# been checked so we can tell what kind of requirements req is
|
| 329 |
+
# and raise some more informative errors than otherwise.
|
| 330 |
+
# (For example, we can raise VcsHashUnsupported for a VCS URL
|
| 331 |
+
# rather than HashMissing.)
|
| 332 |
+
if not self.require_hashes:
|
| 333 |
+
return req.hashes(trust_internet=True)
|
| 334 |
+
|
| 335 |
+
# We could check these first 2 conditions inside unpack_url
|
| 336 |
+
# and save repetition of conditions, but then we would
|
| 337 |
+
# report less-useful error messages for unhashable
|
| 338 |
+
# requirements, complaining that there's no hash provided.
|
| 339 |
+
if req.link.is_vcs:
|
| 340 |
+
raise VcsHashUnsupported()
|
| 341 |
+
if req.link.is_existing_dir():
|
| 342 |
+
raise DirectoryUrlHashUnsupported()
|
| 343 |
+
|
| 344 |
+
# Unpinned packages are asking for trouble when a new version
|
| 345 |
+
# is uploaded. This isn't a security check, but it saves users
|
| 346 |
+
# a surprising hash mismatch in the future.
|
| 347 |
+
# file:/// URLs aren't pinnable, so don't complain about them
|
| 348 |
+
# not being pinned.
|
| 349 |
+
if not req.is_direct and not req.is_pinned:
|
| 350 |
+
raise HashUnpinned()
|
| 351 |
+
|
| 352 |
+
# If known-good hashes are missing for this requirement,
|
| 353 |
+
# shim it with a facade object that will provoke hash
|
| 354 |
+
# computation and then raise a HashMissing exception
|
| 355 |
+
# showing the user what the hash should be.
|
| 356 |
+
return req.hashes(trust_internet=False) or MissingHashes()
|
| 357 |
+
|
| 358 |
+
def _fetch_metadata_only(
|
| 359 |
+
self,
|
| 360 |
+
req: InstallRequirement,
|
| 361 |
+
) -> Optional[BaseDistribution]:
|
| 362 |
+
if self.legacy_resolver:
|
| 363 |
+
logger.debug(
|
| 364 |
+
"Metadata-only fetching is not used in the legacy resolver",
|
| 365 |
+
)
|
| 366 |
+
return None
|
| 367 |
+
if self.require_hashes:
|
| 368 |
+
logger.debug(
|
| 369 |
+
"Metadata-only fetching is not used as hash checking is required",
|
| 370 |
+
)
|
| 371 |
+
return None
|
| 372 |
+
# Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
|
| 373 |
+
return self._fetch_metadata_using_link_data_attr(
|
| 374 |
+
req
|
| 375 |
+
) or self._fetch_metadata_using_lazy_wheel(req.link)
|
| 376 |
+
|
| 377 |
+
    def _fetch_metadata_using_link_data_attr(
        self,
        req: InstallRequirement,
    ) -> Optional[BaseDistribution]:
        """Fetch metadata from the data-dist-info-metadata attribute, if possible.

        Returns None when the index did not advertise a PEP 658 metadata file
        for this link. Raises MetadataInconsistent when the fetched METADATA's
        Name does not match the requirement's name.
        """
        # (1) Get the link to the metadata file, if provided by the backend.
        metadata_link = req.link.metadata_link()
        if metadata_link is None:
            return None
        assert req.req is not None
        logger.verbose(
            "Obtaining dependency information for %s from %s",
            req.req,
            metadata_link,
        )
        # (2) Download the contents of the METADATA file, separate from the dist itself.
        metadata_file = get_http_url(
            metadata_link,
            self._download,
            hashes=metadata_link.as_hashes(),
        )
        with open(metadata_file.path, "rb") as f:
            metadata_contents = f.read()
        # (3) Generate a dist just from those file contents.
        metadata_dist = get_metadata_distribution(
            metadata_contents,
            req.link.filename,
            req.req.name,
        )
        # (4) Ensure the Name: field from the METADATA file matches the name from the
        # install requirement.
        #
        # NB: raw_name will fall back to the name from the install requirement if
        # the Name: field is not present, but it's noted in the raw_name docstring
        # that that should NEVER happen anyway.
        if canonicalize_name(metadata_dist.raw_name) != canonicalize_name(req.req.name):
            raise MetadataInconsistent(
                req, "Name", req.req.name, metadata_dist.raw_name
            )
        return metadata_dist
|
| 417 |
+
|
| 418 |
+
    def _fetch_metadata_using_lazy_wheel(
        self,
        link: Link,
    ) -> Optional[BaseDistribution]:
        """Fetch metadata using lazy wheel, if possible.

        Returns None when lazy wheel is disabled, the link is not a remote
        wheel, or the server does not support HTTP range requests.
        """
        # --use-feature=fast-deps must be provided.
        if not self.use_lazy_wheel:
            return None
        if link.is_file or not link.is_wheel:
            logger.debug(
                "Lazy wheel is not used as %r does not point to a remote wheel",
                link,
            )
            return None

        wheel = Wheel(link.filename)
        name = canonicalize_name(wheel.name)
        logger.info(
            "Obtaining dependency information from %s %s",
            name,
            wheel.version,
        )
        # Strip the fragment (e.g. an embedded hash) before issuing range requests.
        url = link.url.split("#", 1)[0]
        try:
            return dist_from_wheel_url(name, url, self._session)
        except HTTPRangeRequestUnsupported:
            logger.debug("%s does not support range requests", url)
            return None
|
| 446 |
+
|
| 447 |
+
    def _complete_partial_requirements(
        self,
        partially_downloaded_reqs: Iterable[InstallRequirement],
        parallel_builds: bool = False,
    ) -> None:
        """Download any requirements which were only fetched by metadata.

        All links are downloaded in one batch, then each requirement is fully
        prepared via _prepare_linked_requirement.
        """
        # Download to a temporary directory. These will be copied over as
        # needed for downstream 'download', 'wheel', and 'install' commands.
        temp_dir = TempDirectory(kind="unpack", globally_managed=True).path

        # Map each link to the requirement that owns it. This allows us to set
        # `req.local_file_path` on the appropriate requirement after passing
        # all the links at once into BatchDownloader.
        links_to_fully_download: Dict[Link, InstallRequirement] = {}
        for req in partially_downloaded_reqs:
            assert req.link
            links_to_fully_download[req.link] = req

        batch_download = self._batch_download(
            links_to_fully_download.keys(),
            temp_dir,
        )
        for link, (filepath, _) in batch_download:
            logger.debug("Downloading link %s to %s", link, filepath)
            req = links_to_fully_download[link]
            # Record the downloaded file path so wheel reqs can extract a Distribution
            # in .get_dist().
            req.local_file_path = filepath
            # Record that the file is downloaded so we don't do it again in
            # _prepare_linked_requirement().
            self._downloaded[req.link.url] = filepath

            # If this is an sdist, we need to unpack it after downloading, but the
            # .source_dir won't be set up until we are in _prepare_linked_requirement().
            # Add the downloaded archive to the install requirement to unpack after
            # preparing the source dir.
            if not req.is_wheel:
                req.needs_unpacked_archive(Path(filepath))

        # This step is necessary to ensure all lazy wheels are processed
        # successfully by the 'download', 'wheel', and 'install' commands.
        for req in partially_downloaded_reqs:
            self._prepare_linked_requirement(req, parallel_builds)
|
| 490 |
+
|
| 491 |
+
    def prepare_linked_requirement(
        self, req: InstallRequirement, parallel_builds: bool = False
    ) -> BaseDistribution:
        """Prepare a requirement to be obtained from req.link.

        Tries the cheap paths first (an already-downloaded wheel in the
        download dir, then metadata-only fetching) before falling back to a
        full preparation.
        """
        assert req.link
        self._log_preparing_link(req)
        with indent_log():
            # Check if the relevant file is already available
            # in the download directory
            file_path = None
            if self.download_dir is not None and req.link.is_wheel:
                hashes = self._get_linked_req_hashes(req)
                file_path = _check_download_dir(
                    req.link,
                    self.download_dir,
                    hashes,
                    # When a locally built wheel has been found in cache, we don't warn
                    # about re-downloading when the already downloaded wheel hash does
                    # not match. This is because the hash must be checked against the
                    # original link, not the cached link. In that case the already
                    # downloaded file will be removed and re-fetched from cache (which
                    # implies a hash check against the cache entry's origin.json).
                    warn_on_hash_mismatch=not req.is_wheel_from_cache,
                )

            if file_path is not None:
                # The file is already available, so mark it as downloaded
                self._downloaded[req.link.url] = file_path
            else:
                # The file is not available, attempt to fetch only metadata
                metadata_dist = self._fetch_metadata_only(req)
                if metadata_dist is not None:
                    # Full download/build is deferred until
                    # prepare_linked_requirements_more() runs.
                    req.needs_more_preparation = True
                    return metadata_dist

            # None of the optimizations worked, fully prepare the requirement
            return self._prepare_linked_requirement(req, parallel_builds)
|
| 528 |
+
|
| 529 |
+
def prepare_linked_requirements_more(
|
| 530 |
+
self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
|
| 531 |
+
) -> None:
|
| 532 |
+
"""Prepare linked requirements more, if needed."""
|
| 533 |
+
reqs = [req for req in reqs if req.needs_more_preparation]
|
| 534 |
+
for req in reqs:
|
| 535 |
+
# Determine if any of these requirements were already downloaded.
|
| 536 |
+
if self.download_dir is not None and req.link.is_wheel:
|
| 537 |
+
hashes = self._get_linked_req_hashes(req)
|
| 538 |
+
file_path = _check_download_dir(req.link, self.download_dir, hashes)
|
| 539 |
+
if file_path is not None:
|
| 540 |
+
self._downloaded[req.link.url] = file_path
|
| 541 |
+
req.needs_more_preparation = False
|
| 542 |
+
|
| 543 |
+
# Prepare requirements we found were already downloaded for some
|
| 544 |
+
# reason. The other downloads will be completed separately.
|
| 545 |
+
partially_downloaded_reqs: List[InstallRequirement] = []
|
| 546 |
+
for req in reqs:
|
| 547 |
+
if req.needs_more_preparation:
|
| 548 |
+
partially_downloaded_reqs.append(req)
|
| 549 |
+
else:
|
| 550 |
+
self._prepare_linked_requirement(req, parallel_builds)
|
| 551 |
+
|
| 552 |
+
# TODO: separate this part out from RequirementPreparer when the v1
|
| 553 |
+
# resolver can be removed!
|
| 554 |
+
self._complete_partial_requirements(
|
| 555 |
+
partially_downloaded_reqs,
|
| 556 |
+
parallel_builds=parallel_builds,
|
| 557 |
+
)
|
| 558 |
+
|
| 559 |
+
def _prepare_linked_requirement(
|
| 560 |
+
self, req: InstallRequirement, parallel_builds: bool
|
| 561 |
+
) -> BaseDistribution:
|
| 562 |
+
assert req.link
|
| 563 |
+
link = req.link
|
| 564 |
+
|
| 565 |
+
hashes = self._get_linked_req_hashes(req)
|
| 566 |
+
|
| 567 |
+
if hashes and req.is_wheel_from_cache:
|
| 568 |
+
assert req.download_info is not None
|
| 569 |
+
assert link.is_wheel
|
| 570 |
+
assert link.is_file
|
| 571 |
+
# We need to verify hashes, and we have found the requirement in the cache
|
| 572 |
+
# of locally built wheels.
|
| 573 |
+
if (
|
| 574 |
+
isinstance(req.download_info.info, ArchiveInfo)
|
| 575 |
+
and req.download_info.info.hashes
|
| 576 |
+
and hashes.has_one_of(req.download_info.info.hashes)
|
| 577 |
+
):
|
| 578 |
+
# At this point we know the requirement was built from a hashable source
|
| 579 |
+
# artifact, and we verified that the cache entry's hash of the original
|
| 580 |
+
# artifact matches one of the hashes we expect. We don't verify hashes
|
| 581 |
+
# against the cached wheel, because the wheel is not the original.
|
| 582 |
+
hashes = None
|
| 583 |
+
else:
|
| 584 |
+
logger.warning(
|
| 585 |
+
"The hashes of the source archive found in cache entry "
|
| 586 |
+
"don't match, ignoring cached built wheel "
|
| 587 |
+
"and re-downloading source."
|
| 588 |
+
)
|
| 589 |
+
req.link = req.cached_wheel_source_link
|
| 590 |
+
link = req.link
|
| 591 |
+
|
| 592 |
+
self._ensure_link_req_src_dir(req, parallel_builds)
|
| 593 |
+
|
| 594 |
+
if link.is_existing_dir():
|
| 595 |
+
local_file = None
|
| 596 |
+
elif link.url not in self._downloaded:
|
| 597 |
+
try:
|
| 598 |
+
local_file = unpack_url(
|
| 599 |
+
link,
|
| 600 |
+
req.source_dir,
|
| 601 |
+
self._download,
|
| 602 |
+
self.verbosity,
|
| 603 |
+
self.download_dir,
|
| 604 |
+
hashes,
|
| 605 |
+
)
|
| 606 |
+
except NetworkConnectionError as exc:
|
| 607 |
+
raise InstallationError(
|
| 608 |
+
f"Could not install requirement {req} because of HTTP "
|
| 609 |
+
f"error {exc} for URL {link}"
|
| 610 |
+
)
|
| 611 |
+
else:
|
| 612 |
+
file_path = self._downloaded[link.url]
|
| 613 |
+
if hashes:
|
| 614 |
+
hashes.check_against_path(file_path)
|
| 615 |
+
local_file = File(file_path, content_type=None)
|
| 616 |
+
|
| 617 |
+
# If download_info is set, we got it from the wheel cache.
|
| 618 |
+
if req.download_info is None:
|
| 619 |
+
# Editables don't go through this function (see
|
| 620 |
+
# prepare_editable_requirement).
|
| 621 |
+
assert not req.editable
|
| 622 |
+
req.download_info = direct_url_from_link(link, req.source_dir)
|
| 623 |
+
# Make sure we have a hash in download_info. If we got it as part of the
|
| 624 |
+
# URL, it will have been verified and we can rely on it. Otherwise we
|
| 625 |
+
# compute it from the downloaded file.
|
| 626 |
+
# FIXME: https://github.com/pypa/pip/issues/11943
|
| 627 |
+
if (
|
| 628 |
+
isinstance(req.download_info.info, ArchiveInfo)
|
| 629 |
+
and not req.download_info.info.hashes
|
| 630 |
+
and local_file
|
| 631 |
+
):
|
| 632 |
+
hash = hash_file(local_file.path)[0].hexdigest()
|
| 633 |
+
# We populate info.hash for backward compatibility.
|
| 634 |
+
# This will automatically populate info.hashes.
|
| 635 |
+
req.download_info.info.hash = f"sha256={hash}"
|
| 636 |
+
|
| 637 |
+
# For use in later processing,
|
| 638 |
+
# preserve the file path on the requirement.
|
| 639 |
+
if local_file:
|
| 640 |
+
req.local_file_path = local_file.path
|
| 641 |
+
|
| 642 |
+
dist = _get_prepared_distribution(
|
| 643 |
+
req,
|
| 644 |
+
self.build_tracker,
|
| 645 |
+
self.finder,
|
| 646 |
+
self.build_isolation,
|
| 647 |
+
self.check_build_deps,
|
| 648 |
+
)
|
| 649 |
+
return dist
|
| 650 |
+
|
| 651 |
+
def save_linked_requirement(self, req: InstallRequirement) -> None:
|
| 652 |
+
assert self.download_dir is not None
|
| 653 |
+
assert req.link is not None
|
| 654 |
+
link = req.link
|
| 655 |
+
if link.is_vcs or (link.is_existing_dir() and req.editable):
|
| 656 |
+
# Make a .zip of the source_dir we already created.
|
| 657 |
+
req.archive(self.download_dir)
|
| 658 |
+
return
|
| 659 |
+
|
| 660 |
+
if link.is_existing_dir():
|
| 661 |
+
logger.debug(
|
| 662 |
+
"Not copying link to destination directory "
|
| 663 |
+
"since it is a directory: %s",
|
| 664 |
+
link,
|
| 665 |
+
)
|
| 666 |
+
return
|
| 667 |
+
if req.local_file_path is None:
|
| 668 |
+
# No distribution was downloaded for this requirement.
|
| 669 |
+
return
|
| 670 |
+
|
| 671 |
+
download_location = os.path.join(self.download_dir, link.filename)
|
| 672 |
+
if not os.path.exists(download_location):
|
| 673 |
+
shutil.copy(req.local_file_path, download_location)
|
| 674 |
+
download_path = display_path(download_location)
|
| 675 |
+
logger.info("Saved %s", download_path)
|
| 676 |
+
|
| 677 |
+
def prepare_editable_requirement(
|
| 678 |
+
self,
|
| 679 |
+
req: InstallRequirement,
|
| 680 |
+
) -> BaseDistribution:
|
| 681 |
+
"""Prepare an editable requirement."""
|
| 682 |
+
assert req.editable, "cannot prepare a non-editable req as editable"
|
| 683 |
+
|
| 684 |
+
logger.info("Obtaining %s", req)
|
| 685 |
+
|
| 686 |
+
with indent_log():
|
| 687 |
+
if self.require_hashes:
|
| 688 |
+
raise InstallationError(
|
| 689 |
+
f"The editable requirement {req} cannot be installed when "
|
| 690 |
+
"requiring hashes, because there is no single file to "
|
| 691 |
+
"hash."
|
| 692 |
+
)
|
| 693 |
+
req.ensure_has_source_dir(self.src_dir)
|
| 694 |
+
req.update_editable()
|
| 695 |
+
assert req.source_dir
|
| 696 |
+
req.download_info = direct_url_for_editable(req.unpacked_source_directory)
|
| 697 |
+
|
| 698 |
+
dist = _get_prepared_distribution(
|
| 699 |
+
req,
|
| 700 |
+
self.build_tracker,
|
| 701 |
+
self.finder,
|
| 702 |
+
self.build_isolation,
|
| 703 |
+
self.check_build_deps,
|
| 704 |
+
)
|
| 705 |
+
|
| 706 |
+
req.check_if_exists(self.use_user_site)
|
| 707 |
+
|
| 708 |
+
return dist
|
| 709 |
+
|
| 710 |
+
def prepare_installed_requirement(
|
| 711 |
+
self,
|
| 712 |
+
req: InstallRequirement,
|
| 713 |
+
skip_reason: str,
|
| 714 |
+
) -> BaseDistribution:
|
| 715 |
+
"""Prepare an already-installed requirement."""
|
| 716 |
+
assert req.satisfied_by, "req should have been satisfied but isn't"
|
| 717 |
+
assert skip_reason is not None, (
|
| 718 |
+
"did not get skip reason skipped but req.satisfied_by "
|
| 719 |
+
f"is set to {req.satisfied_by}"
|
| 720 |
+
)
|
| 721 |
+
logger.info(
|
| 722 |
+
"Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version
|
| 723 |
+
)
|
| 724 |
+
with indent_log():
|
| 725 |
+
if self.require_hashes:
|
| 726 |
+
logger.debug(
|
| 727 |
+
"Since it is already installed, we are trusting this "
|
| 728 |
+
"package without checking its hash. To ensure a "
|
| 729 |
+
"completely repeatable environment, install into an "
|
| 730 |
+
"empty virtualenv."
|
| 731 |
+
)
|
| 732 |
+
return InstalledDistribution(req).get_metadata_distribution()
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc
ADDED
|
Binary file (2.04 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc
ADDED
|
Binary file (737 Bytes). View file
|
|
|