diff --git a/.gitattributes b/.gitattributes
index 922145c51ca7bca8287f2d549fd9f8064524fe78..eb29b6d22d365827d93107b94daa93c48b4a6094 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -42,3 +42,7 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/F
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Parsing.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5aca5cba451fccd8f05e7772a3b755f5197f2648
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e280cea1d8a888db57222b560cb97e18e09c1c2613b47587acc78c908ac6124
+size 243464
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..68153324c37b9fbf91c1a317f25b583c32da84d8
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3aab0e85d88c1a18bbe9b720f126c115c22a16b91ee8006dbd1d6fa5de099eb
+size 396760
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c49eee5c90b7f5648df27aa6a824674672dfcfb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py
@@ -0,0 +1,69 @@
+from importlib.metadata import entry_points
+
+from . import _version, caching
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__version__ = _version.get_versions()["version"]
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+]
+
+
+def process_entries():
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # If an implementation with this name is already
+                    # registered (it may even be the same one), prefer
+                    # ours by overwriting it
+                    clobber=True,
+                )
+
+
+process_entries()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa4d58e826e9ae4b7c165987962648d33b7e2232
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py
@@ -0,0 +1,21 @@
+
+# This file was generated by 'versioneer.py' (0.29) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
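+#
+# A quick illustrative check of the embedded metadata (the expected value
+# below is taken from the version_json literal later in this file):
+#
+#     >>> from fsspec import _version
+#     >>> _version.get_versions()["version"]
+#     '2024.2.0'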
+ +import json + +version_json = ''' +{ + "date": "2024-02-04T20:21:42-0500", + "dirty": false, + "error": null, + "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd", + "version": "2024.2.0" +} +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc82b7323a69f12579d44807dc61f06b6e30c7e2 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8821ed4dd09dbb7b19e0512d1148d2bba2e66c53 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9efccb23e19f6f126ccc711c0b3a6ff01563644a Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e331e0b9084e4f2dfc8ce10c7086124914fee29 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc32bbcbce1954161743e4a5b9975bec59b387d9 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py new file mode 100644 index 0000000000000000000000000000000000000000..e3dd17f8526c378177681a2fd96cf9a4cee014b3 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py @@ -0,0 +1,306 @@ +import errno +import io +import os +import secrets +import shutil +from contextlib import suppress +from functools import cached_property, wraps +from urllib.parse import parse_qs + +from fsspec.spec import AbstractFileSystem +from 
fsspec.utils import ( + get_package_version_without_import, + infer_storage_options, + mirror_from, + tokenize, +) + + +def wrap_exceptions(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except OSError as exception: + if not exception.args: + raise + + message, *args = exception.args + if isinstance(message, str) and "does not exist" in message: + raise FileNotFoundError(errno.ENOENT, message) from exception + else: + raise + + return wrapper + + +PYARROW_VERSION = None + + +class ArrowFSWrapper(AbstractFileSystem): + """FSSpec-compatible wrapper of pyarrow.fs.FileSystem. + + Parameters + ---------- + fs : pyarrow.fs.FileSystem + + """ + + root_marker = "/" + + def __init__(self, fs, **kwargs): + global PYARROW_VERSION + PYARROW_VERSION = get_package_version_without_import("pyarrow") + self.fs = fs + super().__init__(**kwargs) + + @property + def protocol(self): + return self.fs.type_name + + @cached_property + def fsid(self): + return "hdfs_" + tokenize(self.fs.host, self.fs.port) + + @classmethod + def _strip_protocol(cls, path): + ops = infer_storage_options(path) + path = ops["path"] + if path.startswith("//"): + # special case for "hdfs://path" (without the triple slash) + path = path[1:] + return path + + def ls(self, path, detail=False, **kwargs): + path = self._strip_protocol(path) + from pyarrow.fs import FileSelector + + entries = [ + self._make_entry(entry) + for entry in self.fs.get_file_info(FileSelector(path)) + ] + if detail: + return entries + else: + return [entry["name"] for entry in entries] + + def info(self, path, **kwargs): + path = self._strip_protocol(path) + [info] = self.fs.get_file_info([path]) + return self._make_entry(info) + + def exists(self, path): + path = self._strip_protocol(path) + try: + self.info(path) + except FileNotFoundError: + return False + else: + return True + + def _make_entry(self, info): + from pyarrow.fs import FileType + + if info.type is FileType.Directory: + kind = "directory" + elif info.type is FileType.File: + kind = "file" + elif info.type is FileType.NotFound: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), info.path) + else: + kind = "other" + + return { + "name": info.path, + "size": info.size, + "type": kind, + "mtime": info.mtime, + } + + @wrap_exceptions + def cp_file(self, path1, path2, **kwargs): + path1 = self._strip_protocol(path1).rstrip("/") + path2 = self._strip_protocol(path2).rstrip("/") + + with self._open(path1, "rb") as lstream: + tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}" + try: + with self.open(tmp_fname, "wb") as rstream: + shutil.copyfileobj(lstream, rstream) + self.fs.move(tmp_fname, path2) + except BaseException: # noqa + with suppress(FileNotFoundError): + self.fs.delete_file(tmp_fname) + raise + + @wrap_exceptions + def mv(self, path1, path2, **kwargs): + path1 = self._strip_protocol(path1).rstrip("/") + path2 = self._strip_protocol(path2).rstrip("/") + self.fs.move(path1, path2) + + mv_file = mv + + @wrap_exceptions + def rm_file(self, path): + path = self._strip_protocol(path) + self.fs.delete_file(path) + + @wrap_exceptions + def rm(self, path, recursive=False, maxdepth=None): + path = self._strip_protocol(path).rstrip("/") + if self.isdir(path): + if recursive: + self.fs.delete_dir(path) + else: + raise ValueError("Can't delete directories without recursive=False") + else: + self.fs.delete_file(path) + + @wrap_exceptions + def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs): + if mode == "rb": + if seekable: + 
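+                # pyarrow's open_input_file returns a random-access (seekable)
+                # handle; open_input_stream below is sequential-only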
method = self.fs.open_input_file + else: + method = self.fs.open_input_stream + elif mode == "wb": + method = self.fs.open_output_stream + elif mode == "ab": + method = self.fs.open_append_stream + else: + raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}") + + _kwargs = {} + if mode != "rb" or not seekable: + if int(PYARROW_VERSION.split(".")[0]) >= 4: + # disable compression auto-detection + _kwargs["compression"] = None + stream = method(path, **_kwargs) + + return ArrowFile(self, stream, path, mode, block_size, **kwargs) + + @wrap_exceptions + def mkdir(self, path, create_parents=True, **kwargs): + path = self._strip_protocol(path) + if create_parents: + self.makedirs(path, exist_ok=True) + else: + self.fs.create_dir(path, recursive=False) + + @wrap_exceptions + def makedirs(self, path, exist_ok=False): + path = self._strip_protocol(path) + self.fs.create_dir(path, recursive=True) + + @wrap_exceptions + def rmdir(self, path): + path = self._strip_protocol(path) + self.fs.delete_dir(path) + + @wrap_exceptions + def modified(self, path): + path = self._strip_protocol(path) + return self.fs.get_file_info(path).mtime + + def cat_file(self, path, start=None, end=None, **kwargs): + kwargs["seekable"] = start not in [None, 0] + return super().cat_file(path, start=None, end=None, **kwargs) + + def get_file(self, rpath, lpath, **kwargs): + kwargs["seekable"] = False + super().get_file(rpath, lpath, **kwargs) + + +@mirror_from( + "stream", + [ + "read", + "seek", + "tell", + "write", + "readable", + "writable", + "close", + "size", + "seekable", + ], +) +class ArrowFile(io.IOBase): + def __init__(self, fs, stream, path, mode, block_size=None, **kwargs): + self.path = path + self.mode = mode + + self.fs = fs + self.stream = stream + + self.blocksize = self.block_size = block_size + self.kwargs = kwargs + + def __enter__(self): + return self + + def __exit__(self, *args): + return self.close() + + +class HadoopFileSystem(ArrowFSWrapper): + """A wrapper on top of the pyarrow.fs.HadoopFileSystem + to connect it's interface with fsspec""" + + protocol = "hdfs" + + def __init__( + self, + host="default", + port=0, + user=None, + kerb_ticket=None, + replication=3, + extra_conf=None, + **kwargs, + ): + """ + + Parameters + ---------- + host: str + Hostname, IP or "default" to try to read from Hadoop config + port: int + Port to connect on, or default from Hadoop config if 0 + user: str or None + If given, connect as this username + kerb_ticket: str or None + If given, use this ticket for authentication + replication: int + set replication factor of file for write operations. default value is 3. 
+ extra_conf: None or dict + Passed on to HadoopFileSystem + """ + from pyarrow.fs import HadoopFileSystem + + fs = HadoopFileSystem( + host=host, + port=port, + user=user, + kerb_ticket=kerb_ticket, + replication=replication, + extra_conf=extra_conf, + ) + super().__init__(fs=fs, **kwargs) + + @staticmethod + def _get_kwargs_from_urls(path): + ops = infer_storage_options(path) + out = {} + if ops.get("host", None): + out["host"] = ops["host"] + if ops.get("username", None): + out["user"] = ops["username"] + if ops.get("port", None): + out["port"] = ops["port"] + if ops.get("url_query", None): + queries = parse_qs(ops["url_query"]) + if queries.get("replication", None): + out["replication"] = int(queries["replication"][0]) + return out diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py new file mode 100644 index 0000000000000000000000000000000000000000..b3c43fa6955b27a9b06168033f3b1630ba26ecaf --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py @@ -0,0 +1,882 @@ +from __future__ import annotations + +import inspect +import logging +import os +import tempfile +import time +import weakref +from shutil import rmtree +from typing import TYPE_CHECKING, Any, Callable, ClassVar + +from fsspec import AbstractFileSystem, filesystem +from fsspec.callbacks import DEFAULT_CALLBACK +from fsspec.compression import compr +from fsspec.core import BaseCache, MMapCache +from fsspec.exceptions import BlocksizeMismatchError +from fsspec.implementations.cache_mapper import create_cache_mapper +from fsspec.implementations.cache_metadata import CacheMetadata +from fsspec.spec import AbstractBufferedFile +from fsspec.transaction import Transaction +from fsspec.utils import infer_compression + +if TYPE_CHECKING: + from fsspec.implementations.cache_mapper import AbstractCacheMapper + +logger = logging.getLogger("fsspec.cached") + + +class WriteCachedTransaction(Transaction): + def complete(self, commit=True): + rpaths = [f.path for f in self.files] + lpaths = [f.fn for f in self.files] + if commit: + self.fs.put(lpaths, rpaths) + # else remove? + self.fs._intrans = False + + +class CachingFileSystem(AbstractFileSystem): + """Locally caching filesystem, layer over any other FS + + This class implements chunk-wise local storage of remote files, for quick + access after the initial download. The files are stored in a given + directory with hashes of URLs for the filenames. If no directory is given, + a temporary one is used, which should be cleaned up by the OS after the + process ends. The files themselves are sparse (as implemented in + :class:`~fsspec.caching.MMapCache`), so only the data which is accessed + takes up space. 
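+
+    A minimal usage sketch (the URL below is a placeholder, not a real
+    endpoint)::
+
+        import fsspec
+
+        fs = fsspec.filesystem(
+            "blockcache",
+            target_protocol="https",
+            cache_storage="/tmp/fsspec-cache",
+        )
+        with fs.open("https://example.com/large-file.bin") as f:
+            header = f.read(2**20)  # only the blocks actually read are fetched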
+ + Restrictions: + + - the block-size must be the same for each access of a given file, unless + all blocks of the file have already been read + - caching can only be applied to file-systems which produce files + derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also + allowed, for testing + """ + + protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached") + + def __init__( + self, + target_protocol=None, + cache_storage="TMP", + cache_check=10, + check_files=False, + expiry_time=604800, + target_options=None, + fs=None, + same_names: bool | None = None, + compression=None, + cache_mapper: AbstractCacheMapper | None = None, + **kwargs, + ): + """ + + Parameters + ---------- + target_protocol: str (optional) + Target filesystem protocol. Provide either this or ``fs``. + cache_storage: str or list(str) + Location to store files. If "TMP", this is a temporary directory, + and will be cleaned up by the OS when this process ends (or later). + If a list, each location will be tried in the order given, but + only the last will be considered writable. + cache_check: int + Number of seconds between reload of cache metadata + check_files: bool + Whether to explicitly see if the UID of the remote file matches + the stored one before using. Warning: some file systems such as + HTTP cannot reliably give a unique hash of the contents of some + path, so be sure to set this option to False. + expiry_time: int + The time in seconds after which a local copy is considered useless. + Set to falsy to prevent expiry. The default is equivalent to one + week. + target_options: dict or None + Passed to the instantiation of the FS, if fs is None. + fs: filesystem instance + The target filesystem to run against. Provide this or ``protocol``. + same_names: bool (optional) + By default, target URLs are hashed using a ``HashCacheMapper`` so + that files from different backends with the same basename do not + conflict. If this argument is ``true``, a ``BasenameCacheMapper`` + is used instead. Other cache mapper options are available by using + the ``cache_mapper`` keyword argument. Only one of this and + ``cache_mapper`` should be specified. + compression: str (optional) + To decompress on download. Can be 'infer' (guess from the URL name), + one of the entries in ``fsspec.compression.compr``, or None for no + decompression. + cache_mapper: AbstractCacheMapper (optional) + The object use to map from original filenames to cached filenames. + Only one of this and ``same_names`` should be specified. + """ + super().__init__(**kwargs) + if fs is None and target_protocol is None: + raise ValueError( + "Please provide filesystem instance(fs) or target_protocol" + ) + if not (fs is None) ^ (target_protocol is None): + raise ValueError( + "Both filesystems (fs) and target_protocol may not be both given." + ) + if cache_storage == "TMP": + tempdir = tempfile.mkdtemp() + storage = [tempdir] + weakref.finalize(self, self._remove_tempdir, tempdir) + else: + if isinstance(cache_storage, str): + storage = [cache_storage] + else: + storage = cache_storage + os.makedirs(storage[-1], exist_ok=True) + self.storage = storage + self.kwargs = target_options or {} + self.cache_check = cache_check + self.check_files = check_files + self.expiry = expiry_time + self.compression = compression + + # Size of cache in bytes. If None then the size is unknown and will be + # recalculated the next time cache_size() is called. On writes to the + # cache this is reset to None. 
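+        # (None therefore means "unknown, recompute on the next cache_size()
+        # call", not an empty cache.)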
+ self._cache_size = None + + if same_names is not None and cache_mapper is not None: + raise ValueError( + "Cannot specify both same_names and cache_mapper in " + "CachingFileSystem.__init__" + ) + if cache_mapper is not None: + self._mapper = cache_mapper + else: + self._mapper = create_cache_mapper( + same_names if same_names is not None else False + ) + + self.target_protocol = ( + target_protocol + if isinstance(target_protocol, str) + else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0]) + ) + self._metadata = CacheMetadata(self.storage) + self.load_cache() + self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs) + + def _strip_protocol(path): + # acts as a method, since each instance has a difference target + return self.fs._strip_protocol(type(self)._strip_protocol(path)) + + self._strip_protocol: Callable = _strip_protocol + + @staticmethod + def _remove_tempdir(tempdir): + try: + rmtree(tempdir) + except Exception: + pass + + def _mkcache(self): + os.makedirs(self.storage[-1], exist_ok=True) + + def cache_size(self): + """Return size of cache in bytes. + + If more than one cache directory is in use, only the size of the last + one (the writable cache directory) is returned. + """ + if self._cache_size is None: + cache_dir = self.storage[-1] + self._cache_size = filesystem("file").du(cache_dir, withdirs=True) + return self._cache_size + + def load_cache(self): + """Read set of stored blocks from file""" + self._metadata.load() + self._mkcache() + self.last_cache = time.time() + + def save_cache(self): + """Save set of stored blocks from file""" + self._mkcache() + self._metadata.save() + self.last_cache = time.time() + self._cache_size = None + + def _check_cache(self): + """Reload caches if time elapsed or any disappeared""" + self._mkcache() + if not self.cache_check: + # explicitly told not to bother checking + return + timecond = time.time() - self.last_cache > self.cache_check + existcond = all(os.path.exists(storage) for storage in self.storage) + if timecond or not existcond: + self.load_cache() + + def _check_file(self, path): + """Is path in cache and still valid""" + path = self._strip_protocol(path) + self._check_cache() + return self._metadata.check_file(path, self) + + def clear_cache(self): + """Remove all files and metadata from the cache + + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. + """ + rmtree(self.storage[-1]) + self.load_cache() + self._cache_size = None + + def clear_expired_cache(self, expiry_time=None): + """Remove all expired files and metadata from the cache + + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. + + Parameters + ---------- + expiry_time: int + The time in seconds after which a local copy is considered useless. + If not defined the default is equivalent to the attribute from the + file caching instantiation. + """ + + if not expiry_time: + expiry_time = self.expiry + + self._check_cache() + + expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time) + for fn in expired_files: + if os.path.exists(fn): + os.remove(fn) + + if writable_cache_empty: + rmtree(self.storage[-1]) + self.load_cache() + + self._cache_size = None + + def pop_from_cache(self, path): + """Remove cached version of given file + + Deletes local copy of the given (remote) path. 
If it is found in a cache + location which is not the last, it is assumed to be read-only, and + raises PermissionError + """ + path = self._strip_protocol(path) + fn = self._metadata.pop_file(path) + if fn is not None: + os.remove(fn) + self._cache_size = None + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=True, + cache_options=None, + **kwargs, + ): + """Wrap the target _open + + If the whole file exists in the cache, just open it locally and + return that. + + Otherwise, open the file on the target FS, and make it have a mmap + cache pointing to the location which we determine, in our cache. + The ``blocks`` instance is shared, so as the mmap cache instance + updates, so does the entry in our ``cached_files`` attribute. + We monkey-patch this file, so that when it closes, we call + ``close_and_update`` to save the state of the blocks. + """ + path = self._strip_protocol(path) + + path = self.fs._strip_protocol(path) + if "r" not in mode: + return self.fs._open( + path, + mode=mode, + block_size=block_size, + autocommit=autocommit, + cache_options=cache_options, + **kwargs, + ) + detail = self._check_file(path) + if detail: + # file is in cache + detail, fn = detail + hash, blocks = detail["fn"], detail["blocks"] + if blocks is True: + # stored file is complete + logger.debug("Opening local copy of %s", path) + return open(fn, mode) + # TODO: action where partial file exists in read-only cache + logger.debug("Opening partially cached copy of %s", path) + else: + hash = self._mapper(path) + fn = os.path.join(self.storage[-1], hash) + blocks = set() + detail = { + "original": path, + "fn": hash, + "blocks": blocks, + "time": time.time(), + "uid": self.fs.ukey(path), + } + self._metadata.update_file(path, detail) + logger.debug("Creating local sparse file for %s", path) + + # call target filesystems open + self._mkcache() + f = self.fs._open( + path, + mode=mode, + block_size=block_size, + autocommit=autocommit, + cache_options=cache_options, + cache_type="none", + **kwargs, + ) + if self.compression: + comp = ( + infer_compression(path) + if self.compression == "infer" + else self.compression + ) + f = compr[comp](f, mode="rb") + if "blocksize" in detail: + if detail["blocksize"] != f.blocksize: + raise BlocksizeMismatchError( + f"Cached file must be reopened with same block" + f" size as original (old: {detail['blocksize']}," + f" new {f.blocksize})" + ) + else: + detail["blocksize"] = f.blocksize + f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks) + close = f.close + f.close = lambda: self.close_and_update(f, close) + self.save_cache() + return f + + def _parent(self, path): + return self.fs._parent(path) + + def hash_name(self, path: str, *args: Any) -> str: + # Kept for backward compatibility with downstream libraries. + # Ignores extra arguments, previously same_name boolean. 
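+        # Illustrative: fs.hash_name("s3://bucket/key") returns whatever cache
+        # filename the configured mapper assigns (a hash by default).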
+ return self._mapper(path) + + def close_and_update(self, f, close): + """Called when a file is closing, so store the set of blocks""" + if f.closed: + return + path = self._strip_protocol(f.path) + self._metadata.on_close_cached_file(f, path) + try: + logger.debug("going to save") + self.save_cache() + logger.debug("saved") + except OSError: + logger.debug("Cache saving failed while closing file") + except NameError: + logger.debug("Cache save failed due to interpreter shutdown") + close() + f.closed = True + + def __getattribute__(self, item): + if item in [ + "load_cache", + "_open", + "save_cache", + "close_and_update", + "__init__", + "__getattribute__", + "__reduce__", + "_make_local_details", + "open", + "cat", + "cat_file", + "cat_ranges", + "get", + "read_block", + "tail", + "head", + "_check_file", + "_check_cache", + "_mkcache", + "clear_cache", + "clear_expired_cache", + "pop_from_cache", + "_mkcache", + "local_file", + "_paths_from_path", + "get_mapper", + "open_many", + "commit_many", + "hash_name", + "__hash__", + "__eq__", + "to_json", + "cache_size", + "pipe_file", + "pipe", + "start_transaction", + "end_transaction", + ]: + # all the methods defined in this class. Note `open` here, since + # it calls `_open`, but is actually in superclass + return lambda *args, **kw: getattr(type(self), item).__get__(self)( + *args, **kw + ) + if item in ["__reduce_ex__"]: + raise AttributeError + if item in ["transaction"]: + # property + return type(self).transaction.__get__(self) + if item in ["_cache", "transaction_type"]: + # class attributes + return getattr(type(self), item) + if item == "__class__": + return type(self) + d = object.__getattribute__(self, "__dict__") + fs = d.get("fs", None) # fs is not immediately defined + if item in d: + return d[item] + elif fs is not None: + if item in fs.__dict__: + # attribute of instance + return fs.__dict__[item] + # attributed belonging to the target filesystem + cls = type(fs) + m = getattr(cls, item) + if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and ( + not hasattr(m, "__self__") or m.__self__ is None + ): + # instance method + return m.__get__(fs, cls) + return m # class method or attribute + else: + # attributes of the superclass, while target is being set up + return super().__getattribute__(item) + + def __eq__(self, other): + """Test for equality.""" + if self is other: + return True + if not isinstance(other, type(self)): + return False + return ( + self.storage == other.storage + and self.kwargs == other.kwargs + and self.cache_check == other.cache_check + and self.check_files == other.check_files + and self.expiry == other.expiry + and self.compression == other.compression + and self._mapper == other._mapper + and self.target_protocol == other.target_protocol + ) + + def __hash__(self): + """Calculate hash.""" + return ( + hash(tuple(self.storage)) + ^ hash(str(self.kwargs)) + ^ hash(self.cache_check) + ^ hash(self.check_files) + ^ hash(self.expiry) + ^ hash(self.compression) + ^ hash(self._mapper) + ^ hash(self.target_protocol) + ) + + def to_json(self): + """Calculate JSON representation. + + Not implemented yet for CachingFileSystem. + """ + raise NotImplementedError( + "CachingFileSystem JSON representation not implemented" + ) + + +class WholeFileCacheFileSystem(CachingFileSystem): + """Caches whole remote files on first access + + This class is intended as a layer over any other file system, and + will make a local copy of each file accessed, so that all subsequent + reads are local. 
This is similar to ``CachingFileSystem``, but without + the block-wise functionality and so can work even when sparse files + are not allowed. See its docstring for definition of the init + arguments. + + The class still needs access to the remote store for listing files, + and may refresh cached files. + """ + + protocol = "filecache" + local_file = True + + def open_many(self, open_files, **kwargs): + paths = [of.path for of in open_files] + if "r" in open_files.mode: + self._mkcache() + else: + return [ + LocalTempFile( + self.fs, + path, + mode=open_files.mode, + fn=os.path.join(self.storage[-1], self._mapper(path)), + **kwargs, + ) + for path in paths + ] + + if self.compression: + raise NotImplementedError + details = [self._check_file(sp) for sp in paths] + downpath = [p for p, d in zip(paths, details) if not d] + downfn0 = [ + os.path.join(self.storage[-1], self._mapper(p)) + for p, d in zip(paths, details) + ] # keep these path names for opening later + downfn = [fn for fn, d in zip(downfn0, details) if not d] + if downpath: + # skip if all files are already cached and up to date + self.fs.get(downpath, downfn) + + # update metadata - only happens when downloads are successful + newdetail = [ + { + "original": path, + "fn": self._mapper(path), + "blocks": True, + "time": time.time(), + "uid": self.fs.ukey(path), + } + for path in downpath + ] + for path, detail in zip(downpath, newdetail): + self._metadata.update_file(path, detail) + self.save_cache() + + def firstpart(fn): + # helper to adapt both whole-file and simple-cache + return fn[1] if isinstance(fn, tuple) else fn + + return [ + open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode) + for fn0, fn1 in zip(details, downfn0) + ] + + def commit_many(self, open_files): + self.fs.put([f.fn for f in open_files], [f.path for f in open_files]) + [f.close() for f in open_files] + for f in open_files: + # in case autocommit is off, and so close did not already delete + try: + os.remove(f.name) + except FileNotFoundError: + pass + self._cache_size = None + + def _make_local_details(self, path): + hash = self._mapper(path) + fn = os.path.join(self.storage[-1], hash) + detail = { + "original": path, + "fn": hash, + "blocks": True, + "time": time.time(), + "uid": self.fs.ukey(path), + } + self._metadata.update_file(path, detail) + logger.debug("Copying %s to local cache", path) + return fn + + def cat( + self, + path, + recursive=False, + on_error="raise", + callback=DEFAULT_CALLBACK, + **kwargs, + ): + paths = self.expand_path( + path, recursive=recursive, maxdepth=kwargs.get("maxdepth", None) + ) + getpaths = [] + storepaths = [] + fns = [] + out = {} + for p in paths.copy(): + try: + detail = self._check_file(p) + if not detail: + fn = self._make_local_details(p) + getpaths.append(p) + storepaths.append(fn) + else: + detail, fn = detail if isinstance(detail, tuple) else (None, detail) + fns.append(fn) + except Exception as e: + if on_error == "raise": + raise + if on_error == "return": + out[p] = e + paths.remove(p) + + if getpaths: + self.fs.get(getpaths, storepaths) + self.save_cache() + + callback.set_size(len(paths)) + for p, fn in zip(paths, fns): + with open(fn, "rb") as f: + out[p] = f.read() + callback.relative_update(1) + if isinstance(path, str) and len(paths) == 1 and recursive is False: + out = out[paths[0]] + return out + + def _open(self, path, mode="rb", **kwargs): + path = self._strip_protocol(path) + if "r" not in mode: + fn = self._make_local_details(path) + user_specified_kwargs = { + k: v + for k, v in 
kwargs.items() + # those kwargs were added by open(), we don't want them + if k not in ["autocommit", "block_size", "cache_options"] + } + return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs) + detail = self._check_file(path) + if detail: + detail, fn = detail + _, blocks = detail["fn"], detail["blocks"] + if blocks is True: + logger.debug("Opening local copy of %s", path) + + # In order to support downstream filesystems to be able to + # infer the compression from the original filename, like + # the `TarFileSystem`, let's extend the `io.BufferedReader` + # fileobject protocol by adding a dedicated attribute + # `original`. + f = open(fn, mode) + f.original = detail.get("original") + return f + else: + raise ValueError( + f"Attempt to open partially cached file {path}" + f" as a wholly cached file" + ) + else: + fn = self._make_local_details(path) + kwargs["mode"] = mode + + # call target filesystems open + self._mkcache() + if self.compression: + with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2: + if isinstance(f, AbstractBufferedFile): + # want no type of caching if just downloading whole thing + f.cache = BaseCache(0, f.cache.fetcher, f.size) + comp = ( + infer_compression(path) + if self.compression == "infer" + else self.compression + ) + f = compr[comp](f, mode="rb") + data = True + while data: + block = getattr(f, "blocksize", 5 * 2**20) + data = f.read(block) + f2.write(data) + else: + self.fs.get_file(path, fn) + self.save_cache() + return self._open(path, mode) + + +class SimpleCacheFileSystem(WholeFileCacheFileSystem): + """Caches whole remote files on first access + + This class is intended as a layer over any other file system, and + will make a local copy of each file accessed, so that all subsequent + reads are local. This implementation only copies whole files, and + does not keep any metadata about the download time or file details. + It is therefore safer to use in multi-threaded/concurrent situations. + + This is the only of the caching filesystems that supports write: you will + be given a real local open file, and upon close and commit, it will be + uploaded to the target filesystem; the writability or the target URL is + not checked until that time. 
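+
+    A small write-through sketch (bucket and paths are placeholders; the
+    target protocol must be installed, e.g. s3fs for "s3")::
+
+        import fsspec
+
+        fs = fsspec.filesystem(
+            "simplecache", target_protocol="s3", cache_storage="/tmp/files"
+        )
+        with fs.open("mybucket/out.bin", "wb") as f:
+            f.write(b"data")  # uploaded to the target on close/commit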
+ + """ + + protocol = "simplecache" + local_file = True + transaction_type = WriteCachedTransaction + + def __init__(self, **kwargs): + kw = kwargs.copy() + for key in ["cache_check", "expiry_time", "check_files"]: + kw[key] = False + super().__init__(**kw) + for storage in self.storage: + if not os.path.exists(storage): + os.makedirs(storage, exist_ok=True) + + def _check_file(self, path): + self._check_cache() + sha = self._mapper(path) + for storage in self.storage: + fn = os.path.join(storage, sha) + if os.path.exists(fn): + return fn + + def save_cache(self): + pass + + def load_cache(self): + pass + + def pipe_file(self, path, value=None, **kwargs): + if self._intrans: + with self.open(path, "wb") as f: + f.write(value) + else: + super().pipe_file(path, value) + + def pipe(self, path, value=None, **kwargs): + if isinstance(path, str): + self.pipe_file(self._strip_protocol(path), value, **kwargs) + elif isinstance(path, dict): + for k, v in path.items(): + self.pipe_file(self._strip_protocol(k), v, **kwargs) + else: + raise ValueError("path must be str or dict") + + def cat_ranges( + self, paths, starts, ends, max_gap=None, on_error="return", **kwargs + ): + lpaths = [self._check_file(p) for p in paths] + rpaths = [p for l, p in zip(lpaths, paths) if l is False] + lpaths = [l for l, p in zip(lpaths, paths) if l is False] + self.fs.get(rpaths, lpaths) + return super().cat_ranges( + paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs + ) + + def _open(self, path, mode="rb", **kwargs): + path = self._strip_protocol(path) + sha = self._mapper(path) + + if "r" not in mode: + fn = os.path.join(self.storage[-1], sha) + user_specified_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ["autocommit", "block_size", "cache_options"] + } # those were added by open() + return LocalTempFile( + self, + path, + mode=mode, + autocommit=not self._intrans, + fn=fn, + **user_specified_kwargs, + ) + fn = self._check_file(path) + if fn: + return open(fn, mode) + + fn = os.path.join(self.storage[-1], sha) + logger.debug("Copying %s to local cache", path) + kwargs["mode"] = mode + + self._mkcache() + self._cache_size = None + if self.compression: + with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2: + if isinstance(f, AbstractBufferedFile): + # want no type of caching if just downloading whole thing + f.cache = BaseCache(0, f.cache.fetcher, f.size) + comp = ( + infer_compression(path) + if self.compression == "infer" + else self.compression + ) + f = compr[comp](f, mode="rb") + data = True + while data: + block = getattr(f, "blocksize", 5 * 2**20) + data = f.read(block) + f2.write(data) + else: + self.fs.get_file(path, fn) + return self._open(path, mode) + + +class LocalTempFile: + """A temporary local file, which will be uploaded on commit""" + + def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs): + self.fn = fn + self.fh = open(fn, mode) + self.mode = mode + if seek: + self.fh.seek(seek) + self.path = path + self.fs = fs + self.closed = False + self.autocommit = autocommit + self.kwargs = kwargs + + def __reduce__(self): + # always open in r+b to allow continuing writing at a location + return ( + LocalTempFile, + (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()), + ) + + def __enter__(self): + return self.fh + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + if self.closed: + return + self.fh.close() + self.closed = True + if self.autocommit: + self.commit() + + def discard(self): + 
self.fh.close() + os.remove(self.fn) + + def commit(self): + self.fs.put(self.fn, self.path, **self.kwargs) + try: + os.remove(self.fn) + except (PermissionError, FileNotFoundError): + # file path may be held by new version of the file on windows + pass + + @property + def name(self): + return self.fn + + def __getattr__(self, item): + return getattr(self.fh, item) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py new file mode 100644 index 0000000000000000000000000000000000000000..3e1276463db6866665e6a0fe114efc247971b57e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py @@ -0,0 +1,152 @@ +import dask +from distributed.client import Client, _get_global_client +from distributed.worker import Worker + +from fsspec import filesystem +from fsspec.spec import AbstractBufferedFile, AbstractFileSystem +from fsspec.utils import infer_storage_options + + +def _get_client(client): + if client is None: + return _get_global_client() + elif isinstance(client, Client): + return client + else: + # e.g., connection string + return Client(client) + + +def _in_worker(): + return bool(Worker._instances) + + +class DaskWorkerFileSystem(AbstractFileSystem): + """View files accessible to a worker as any other remote file-system + + When instances are run on the worker, uses the real filesystem. When + run on the client, they call the worker to provide information or data. + + **Warning** this implementation is experimental, and read-only for now. + """ + + def __init__( + self, target_protocol=None, target_options=None, fs=None, client=None, **kwargs + ): + super().__init__(**kwargs) + if not (fs is None) ^ (target_protocol is None): + raise ValueError( + "Please provide one of filesystem instance (fs) or" + " target_protocol, not both" + ) + self.target_protocol = target_protocol + self.target_options = target_options + self.worker = None + self.client = client + self.fs = fs + self._determine_worker() + + @staticmethod + def _get_kwargs_from_urls(path): + so = infer_storage_options(path) + if "host" in so and "port" in so: + return {"client": f"{so['host']}:{so['port']}"} + else: + return {} + + def _determine_worker(self): + if _in_worker(): + self.worker = True + if self.fs is None: + self.fs = filesystem( + self.target_protocol, **(self.target_options or {}) + ) + else: + self.worker = False + self.client = _get_client(self.client) + self.rfs = dask.delayed(self) + + def mkdir(self, *args, **kwargs): + if self.worker: + self.fs.mkdir(*args, **kwargs) + else: + self.rfs.mkdir(*args, **kwargs).compute() + + def rm(self, *args, **kwargs): + if self.worker: + self.fs.rm(*args, **kwargs) + else: + self.rfs.rm(*args, **kwargs).compute() + + def copy(self, *args, **kwargs): + if self.worker: + self.fs.copy(*args, **kwargs) + else: + self.rfs.copy(*args, **kwargs).compute() + + def mv(self, *args, **kwargs): + if self.worker: + self.fs.mv(*args, **kwargs) + else: + self.rfs.mv(*args, **kwargs).compute() + + def ls(self, *args, **kwargs): + if self.worker: + return self.fs.ls(*args, **kwargs) + else: + return self.rfs.ls(*args, **kwargs).compute() + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=True, + cache_options=None, + **kwargs, + ): + if self.worker: + return self.fs._open( + path, + mode=mode, + block_size=block_size, + autocommit=autocommit, + 
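+                # any remaining options are forwarded unchanged to the
+                # wrapped filesystem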
cache_options=cache_options, + **kwargs, + ) + else: + return DaskFile( + fs=self, + path=path, + mode=mode, + block_size=block_size, + autocommit=autocommit, + cache_options=cache_options, + **kwargs, + ) + + def fetch_range(self, path, mode, start, end): + if self.worker: + with self._open(path, mode) as f: + f.seek(start) + return f.read(end - start) + else: + return self.rfs.fetch_range(path, mode, start, end).compute() + + +class DaskFile(AbstractBufferedFile): + def __init__(self, mode="rb", **kwargs): + if mode != "rb": + raise ValueError('Remote dask files can only be opened in "rb" mode') + super().__init__(**kwargs) + + def _upload_chunk(self, final=False): + pass + + def _initiate_upload(self): + """Create remote file/upload""" + pass + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + return self.fs.fetch_range(self.path, self.mode, start, end) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py new file mode 100644 index 0000000000000000000000000000000000000000..b1b3f6db7fc729a734f628eac6fa0dfb3d5412b5 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py @@ -0,0 +1,364 @@ +from .. import filesystem +from ..asyn import AsyncFileSystem + + +class DirFileSystem(AsyncFileSystem): + """Directory prefix filesystem + + The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with + is relative to the `path`. After performing the necessary paths operation it + delegates everything to the wrapped filesystem. + """ + + protocol = "dir" + + def __init__( + self, + path=None, + fs=None, + fo=None, + target_protocol=None, + target_options=None, + **storage_options, + ): + """ + Parameters + ---------- + path: str + Path to the directory. + fs: AbstractFileSystem + An instantiated filesystem to wrap. 
+ target_protocol, target_options: + if fs is none, construct it from these + fo: str + Alternate for path; do not provide both + """ + super().__init__(**storage_options) + if fs is None: + fs = filesystem(protocol=target_protocol, **(target_options or {})) + if (path is not None) ^ (fo is not None) is False: + raise ValueError("Provide path or fo, not both") + path = path or fo + + if self.asynchronous and not fs.async_impl: + raise ValueError("can't use asynchronous with non-async fs") + + if fs.async_impl and self.asynchronous != fs.asynchronous: + raise ValueError("both dirfs and fs should be in the same sync/async mode") + + self.path = fs._strip_protocol(path) + self.fs = fs + + def _join(self, path): + if isinstance(path, str): + if not self.path: + return path + if not path: + return self.path + return self.fs.sep.join((self.path, self._strip_protocol(path))) + return [self._join(_path) for _path in path] + + def _relpath(self, path): + if isinstance(path, str): + if not self.path: + return path + if path == self.path: + return "" + prefix = self.path + self.fs.sep + assert path.startswith(prefix) + return path[len(prefix) :] + return [self._relpath(_path) for _path in path] + + # Wrappers below + + @property + def sep(self): + return self.fs.sep + + async def set_session(self, *args, **kwargs): + return await self.fs.set_session(*args, **kwargs) + + async def _rm_file(self, path, **kwargs): + return await self.fs._rm_file(self._join(path), **kwargs) + + def rm_file(self, path, **kwargs): + return self.fs.rm_file(self._join(path), **kwargs) + + async def _rm(self, path, *args, **kwargs): + return await self.fs._rm(self._join(path), *args, **kwargs) + + def rm(self, path, *args, **kwargs): + return self.fs.rm(self._join(path), *args, **kwargs) + + async def _cp_file(self, path1, path2, **kwargs): + return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs) + + def cp_file(self, path1, path2, **kwargs): + return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs) + + async def _copy( + self, + path1, + path2, + *args, + **kwargs, + ): + return await self.fs._copy( + self._join(path1), + self._join(path2), + *args, + **kwargs, + ) + + def copy(self, path1, path2, *args, **kwargs): + return self.fs.copy( + self._join(path1), + self._join(path2), + *args, + **kwargs, + ) + + async def _pipe(self, path, *args, **kwargs): + return await self.fs._pipe(self._join(path), *args, **kwargs) + + def pipe(self, path, *args, **kwargs): + return self.fs.pipe(self._join(path), *args, **kwargs) + + async def _pipe_file(self, path, *args, **kwargs): + return await self.fs._pipe_file(self._join(path), *args, **kwargs) + + def pipe_file(self, path, *args, **kwargs): + return self.fs.pipe_file(self._join(path), *args, **kwargs) + + async def _cat_file(self, path, *args, **kwargs): + return await self.fs._cat_file(self._join(path), *args, **kwargs) + + def cat_file(self, path, *args, **kwargs): + return self.fs.cat_file(self._join(path), *args, **kwargs) + + async def _cat(self, path, *args, **kwargs): + ret = await self.fs._cat( + self._join(path), + *args, + **kwargs, + ) + + if isinstance(ret, dict): + return {self._relpath(key): value for key, value in ret.items()} + + return ret + + def cat(self, path, *args, **kwargs): + ret = self.fs.cat( + self._join(path), + *args, + **kwargs, + ) + + if isinstance(ret, dict): + return {self._relpath(key): value for key, value in ret.items()} + + return ret + + async def _put_file(self, lpath, rpath, **kwargs): + return await 
self.fs._put_file(lpath, self._join(rpath), **kwargs) + + def put_file(self, lpath, rpath, **kwargs): + return self.fs.put_file(lpath, self._join(rpath), **kwargs) + + async def _put( + self, + lpath, + rpath, + *args, + **kwargs, + ): + return await self.fs._put( + lpath, + self._join(rpath), + *args, + **kwargs, + ) + + def put(self, lpath, rpath, *args, **kwargs): + return self.fs.put( + lpath, + self._join(rpath), + *args, + **kwargs, + ) + + async def _get_file(self, rpath, lpath, **kwargs): + return await self.fs._get_file(self._join(rpath), lpath, **kwargs) + + def get_file(self, rpath, lpath, **kwargs): + return self.fs.get_file(self._join(rpath), lpath, **kwargs) + + async def _get(self, rpath, *args, **kwargs): + return await self.fs._get(self._join(rpath), *args, **kwargs) + + def get(self, rpath, *args, **kwargs): + return self.fs.get(self._join(rpath), *args, **kwargs) + + async def _isfile(self, path): + return await self.fs._isfile(self._join(path)) + + def isfile(self, path): + return self.fs.isfile(self._join(path)) + + async def _isdir(self, path): + return await self.fs._isdir(self._join(path)) + + def isdir(self, path): + return self.fs.isdir(self._join(path)) + + async def _size(self, path): + return await self.fs._size(self._join(path)) + + def size(self, path): + return self.fs.size(self._join(path)) + + async def _exists(self, path): + return await self.fs._exists(self._join(path)) + + def exists(self, path): + return self.fs.exists(self._join(path)) + + async def _info(self, path, **kwargs): + return await self.fs._info(self._join(path), **kwargs) + + def info(self, path, **kwargs): + return self.fs.info(self._join(path), **kwargs) + + async def _ls(self, path, detail=True, **kwargs): + ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy() + if detail: + out = [] + for entry in ret: + entry = entry.copy() + entry["name"] = self._relpath(entry["name"]) + out.append(entry) + return out + + return self._relpath(ret) + + def ls(self, path, detail=True, **kwargs): + ret = self.fs.ls(self._join(path), detail=detail, **kwargs).copy() + if detail: + out = [] + for entry in ret: + entry = entry.copy() + entry["name"] = self._relpath(entry["name"]) + out.append(entry) + return out + + return self._relpath(ret) + + async def _walk(self, path, *args, **kwargs): + async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs): + yield self._relpath(root), dirs, files + + def walk(self, path, *args, **kwargs): + for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs): + yield self._relpath(root), dirs, files + + async def _glob(self, path, **kwargs): + detail = kwargs.get("detail", False) + ret = await self.fs._glob(self._join(path), **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + def glob(self, path, **kwargs): + detail = kwargs.get("detail", False) + ret = self.fs.glob(self._join(path), **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + async def _du(self, path, *args, **kwargs): + total = kwargs.get("total", True) + ret = await self.fs._du(self._join(path), *args, **kwargs) + if total: + return ret + + return {self._relpath(path): size for path, size in ret.items()} + + def du(self, path, *args, **kwargs): + total = kwargs.get("total", True) + ret = self.fs.du(self._join(path), *args, **kwargs) + if total: + return ret + + return {self._relpath(path): size for path, size in 
ret.items()} + + async def _find(self, path, *args, **kwargs): + detail = kwargs.get("detail", False) + ret = await self.fs._find(self._join(path), *args, **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + def find(self, path, *args, **kwargs): + detail = kwargs.get("detail", False) + ret = self.fs.find(self._join(path), *args, **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + async def _expand_path(self, path, *args, **kwargs): + return self._relpath( + await self.fs._expand_path(self._join(path), *args, **kwargs) + ) + + def expand_path(self, path, *args, **kwargs): + return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs)) + + async def _mkdir(self, path, *args, **kwargs): + return await self.fs._mkdir(self._join(path), *args, **kwargs) + + def mkdir(self, path, *args, **kwargs): + return self.fs.mkdir(self._join(path), *args, **kwargs) + + async def _makedirs(self, path, *args, **kwargs): + return await self.fs._makedirs(self._join(path), *args, **kwargs) + + def makedirs(self, path, *args, **kwargs): + return self.fs.makedirs(self._join(path), *args, **kwargs) + + def rmdir(self, path): + return self.fs.rmdir(self._join(path)) + + def mv_file(self, path1, path2, **kwargs): + return self.fs.mv_file( + self._join(path1), + self._join(path2), + **kwargs, + ) + + def touch(self, path, **kwargs): + return self.fs.touch(self._join(path), **kwargs) + + def created(self, path): + return self.fs.created(self._join(path)) + + def modified(self, path): + return self.fs.modified(self._join(path)) + + def sign(self, path, *args, **kwargs): + return self.fs.sign(self._join(path), *args, **kwargs) + + def __repr__(self): + return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})" + + def open( + self, + path, + *args, + **kwargs, + ): + return self.fs.open( + self._join(path), + *args, + **kwargs, + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py new file mode 100644 index 0000000000000000000000000000000000000000..7c34d93e08c20fc65421e5aa4bab53e8c683fee7 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py @@ -0,0 +1,127 @@ +import os + +import pygit2 + +from fsspec.spec import AbstractFileSystem + +from .memory import MemoryFile + + +class GitFileSystem(AbstractFileSystem): + """Browse the files of a local git repo at any hash/tag/branch + + (experimental backend) + """ + + root_marker = "" + cachable = True + + def __init__(self, path=None, fo=None, ref=None, **kwargs): + """ + + Parameters + ---------- + path: str (optional) + Local location of the repo (uses current directory if not given). + May be deprecated in favour of ``fo``. When used with a higher + level function such as fsspec.open(), may be of the form + "git://[path-to-repo[:]][ref@]path/to/file" (but the actual + file path should not contain "@" or ":"). + fo: str (optional) + Same as ``path``, but passed as part of a chained URL. This one + takes precedence if both are given. + ref: str (optional) + Reference to work with, could be a hash, tag or branch name. Defaults + to current working tree. 
Note that ``ls`` and ``open`` also take hash, + so this becomes the default for those operations + kwargs + """ + super().__init__(**kwargs) + self.repo = pygit2.Repository(fo or path or os.getcwd()) + self.ref = ref or "master" + + @classmethod + def _strip_protocol(cls, path): + path = super()._strip_protocol(path).lstrip("/") + if ":" in path: + path = path.split(":", 1)[1] + if "@" in path: + path = path.split("@", 1)[1] + return path.lstrip("/") + + def _path_to_object(self, path, ref): + comm, ref = self.repo.resolve_refish(ref or self.ref) + parts = path.split("/") + tree = comm.tree + for part in parts: + if part and isinstance(tree, pygit2.Tree): + tree = tree[part] + return tree + + @staticmethod + def _get_kwargs_from_urls(path): + if path.startswith("git://"): + path = path[6:] + out = {} + if ":" in path: + out["path"], path = path.split(":", 1) + if "@" in path: + out["ref"], path = path.split("@", 1) + return out + + def ls(self, path, detail=True, ref=None, **kwargs): + path = self._strip_protocol(path) + tree = self._path_to_object(path, ref) + if isinstance(tree, pygit2.Tree): + out = [] + for obj in tree: + if isinstance(obj, pygit2.Tree): + out.append( + { + "type": "directory", + "name": "/".join([path, obj.name]).lstrip("/"), + "hex": obj.hex, + "mode": f"{obj.filemode:o}", + "size": 0, + } + ) + else: + out.append( + { + "type": "file", + "name": "/".join([path, obj.name]).lstrip("/"), + "hex": obj.hex, + "mode": f"{obj.filemode:o}", + "size": obj.size, + } + ) + else: + obj = tree + out = [ + { + "type": "file", + "name": obj.name, + "hex": obj.hex, + "mode": f"{obj.filemode:o}", + "size": obj.size, + } + ] + if detail: + return out + return [o["name"] for o in out] + + def ukey(self, path, ref=None): + return self.info(path, ref=ref)["hex"] + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=True, + cache_options=None, + ref=None, + **kwargs, + ): + obj = self._path_to_object(path, ref or self.ref) + return MemoryFile(data=obj.data) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py new file mode 100644 index 0000000000000000000000000000000000000000..204a0a7ee9955183542df51d7aaf4ec9173a34c9 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py @@ -0,0 +1,868 @@ +import asyncio +import io +import logging +import re +import weakref +from copy import copy +from urllib.parse import urlparse + +import aiohttp +import yarl + +from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper +from fsspec.callbacks import DEFAULT_CALLBACK +from fsspec.exceptions import FSTimeoutError +from fsspec.spec import AbstractBufferedFile +from fsspec.utils import ( + DEFAULT_BLOCK_SIZE, + glob_translate, + isfilelike, + nullcontext, + tokenize, +) + +from ..caching import AllBytes + +# https://stackoverflow.com/a/15926317/3821154 +ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P[^"']+)""") +ex2 = re.compile(r"""(?Phttp[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""") +logger = logging.getLogger("fsspec.http") + + +async def get_client(**kwargs): + return aiohttp.ClientSession(**kwargs) + + +class HTTPFileSystem(AsyncFileSystem): + """ + Simple File-System for fetching data via HTTP(S) + + ``ls()`` is implemented by loading the parent page and doing a regex + match on the result. 
If ``simple_links=True``, anything that looks like a URL, e.g. + "http(s)://server.com/stuff?thing=other", will be treated as a link; + otherwise only links within HTML href tags will be used. + """ + + sep = "/" + + def __init__( + self, + simple_links=True, + block_size=None, + same_scheme=True, + size_policy=None, + cache_type="bytes", + cache_options=None, + asynchronous=False, + loop=None, + client_kwargs=None, + get_client=get_client, + encoded=False, + **storage_options, + ): + """ + NB: if this is called async, you must await set_session + + Parameters + ---------- + block_size: int + Bytes to read per block; if 0, will default to raw requests file-like + objects instead of HTTPFile instances + simple_links: bool + If True, will consider both HTML tags and anything that looks + like a URL; if False, will consider only the former. + same_scheme: bool + When doing ls/glob, if this is True, only consider paths that have + http/https matching the input URLs. + size_policy: this argument is deprecated + client_kwargs: dict + Passed to aiohttp.ClientSession, see + https://docs.aiohttp.org/en/stable/client_reference.html + For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}`` + get_client: Callable[..., aiohttp.ClientSession] + A callable which takes keyword arguments and constructs + an aiohttp.ClientSession. Its state will be managed by + the HTTPFileSystem class. + storage_options: key-value + Any other parameters passed on to requests + cache_type, cache_options: defaults used in open + """ + super().__init__(asynchronous=asynchronous, loop=loop, **storage_options) + self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE + self.simple_links = simple_links + self.same_schema = same_scheme + self.cache_type = cache_type + self.cache_options = cache_options + self.client_kwargs = client_kwargs or {} + self.get_client = get_client + self.encoded = encoded + self.kwargs = storage_options + self._session = None + + # Clean caching-related parameters from `storage_options` + # before propagating them as `request_options` through `self.kwargs`. + # TODO: Maybe rename `self.kwargs` to `self.request_options` to make + # it clearer. + request_options = copy(storage_options) + self.use_listings_cache = request_options.pop("use_listings_cache", False) + request_options.pop("listings_expiry_time", None) + request_options.pop("max_paths", None) + request_options.pop("skip_instance_cache", None) + self.kwargs = request_options + + @property + def fsid(self): + return "http" + + def encode_url(self, url): + return yarl.URL(url, encoded=self.encoded) + + @staticmethod + def close_session(loop, session): + if loop is not None and loop.is_running(): + try: + sync(loop, session.close, timeout=0.1) + return + except (TimeoutError, FSTimeoutError, NotImplementedError): + pass + connector = getattr(session, "_connector", None) + if connector is not None: + # close after loop is dead + connector._close() + + async def set_session(self): + if self._session is None: + self._session = await self.get_client(loop=self.loop, **self.client_kwargs) + if not self.asynchronous: + weakref.finalize(self, self.close_session, self.loop, self._session) + return self._session + + @classmethod + def _strip_protocol(cls, path): + """For HTTP, we always want to keep the full URL""" + return path + + @classmethod + def _parent(cls, path): + # override, since _strip_protocol is different for URLs + par = super()._parent(path) + if len(par) > 7: # "http://..."
+ return par + return "" + + async def _ls_real(self, url, detail=True, **kwargs): + # ignoring URL-encoded arguments + kw = self.kwargs.copy() + kw.update(kwargs) + logger.debug(url) + session = await self.set_session() + async with session.get(self.encode_url(url), **self.kwargs) as r: + self._raise_not_found_for_status(r, url) + text = await r.text() + if self.simple_links: + links = ex2.findall(text) + [u[2] for u in ex.findall(text)] + else: + links = [u[2] for u in ex.findall(text)] + out = set() + parts = urlparse(url) + for l in links: + if isinstance(l, tuple): + l = l[1] + if l.startswith("/") and len(l) > 1: + # absolute URL on this server + l = f"{parts.scheme}://{parts.netloc}{l}" + if l.startswith("http"): + if self.same_schema and l.startswith(url.rstrip("/") + "/"): + out.add(l) + elif l.replace("https", "http").startswith( + url.replace("https", "http").rstrip("/") + "/" + ): + # allowed to cross http <-> https + out.add(l) + else: + if l not in ["..", "../"]: + # Ignore FTP-like "parent" + out.add("/".join([url.rstrip("/"), l.lstrip("/")])) + if not out and url.endswith("/"): + out = await self._ls_real(url.rstrip("/"), detail=False) + if detail: + return [ + { + "name": u, + "size": None, + "type": "directory" if u.endswith("/") else "file", + } + for u in out + ] + else: + return sorted(out) + + async def _ls(self, url, detail=True, **kwargs): + if self.use_listings_cache and url in self.dircache: + out = self.dircache[url] + else: + out = await self._ls_real(url, detail=detail, **kwargs) + self.dircache[url] = out + return out + + ls = sync_wrapper(_ls) + + def _raise_not_found_for_status(self, response, url): + """ + Raises FileNotFoundError for 404s, otherwise uses raise_for_status. + """ + if response.status == 404: + raise FileNotFoundError(url) + response.raise_for_status() + + async def _cat_file(self, url, start=None, end=None, **kwargs): + kw = self.kwargs.copy() + kw.update(kwargs) + logger.debug(url) + + if start is not None or end is not None: + if start == end: + return b"" + headers = kw.pop("headers", {}).copy() + + headers["Range"] = await self._process_limits(url, start, end) + kw["headers"] = headers + session = await self.set_session() + async with session.get(self.encode_url(url), **kw) as r: + out = await r.read() + self._raise_not_found_for_status(r, url) + return out + + async def _get_file( + self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs + ): + kw = self.kwargs.copy() + kw.update(kwargs) + logger.debug(rpath) + session = await self.set_session() + async with session.get(self.encode_url(rpath), **kw) as r: + try: + size = int(r.headers["content-length"]) + except (ValueError, KeyError): + size = None + + callback.set_size(size) + self._raise_not_found_for_status(r, rpath) + if isfilelike(lpath): + outfile = lpath + else: + outfile = open(lpath, "wb") # noqa: ASYNC101 + + try: + chunk = True + while chunk: + chunk = await r.content.read(chunk_size) + outfile.write(chunk) + callback.relative_update(len(chunk)) + finally: + if not isfilelike(lpath): + outfile.close() + + async def _put_file( + self, + lpath, + rpath, + chunk_size=5 * 2**20, + callback=DEFAULT_CALLBACK, + method="post", + **kwargs, + ): + async def gen_chunks(): + # Support passing arbitrary file-like objects + # and use them instead of streams. 
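# --- Editor's note: a hypothetical call pattern for the upload path above
# (URL and filename are placeholders; ``put_file`` is the sync wrapper that
# AsyncFileSystem generates for ``_put_file``):
#
#     fs = fsspec.filesystem("http")
#     fs.put_file("local.bin", "https://example.com/upload", method="put")
#
# The chunk generator streams the local file as the request body in
# chunk_size pieces instead of loading it into memory, via POST by default
# or PUT when method="put".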
+ if isinstance(lpath, io.IOBase): + context = nullcontext(lpath) + use_seek = False # might not support seeking + else: + context = open(lpath, "rb") # noqa: ASYNC101 + use_seek = True + + with context as f: + if use_seek: + callback.set_size(f.seek(0, 2)) + f.seek(0) + else: + callback.set_size(getattr(f, "size", None)) + + chunk = f.read(chunk_size) + while chunk: + yield chunk + callback.relative_update(len(chunk)) + chunk = f.read(chunk_size) + + kw = self.kwargs.copy() + kw.update(kwargs) + session = await self.set_session() + + method = method.lower() + if method not in ("post", "put"): + raise ValueError( + f"method has to be either 'post' or 'put', not: {method!r}" + ) + + meth = getattr(session, method) + async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp: + self._raise_not_found_for_status(resp, rpath) + + async def _exists(self, path, **kwargs): + kw = self.kwargs.copy() + kw.update(kwargs) + try: + logger.debug(path) + session = await self.set_session() + r = await session.get(self.encode_url(path), **kw) + async with r: + return r.status < 400 + except aiohttp.ClientError: + return False + + async def _isfile(self, path, **kwargs): + return await self._exists(path, **kwargs) + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=None, # XXX: This differs from the base class. + cache_type=None, + cache_options=None, + size=None, + **kwargs, + ): + """Make a file-like object + + Parameters + ---------- + path: str + Full URL with protocol + mode: string + must be "rb" + block_size: int or None + Bytes to download in one request; use instance value if None. If + zero, will return a streaming Requests file-like instance. + kwargs: key-value + Any other parameters, passed to requests calls + """ + if mode != "rb": + raise NotImplementedError + block_size = block_size if block_size is not None else self.block_size + kw = self.kwargs.copy() + kw["asynchronous"] = self.asynchronous + kw.update(kwargs) + size = size or self.info(path, **kwargs)["size"] + session = sync(self.loop, self.set_session) + if block_size and size: + return HTTPFile( + self, + path, + session=session, + block_size=block_size, + mode=mode, + size=size, + cache_type=cache_type or self.cache_type, + cache_options=cache_options or self.cache_options, + loop=self.loop, + **kw, + ) + else: + return HTTPStreamFile( + self, + path, + mode=mode, + loop=self.loop, + session=session, + **kw, + ) + + async def open_async(self, path, mode="rb", size=None, **kwargs): + session = await self.set_session() + if size is None: + try: + size = (await self._info(path, **kwargs))["size"] + except FileNotFoundError: + pass + return AsyncStreamFile( + self, + path, + loop=self.loop, + session=session, + size=size, + **kwargs, + ) + + def ukey(self, url): + """Unique identifier; assume HTTP files are static, unchanging""" + return tokenize(url, self.kwargs, self.protocol) + + async def _info(self, url, **kwargs): + """Get info of URL + + Tries to access location via HEAD, and then GET methods, but does + not fetch the data. + + It is possible that the server does not supply any size information, in + which case size will be given as None (and certain operations on the + corresponding file will not work). 
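# --- Editor's note: hypothetical usage sketch (the URL is a placeholder).
# When the server reports no usable size, only sequential streaming works;
# block_size=0 requests the streaming file type described above.
import fsspec

fs = fsspec.filesystem("http")
with fs.open("https://example.com/stream", block_size=0) as f:
    payload = f.read()  # reads the body sequentially; seeking is unsupported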
+ """ + info = {} + session = await self.set_session() + + for policy in ["head", "get"]: + try: + info.update( + await _file_info( + self.encode_url(url), + size_policy=policy, + session=session, + **self.kwargs, + **kwargs, + ) + ) + if info.get("size") is not None: + break + except Exception as exc: + if policy == "get": + # If get failed, then raise a FileNotFoundError + raise FileNotFoundError(url) from exc + logger.debug(str(exc)) + + return {"name": url, "size": None, **info, "type": "file"} + + async def _glob(self, path, maxdepth=None, **kwargs): + """ + Find files by glob-matching. + + This implementation is idntical to the one in AbstractFileSystem, + but "?" is not considered as a character for globbing, because it is + so common in URLs, often identifying the "query" part. + """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re + + ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash + path = self._strip_protocol(path) + append_slash_to_dirname = ends_with_slash or path.endswith("/**") + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) + + min_idx = min(idx_star, idx_brace) + + detail = kwargs.pop("detail", False) + + if not has_magic(path): + if await self._exists(path, **kwargs): + if not detail: + return [path] + else: + return {path: await self._info(path, **kwargs)} + else: + if not detail: + return [] # glob of non-existent returns empty + else: + return {} + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 + else: + root = "" + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None + + allpaths = await self._find( + root, maxdepth=depth, withdirs=True, detail=True, **kwargs + ) + + pattern = glob_translate(path + ("/" if ends_with_slash else "")) + pattern = re.compile(pattern) + + out = { + p: info + for p, info in sorted(allpaths.items()) + if pattern.match( + ( + p + "/" + if append_slash_to_dirname and info["type"] == "directory" + else p + ) + ) + } + + if detail: + return out + else: + return list(out) + + async def _isdir(self, path): + # override, since all URLs are (also) files + try: + return bool(await self._ls(path)) + except (FileNotFoundError, ValueError): + return False + + +class HTTPFile(AbstractBufferedFile): + """ + A file-like object pointing to a remove HTTP(S) resource + + Supports only reading, with read-ahead of a predermined block-size. + + In the case that the server does not supply the filesize, only reading of + the complete file in one go is supported. + + Parameters + ---------- + url: str + Full URL of the remote resource, including the protocol + session: aiohttp.ClientSession or None + All calls will be made within this session, to avoid restarting + connections where the server allows this + block_size: int or None + The amount of read-ahead to do, in bytes. Default is 5MB, or the value + configured for the FileSystem creating this file + size: None or int + If given, this is the size of the file in bytes, and we don't attempt + to call the server to find the value. + kwargs: all other key-values are passed to requests calls. 
+ """ + + def __init__( + self, + fs, + url, + session=None, + block_size=None, + mode="rb", + cache_type="bytes", + cache_options=None, + size=None, + loop=None, + asynchronous=False, + **kwargs, + ): + if mode != "rb": + raise NotImplementedError("File mode not supported") + self.asynchronous = asynchronous + self.url = url + self.session = session + self.details = {"name": url, "size": size, "type": "file"} + super().__init__( + fs=fs, + path=url, + mode=mode, + block_size=block_size, + cache_type=cache_type, + cache_options=cache_options, + **kwargs, + ) + self.loop = loop + + def read(self, length=-1): + """Read bytes from file + + Parameters + ---------- + length: int + Read up to this many bytes. If negative, read all content to end of + file. If the server has not supplied the filesize, attempting to + read only part of the data will raise a ValueError. + """ + if ( + (length < 0 and self.loc == 0) # explicit read all + # but not when the size is known and fits into a block anyways + and not (self.size is not None and self.size <= self.blocksize) + ): + self._fetch_all() + if self.size is None: + if length < 0: + self._fetch_all() + else: + length = min(self.size - self.loc, length) + return super().read(length) + + async def async_fetch_all(self): + """Read whole file in one shot, without caching + + This is only called when position is still at zero, + and read() is called without a byte-count. + """ + logger.debug(f"Fetch all for {self}") + if not isinstance(self.cache, AllBytes): + r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs) + async with r: + r.raise_for_status() + out = await r.read() + self.cache = AllBytes( + size=len(out), fetcher=None, blocksize=None, data=out + ) + self.size = len(out) + + _fetch_all = sync_wrapper(async_fetch_all) + + def _parse_content_range(self, headers): + """Parse the Content-Range header""" + s = headers.get("Content-Range", "") + m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s) + if not m: + return None, None, None + + if m[1] == "*": + start = end = None + else: + start, end = [int(x) for x in m[1].split("-")] + total = None if m[2] == "*" else int(m[2]) + return start, end, total + + async def async_fetch_range(self, start, end): + """Download a block of data + + The expectation is that the server returns only the requested bytes, + with HTTP code 206. If this is not the case, we first check the headers, + and then stream the output - if the data size is bigger than we + requested, an exception is raised. + """ + logger.debug(f"Fetch range for {self}: {start}-{end}") + kwargs = self.kwargs.copy() + headers = kwargs.pop("headers", {}).copy() + headers["Range"] = f"bytes={start}-{end - 1}" + logger.debug(f"{self.url} : {headers['Range']}") + r = await self.session.get( + self.fs.encode_url(self.url), headers=headers, **kwargs + ) + async with r: + if r.status == 416: + # range request outside file + return b"" + r.raise_for_status() + + # If the server has handled the range request, it should reply + # with status 206 (partial content). But we'll guess that a suitable + # Content-Range header or a Content-Length no more than the + # requested range also mean we have got the desired range. 
+ response_is_range = ( + r.status == 206 + or self._parse_content_range(r.headers)[0] == start + or int(r.headers.get("Content-Length", end + 1)) <= end - start + ) + + if response_is_range: + # partial content, as expected + out = await r.read() + elif start > 0: + raise ValueError( + "The HTTP server doesn't appear to support range requests. " + "Only reading this file from the beginning is supported. " + "Open with block_size=0 for a streaming file interface." + ) + else: + # Response is not a range, but we want the start of the file, + # so we can read the required amount anyway. + cl = 0 + out = [] + while True: + chunk = await r.content.read(2**20) + # data size unknown, let's read until we have enough + if chunk: + out.append(chunk) + cl += len(chunk) + if cl > end - start: + break + else: + break + out = b"".join(out)[: end - start] + return out + + _fetch_range = sync_wrapper(async_fetch_range) + + def __reduce__(self): + return ( + reopen, + ( + self.fs, + self.url, + self.mode, + self.blocksize, + self.cache.name if self.cache else "none", + self.size, + ), + ) + + +def reopen(fs, url, mode, blocksize, cache_type, size=None): + return fs.open( + url, mode=mode, block_size=blocksize, cache_type=cache_type, size=size + ) + + +magic_check = re.compile("([*[])") + + +def has_magic(s): + match = magic_check.search(s) + return match is not None + + +class HTTPStreamFile(AbstractBufferedFile): + def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs): + self.asynchronous = kwargs.pop("asynchronous", False) + self.url = url + self.loop = loop + self.session = session + if mode != "rb": + raise ValueError + self.details = {"name": url, "size": None} + super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs) + + async def cor(): + r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__() + self.fs._raise_not_found_for_status(r, url) + return r + + self.r = sync(self.loop, cor) + + def seek(self, loc, whence=0): + if loc == 0 and whence == 1: + return + if loc == self.loc and whence == 0: + return + raise ValueError("Cannot seek streaming HTTP file") + + async def _read(self, num=-1): + out = await self.r.content.read(num) + self.loc += len(out) + return out + + read = sync_wrapper(_read) + + async def _close(self): + self.r.close() + + def close(self): + asyncio.run_coroutine_threadsafe(self._close(), self.loop) + super().close() + + def __reduce__(self): + return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name) + + +class AsyncStreamFile(AbstractAsyncStreamedFile): + def __init__( + self, fs, url, mode="rb", loop=None, session=None, size=None, **kwargs + ): + self.url = url + self.session = session + self.r = None + if mode != "rb": + raise ValueError + self.details = {"name": url, "size": None} + self.kwargs = kwargs + super().__init__(fs=fs, path=url, mode=mode, cache_type="none") + self.size = size + + async def read(self, num=-1): + if self.r is None: + r = await self.session.get( + self.fs.encode_url(self.url), **self.kwargs + ).__aenter__() + self.fs._raise_not_found_for_status(r, self.url) + self.r = r + out = await self.r.content.read(num) + self.loc += len(out) + return out + + async def close(self): + if self.r is not None: + self.r.close() + self.r = None + await super().close() + + +async def get_range(session, url, start, end, file=None, **kwargs): + # explicit get a range when we know it must be safe + kwargs = kwargs.copy() + headers = kwargs.pop("headers", {}).copy() + headers["Range"] = 
f"bytes={start}-{end - 1}" + r = await session.get(url, headers=headers, **kwargs) + r.raise_for_status() + async with r: + out = await r.read() + if file: + with open(file, "r+b") as f: # noqa: ASYNC101 + f.seek(start) + f.write(out) + else: + return out + + +async def _file_info(url, session, size_policy="head", **kwargs): + """Call HEAD on the server to get details about the file (size/checksum etc.) + + Default operation is to explicitly allow redirects and use encoding + 'identity' (no compression) to get the true size of the target. + """ + logger.debug("Retrieve file size for %s", url) + kwargs = kwargs.copy() + ar = kwargs.pop("allow_redirects", True) + head = kwargs.get("headers", {}).copy() + head["Accept-Encoding"] = "identity" + kwargs["headers"] = head + + info = {} + if size_policy == "head": + r = await session.head(url, allow_redirects=ar, **kwargs) + elif size_policy == "get": + r = await session.get(url, allow_redirects=ar, **kwargs) + else: + raise TypeError(f'size_policy must be "head" or "get", got {size_policy}') + async with r: + r.raise_for_status() + + # TODO: + # recognise lack of 'Accept-Ranges', + # or 'Accept-Ranges': 'none' (not 'bytes') + # to mean streaming only, no random access => return None + if "Content-Length" in r.headers: + # Some servers may choose to ignore Accept-Encoding and return + # compressed content, in which case the returned size is unreliable. + if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [ + "identity", + "", + ]: + info["size"] = int(r.headers["Content-Length"]) + elif "Content-Range" in r.headers: + info["size"] = int(r.headers["Content-Range"].split("/")[1]) + + if "Content-Type" in r.headers: + info["mimetype"] = r.headers["Content-Type"].partition(";")[0] + + info["url"] = str(r.url) + + for checksum_field in ["ETag", "Content-MD5", "Digest"]: + if r.headers.get(checksum_field): + info[checksum_field] = r.headers[checksum_field] + + return info + + +async def _file_size(url, session=None, *args, **kwargs): + if session is None: + session = await get_client() + info = await _file_info(url, session=session, *args, **kwargs) + return info.get("size") + + +file_size = sync_wrapper(_file_size) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py new file mode 100644 index 0000000000000000000000000000000000000000..2839f4c1feea56dddd54bdc00f0b884c8461d29e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py @@ -0,0 +1,124 @@ +import base64 +import io +import re + +import requests + +import fsspec + + +class JupyterFileSystem(fsspec.AbstractFileSystem): + """View of the files as seen by a Jupyter server (notebook or lab)""" + + protocol = ("jupyter", "jlab") + + def __init__(self, url, tok=None, **kwargs): + """ + + Parameters + ---------- + url : str + Base URL of the server, like "http://127.0.0.1:8888". May include + token in the string, which is given by the process when starting up + tok : str + If the token is obtained separately, can be given here + kwargs + """ + if "?" 
in url: + if tok is None: + try: + tok = re.findall("token=([a-z0-9]+)", url)[0] + except IndexError as e: + raise ValueError("Could not determine token") from e + url = url.split("?", 1)[0] + self.url = url.rstrip("/") + "/api/contents" + self.session = requests.Session() + if tok: + self.session.headers["Authorization"] = f"token {tok}" + + super().__init__(**kwargs) + + def ls(self, path, detail=True, **kwargs): + path = self._strip_protocol(path) + r = self.session.get(f"{self.url}/{path}") + if r.status_code == 404: + raise FileNotFoundError(path) + r.raise_for_status() + out = r.json() + + if out["type"] == "directory": + out = out["content"] + else: + out = [out] + for o in out: + o["name"] = o.pop("path") + o.pop("content") + if o["type"] == "notebook": + o["type"] = "file" + if detail: + return out + return [o["name"] for o in out] + + def cat_file(self, path, start=None, end=None, **kwargs): + path = self._strip_protocol(path) + r = self.session.get(f"{self.url}/{path}") + if r.status_code == 404: + raise FileNotFoundError(path) + r.raise_for_status() + out = r.json() + if out["format"] == "text": + # text content; encode to bytes + b = out["content"].encode() + else: + b = base64.b64decode(out["content"]) + return b[start:end] + + def pipe_file(self, path, value, **_): + path = self._strip_protocol(path) + json = { + "name": path.rsplit("/", 1)[-1], + "path": path, + "size": len(value), + "content": base64.b64encode(value).decode(), + "format": "base64", + "type": "file", + } + self.session.put(f"{self.url}/{path}", json=json) + + def mkdir(self, path, create_parents=True, **kwargs): + path = self._strip_protocol(path) + if create_parents and "/" in path: + self.mkdir(path.rsplit("/", 1)[0], True) + json = { + "name": path.rsplit("/", 1)[-1], + "path": path, + "size": None, + "content": None, + "type": "directory", + } + self.session.put(f"{self.url}/{path}", json=json) + + def _rm(self, path): + path = self._strip_protocol(path) + self.session.delete(f"{self.url}/{path}") + + def _open(self, path, mode="rb", **kwargs): + path = self._strip_protocol(path) + if mode == "rb": + data = self.cat_file(path) + return io.BytesIO(data) + else: + return SimpleFileWriter(self, path, mode="wb") + + +class SimpleFileWriter(fsspec.spec.AbstractBufferedFile): + def _upload_chunk(self, final=False): + """Never uploads a chunk until file is done + + Not suitable for large files + """ + if final is False: + return False + self.buffer.seek(0) + data = self.buffer.read() + self.fs.pipe_file(self.path, data) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py new file mode 100644 index 0000000000000000000000000000000000000000..05795cbad94200f7fa6c38d1c657c87f796f6b3e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py @@ -0,0 +1,1160 @@ +import base64 +import collections +import io +import itertools +import logging +import math +import os +from functools import lru_cache +from typing import TYPE_CHECKING + +import fsspec.core + +try: + import ujson as json +except ImportError: + if not TYPE_CHECKING: + import json + +from ..asyn import AsyncFileSystem +from ..callbacks import DEFAULT_CALLBACK +from ..core import filesystem, open, split_protocol +from ..utils import isfilelike, merge_offset_ranges, other_paths + +logger = logging.getLogger("fsspec.reference") + + +class
ReferenceNotReachable(RuntimeError): + def __init__(self, reference, target, *args): + super().__init__(*args) + self.reference = reference + self.target = target + + def __str__(self): + return f'Reference "{self.reference}" failed to fetch target {self.target}' + + +def _first(d): + return list(d.values())[0] + + +def _prot_in_references(path, references): + ref = references.get(path) + if isinstance(ref, (list, tuple)): + return split_protocol(ref[0])[0] if ref[0] else ref[0] + + +def _protocol_groups(paths, references): + if isinstance(paths, str): + return {_prot_in_references(paths, references): [paths]} + out = {} + for path in paths: + protocol = _prot_in_references(path, references) + out.setdefault(protocol, []).append(path) + return out + + +class RefsValuesView(collections.abc.ValuesView): + def __iter__(self): + for val in self._mapping.zmetadata.values(): + yield json.dumps(val).encode() + yield from self._mapping._items.values() + for field in self._mapping.listdir(): + chunk_sizes = self._mapping._get_chunk_sizes(field) + if len(chunk_sizes) == 0: + yield self._mapping[field + "/0"] + continue + yield from self._mapping._generate_all_records(field) + + +class RefsItemsView(collections.abc.ItemsView): + def __iter__(self): + return zip(self._mapping.keys(), self._mapping.values()) + + +def ravel_multi_index(idx, sizes): + val = 0 + mult = 1 + for i, s in zip(idx[::-1], sizes[::-1]): + val += i * mult + mult *= s + return val + + +class LazyReferenceMapper(collections.abc.MutableMapping): + """This interface can be used to read/write references from Parquet stores. + It is not intended for other types of references. + It can be used with Kerchunk's MultiZarrToZarr method to combine + references into a parquet store. + Examples of this use-case can be found here: + https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage""" + + # import is class level to prevent numpy dep requirement for fsspec + @property + def np(self): + import numpy as np + + return np + + @property + def pd(self): + import pandas as pd + + return pd + + def __init__( + self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10 + ): + """ + + This instance will be writable, storing changes in memory until full partitions + are accumulated or .flush() is called. + + To create an empty lazy store, use .create() + + Parameters + ---------- + root : str + Root of parquet store + fs : fsspec.AbstractFileSystem + fsspec filesystem object, default is local filesystem. + cache_size : int, default=128 + Maximum size of LRU cache, where cache_size*record_size denotes + the total number of references that can be loaded in memory at once. + categorical_threshold : int + Encode urls as pandas.Categorical to reduce memory footprint if the ratio + of the total number of refs to the number of unique urls for each variable + is greater than this number. (default 10) + """ + self.root = root + self.chunk_sizes = {} + self.out_root = out_root or self.root + self.cat_thresh = categorical_threshold + self.cache_size = cache_size + self.dirs = None + self.url = self.root + "/{field}/refs.{record}.parq" + # TODO: derive fs from `root` + self.fs = fsspec.filesystem("file") if fs is None else fs + + def __getattr__(self, item): + if item in ("_items", "record_size", "zmetadata"): + self.setup() + # avoid possible recursion if setup fails somehow + return self.__dict__[item] + raise AttributeError(item) + + def setup(self): + self._items = {} + self._items[".zmetadata"] = self.fs.cat_file( + "/".join([self.root, ".zmetadata"]) + ) + met = json.loads(self._items[".zmetadata"]) + self.record_size = met["record_size"] + self.zmetadata = met["metadata"] + + # Define function to open and decompress refs + @lru_cache(maxsize=self.cache_size) + def open_refs(field, record): + """cached parquet file loader""" + path = self.url.format(field=field, record=record) + data = io.BytesIO(self.fs.cat_file(path)) + df = self.pd.read_parquet(data, engine="fastparquet") + refs = {c: df[c].values for c in df.columns} + return refs + + self.open_refs = open_refs + + @staticmethod + def create(root, storage_options=None, fs=None, record_size=10000, **kwargs): + """Make empty parquet reference set + + First deletes the contents of the given directory, if it exists. + + Parameters + ---------- + root: str + Directory to contain the output; will be created + storage_options: dict | None + For making the filesystem to use for writing, if fs is None + fs: FileSystem | None + Filesystem for writing + record_size: int + Number of references per parquet file + kwargs: passed to __init__ + + Returns + ------- + LazyReferenceMapper instance + """ + met = {"metadata": {}, "record_size": record_size} + if fs is None: + fs, root = fsspec.core.url_to_fs(root, **(storage_options or {})) + if fs.exists(root): + fs.rm(root, recursive=True) + fs.makedirs(root, exist_ok=True) + fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode()) + return LazyReferenceMapper(root, fs, **kwargs) + + def listdir(self, basename=True): + """List top-level directories""" + # cache me?
+ if self.dirs is None: + dirs = [p.split("/", 1)[0] for p in self.zmetadata] + self.dirs = {p for p in dirs if p and not p.startswith(".")} + listing = self.dirs + if basename: + listing = [os.path.basename(path) for path in listing] + return listing + + def ls(self, path="", detail=True): + """Shortcut file listings""" + if not path: + dirnames = self.listdir() + others = set( + [".zmetadata"] + + [name for name in self.zmetadata if "/" not in name] + + [name for name in self._items if "/" not in name] + ) + if detail is False: + others.update(dirnames) + return sorted(others) + dirinfo = [ + {"name": name, "type": "directory", "size": 0} for name in dirnames + ] + fileinfo = [ + { + "name": name, + "type": "file", + "size": len( + json.dumps(self.zmetadata[name]) + if name in self.zmetadata + else self._items[name] + ), + } + for name in others + ] + return sorted(dirinfo + fileinfo, key=lambda s: s["name"]) + parts = path.split("/", 1) + if len(parts) > 1: + raise FileNotFoundError("Cannot list within directories right now") + field = parts[0] + others = set( + [name for name in self.zmetadata if name.startswith(f"{path}/")] + + [name for name in self._items if name.startswith(f"{path}/")] + ) + fileinfo = [ + { + "name": name, + "type": "file", + "size": len( + json.dumps(self.zmetadata[name]) + if name in self.zmetadata + else self._items[name] + ), + } + for name in others + ] + keys = self._keys_in_field(field) + + if detail is False: + return list(others) + list(keys) + recs = self._generate_all_records(field) + recinfo = [ + {"name": name, "type": "file", "size": rec[-1]} + for name, rec in zip(keys, recs) + if rec[0] # filters out path==None, deleted/missing + ] + return fileinfo + recinfo + + def _load_one_key(self, key): + """Get the reference for one key + + Returns bytes, one-element list or three-element list. 
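# --- Editor's note: a worked example (made-up sizes) of the key -> record
# arithmetic used below by _key_to_record and ravel_multi_index: a key like
# "field/1.2" is linearised C-style over the per-axis chunk counts, then
# split into (parquet record, row within record).
def ravel_multi_index(idx, sizes):
    val, mult = 0, 1
    for i, s in zip(idx[::-1], sizes[::-1]):
        val += i * mult
        mult *= s
    return val

chunk_sizes = [3, 4]                             # chunk counts per axis
linear = ravel_multi_index([1, 2], chunk_sizes)  # 1*4 + 2 = 6
record, row = divmod(linear, 5)                  # record_size=5 -> (1, 1)
print(linear, record, row)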
+ """ + if key in self._items: + return self._items[key] + elif key in self.zmetadata: + return json.dumps(self.zmetadata[key]).encode() + elif "/" not in key or self._is_meta(key): + raise KeyError(key) + field, sub_key = key.split("/") + record, ri, chunk_size = self._key_to_record(key) + maybe = self._items.get((field, record), {}).get(ri, False) + if maybe is None: + # explicitly deleted + raise KeyError + elif maybe: + return maybe + elif chunk_size == 0: + return b"" + + # Chunk keys can be loaded from row group and cached in LRU cache + try: + refs = self.open_refs(field, record) + except (ValueError, TypeError, FileNotFoundError): + raise KeyError(key) + columns = ["path", "offset", "size", "raw"] + selection = [refs[c][ri] if c in refs else None for c in columns] + raw = selection[-1] + if raw is not None: + return raw + if selection[0] is None: + raise KeyError("This reference does not exist or has been deleted") + if selection[1:3] == [0, 0]: + # URL only + return selection[:1] + # URL, offset, size + return selection[:3] + + @lru_cache(4096) + def _key_to_record(self, key): + """Details needed to construct a reference for one key""" + field, chunk = key.split("/") + chunk_sizes = self._get_chunk_sizes(field) + if len(chunk_sizes) == 0: + return 0, 0, 0 + chunk_idx = [int(c) for c in chunk.split(".")] + chunk_number = ravel_multi_index(chunk_idx, chunk_sizes) + record = chunk_number // self.record_size + ri = chunk_number % self.record_size + return record, ri, len(chunk_sizes) + + def _get_chunk_sizes(self, field): + """The number of chunks along each axis for a given field""" + if field not in self.chunk_sizes: + zarray = self.zmetadata[f"{field}/.zarray"] + size_ratio = [ + math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"]) + ] + self.chunk_sizes[field] = size_ratio or [1] + return self.chunk_sizes[field] + + def _generate_record(self, field, record): + """The references for a given parquet file of a given field""" + refs = self.open_refs(field, record) + it = iter(zip(*refs.values())) + if len(refs) == 3: + # All urls + return (list(t) for t in it) + elif len(refs) == 1: + # All raws + return refs["raw"] + else: + # Mix of urls and raws + return (list(t[:3]) if not t[3] else t[3] for t in it) + + def _generate_all_records(self, field): + """Load all the references within a field by iterating over the parquet files""" + nrec = 1 + for ch in self._get_chunk_sizes(field): + nrec *= ch + nrec = math.ceil(nrec / self.record_size) + for record in range(nrec): + yield from self._generate_record(field, record) + + def values(self): + return RefsValuesView(self) + + def items(self): + return RefsItemsView(self) + + def __hash__(self): + return id(self) + + def __getitem__(self, key): + return self._load_one_key(key) + + def __setitem__(self, key, value): + if "/" in key and not self._is_meta(key): + field, chunk = key.split("/") + record, i, _ = self._key_to_record(key) + subdict = self._items.setdefault((field, record), {}) + subdict[i] = value + if len(subdict) == self.record_size: + self.write(field, record) + else: + # metadata or top-level + self._items[key] = value + new_value = json.loads( + value.decode() if isinstance(value, bytes) else value + ) + self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value} + + @staticmethod + def _is_meta(key): + return key.startswith(".z") or "/.z" in key + + def __delitem__(self, key): + if key in self._items: + del self._items[key] + elif key in self.zmetadata: + del self.zmetadata[key] + else: + if "/" in key and 
not self._is_meta(key): + field, chunk = key.split("/") + record, i, _ = self._key_to_record(key) + subdict = self._items.setdefault((field, record), {}) + subdict[i] = None + if len(subdict) == self.record_size: + self.write(field, record) + else: + # metadata or top-level + self._items[key] = None + + def write(self, field, record, base_url=None, storage_options=None): + # extra requirements if writing + import kerchunk.df + import numpy as np + import pandas as pd + + partition = self._items[(field, record)] + original = False + if len(partition) < self.record_size: + try: + original = self.open_refs(field, record) + except IOError: + pass + + if original: + paths = original["path"] + offsets = original["offset"] + sizes = original["size"] + raws = original["raw"] + else: + paths = np.full(self.record_size, np.nan, dtype="O") + offsets = np.zeros(self.record_size, dtype="int64") + sizes = np.zeros(self.record_size, dtype="int64") + raws = np.full(self.record_size, np.nan, dtype="O") + for j, data in partition.items(): + if isinstance(data, list): + if ( + str(paths.dtype) == "category" + and data[0] not in paths.dtype.categories + ): + paths = paths.add_categories(data[0]) + paths[j] = data[0] + if len(data) > 1: + offsets[j] = data[1] + sizes[j] = data[2] + elif data is None: + # delete + paths[j] = None + offsets[j] = 0 + sizes[j] = 0 + raws[j] = None + else: + # this is the only call into kerchunk, could remove + raws[j] = kerchunk.df._proc_raw(data) + # TODO: only save needed columns + df = pd.DataFrame( + { + "path": paths, + "offset": offsets, + "size": sizes, + "raw": raws, + }, + copy=False, + ) + if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh: + df["path"] = df["path"].astype("category") + object_encoding = {"raw": "bytes", "path": "utf8"} + has_nulls = ["path", "raw"] + + fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq" + self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True) + df.to_parquet( + fn, + engine="fastparquet", + storage_options=storage_options + or getattr(self.fs, "storage_options", None), + compression="zstd", + index=False, + stats=False, + object_encoding=object_encoding, + has_nulls=has_nulls, + # **kwargs, + ) + partition.clear() + self._items.pop((field, record)) + + def flush(self, base_url=None, storage_options=None): + """Output any modified or deleted keys + + Parameters + ---------- + base_url: str + Location of the output + """ + # write what we have so far and clear sub chunks + for thing in list(self._items): + if isinstance(thing, tuple): + field, record = thing + self.write( + field, + record, + base_url=base_url, + storage_options=storage_options, + ) + + # gather .zmetadata from self._items and write that too + for k in list(self._items): + if k != ".zmetadata" and ".z" in k: + self.zmetadata[k] = json.loads(self._items.pop(k)) + met = {"metadata": self.zmetadata, "record_size": self.record_size} + self._items[".zmetadata"] = json.dumps(met).encode() + self.fs.pipe( + "/".join([base_url or self.out_root, ".zmetadata"]), + self._items[".zmetadata"], + ) + + # TODO: only clear those that we wrote to? 
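# --- Editor's note: a hypothetical end-to-end sketch of this lazy parquet
# store (the path is made up; writing actual chunk references additionally
# needs fastparquet and kerchunk, as imported in write() above).
from fsspec.implementations.reference import LazyReferenceMapper

refs = LazyReferenceMapper.create("/tmp/refs.parq", record_size=1000)
refs[".zgroup"] = b'{"zarr_format": 2}'  # metadata keys are held in memory
refs.flush()                             # persists .zmetadata to the store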
+ self.open_refs.cache_clear() + + def __len__(self): + # Caveat: This counts expected references, not actual - but is fast + count = 0 + for field in self.listdir(): + if field.startswith("."): + count += 1 + else: + count += math.prod(self._get_chunk_sizes(field)) + count += len(self.zmetadata) # all metadata keys + # any other files not in reference partitions + count += sum(1 for _ in self._items if not isinstance(_, tuple)) + return count + + def __iter__(self): + # Caveat: returns only existing keys, so the number of these does not + # match len(self) + metas = set(self.zmetadata) + metas.update(self._items) + for bit in metas: + if isinstance(bit, str): + yield bit + for field in self.listdir(): + for k in self._keys_in_field(field): + if k in self: + yield k + + def __contains__(self, item): + try: + self._load_one_key(item) + return True + except KeyError: + return False + + def _keys_in_field(self, field): + """List key names in given field + + Produces strings like "field/x.y" appropriate from the chunking of the array + """ + chunk_sizes = self._get_chunk_sizes(field) + if len(chunk_sizes) == 0: + yield field + "/0" + return + inds = itertools.product(*(range(i) for i in chunk_sizes)) + for ind in inds: + yield field + "/" + ".".join([str(c) for c in ind]) + + +class ReferenceFileSystem(AsyncFileSystem): + """View byte ranges of some other file as a file system + Initial version: single file system target, which must support + async, and must allow start and end args in _cat_file. Later versions + may allow multiple arbitrary URLs for the targets. + This FileSystem is read-only. It is designed to be used with async + targets (for now). This FileSystem only allows whole-file access, no + ``open``. We do not get original file details from the target FS. + Configuration is by passing a dict of references at init, or a URL to + a JSON file containing the same; this dict + can also contain concrete data for some set of paths. + Reference dict format: + {path0: bytes_data, path1: (target_url, offset, size)} + https://github.com/fsspec/kerchunk/blob/main/README.md + """ + + protocol = "reference" + + def __init__( + self, + fo, + target=None, + ref_storage_args=None, + target_protocol=None, + target_options=None, + remote_protocol=None, + remote_options=None, + fs=None, + template_overrides=None, + simple_templates=True, + max_gap=64_000, + max_block=256_000_000, + cache_size=128, + **kwargs, + ): + """ + Parameters + ---------- + fo : dict or str + The set of references to use for this instance, with a structure as above. + If str referencing a JSON file, will use fsspec.open, in conjunction + with target_options and target_protocol to open and parse JSON at this + location. If a directory, then assume references are a set of parquet + files to be loaded lazily. + target : str + For any references having target_url as None, this is the default file + target to use + ref_storage_args : dict + If references is a str, use these kwargs for loading the JSON file. + Deprecated: use target_options instead. + target_protocol : str + Used for loading the reference file, if it is a path. If None, protocol + will be derived from the given path + target_options : dict + Extra FS options for loading the reference file ``fo``, if given as a path + remote_protocol : str + The protocol of the filesystem on which the references will be evaluated + (unless fs is provided). If not given, will be derived from the first + URL that has a protocol in the templates or in the references, in that + order. 
remote_options : dict + kwargs to go with remote_protocol + fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict)) + Directly provide one or more file systems: + - a single filesystem instance + - a dict of protocol:filesystem, where each value is either a filesystem + instance, or a dict of kwargs that can be used to create an + instance for the given protocol + + If this is given, remote_options and remote_protocol are ignored. + template_overrides : dict + Swap out any templates in the references file with these - useful for + testing. + simple_templates: bool + Whether templates can be processed with simple replace (True) or if + jinja is needed (False, much slower). All reference sets produced by + ``kerchunk`` are simple in this sense, but the spec allows for complex. + max_gap, max_block: int + For merging multiple concurrent requests to the same remote file. + Neighboring byte ranges will only be merged when their + inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0 + to only merge when it requires no extra bytes. Pass a negative + number to disable merging, appropriate for local target files. + Neighboring byte ranges will only be merged when the size of + the aggregated range is <= ``max_block``. Default is 256MB. + cache_size : int + Maximum size of LRU cache, where cache_size*record_size denotes + the total number of references that can be loaded in memory at once. + Only used for lazily loaded references. + kwargs : passed to parent class + """ + super().__init__(**kwargs) + self.target = target + self.template_overrides = template_overrides + self.simple_templates = simple_templates + self.templates = {} + self.fss = {} + self._dircache = {} + self.max_gap = max_gap + self.max_block = max_block + if isinstance(fo, str): + dic = dict( + **(ref_storage_args or target_options or {}), protocol=target_protocol + ) + ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic) + if ref_fs.isfile(fo2): + # text JSON + with fsspec.open(fo, "rb", **dic) as f: + logger.info("Read reference from URL %s", fo) + text = json.load(f) + self._process_references(text, template_overrides) + else: + # Lazy parquet refs + logger.info("Open lazy reference dict from URL %s", fo) + self.references = LazyReferenceMapper( + fo2, + fs=ref_fs, + cache_size=cache_size, + ) + else: + # dictionaries + self._process_references(fo, template_overrides) + if isinstance(fs, dict): + self.fss = { + k: ( + fsspec.filesystem(k.split(":", 1)[0], **opts) + if isinstance(opts, dict) + else opts + ) + for k, opts in fs.items() + } + if None not in self.fss: + self.fss[None] = filesystem("file") + return + if fs is not None: + # single remote FS + remote_protocol = ( + fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol + ) + self.fss[remote_protocol] = fs + + if remote_protocol is None: + # get single protocol from any templates + for ref in self.templates.values(): + if callable(ref): + ref = ref() + protocol, _ = fsspec.core.split_protocol(ref) + if protocol and protocol not in self.fss: + fs = filesystem(protocol, **(remote_options or {})) + self.fss[protocol] = fs + if remote_protocol is None: + # get single protocol from references + # TODO: warning here, since this can be very expensive?
+ for ref in self.references.values(): + if callable(ref): + ref = ref() + if isinstance(ref, list) and ref[0]: + protocol, _ = fsspec.core.split_protocol(ref[0]) + if protocol not in self.fss: + fs = filesystem(protocol, **(remote_options or {})) + self.fss[protocol] = fs + # only use first remote URL + break + + if remote_protocol and remote_protocol not in self.fss: + fs = filesystem(remote_protocol, **(remote_options or {})) + self.fss[remote_protocol] = fs + + self.fss[None] = fs or filesystem("file") # default one + + def _cat_common(self, path, start=None, end=None): + path = self._strip_protocol(path) + logger.debug(f"cat: {path}") + try: + part = self.references[path] + except KeyError: + raise FileNotFoundError(path) + if isinstance(part, str): + part = part.encode() + if isinstance(part, bytes): + logger.debug(f"Reference: {path}, type bytes") + if part.startswith(b"base64:"): + part = base64.b64decode(part[7:]) + return part, None, None + + if len(part) == 1: + logger.debug(f"Reference: {path}, whole file => {part}") + url = part[0] + start1, end1 = start, end + else: + url, start0, size = part + logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}") + end0 = start0 + size + + if start is not None: + if start >= 0: + start1 = start0 + start + else: + start1 = end0 + start + else: + start1 = start0 + if end is not None: + if end >= 0: + end1 = start0 + end + else: + end1 = end0 + end + else: + end1 = end0 + if url is None: + url = self.target + return url, start1, end1 + + async def _cat_file(self, path, start=None, end=None, **kwargs): + part_or_url, start0, end0 = self._cat_common(path, start=start, end=end) + if isinstance(part_or_url, bytes): + return part_or_url[start:end] + protocol, _ = split_protocol(part_or_url) + try: + return await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0) + except Exception as e: + raise ReferenceNotReachable(path, part_or_url) from e + + def cat_file(self, path, start=None, end=None, **kwargs): + part_or_url, start0, end0 = self._cat_common(path, start=start, end=end) + if isinstance(part_or_url, bytes): + return part_or_url[start:end] + protocol, _ = split_protocol(part_or_url) + try: + return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0) + except Exception as e: + raise ReferenceNotReachable(path, part_or_url) from e + + def pipe_file(self, path, value, **_): + """Temporarily add binary data or reference as a file""" + self.references[path] = value + + async def _get_file(self, rpath, lpath, **kwargs): + if self.isdir(rpath): + return os.makedirs(lpath, exist_ok=True) + data = await self._cat_file(rpath) + with open(lpath, "wb") as f: + f.write(data) + + def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs): + if self.isdir(rpath): + return os.makedirs(lpath, exist_ok=True) + data = self.cat_file(rpath, **kwargs) + callback.set_size(len(data)) + if isfilelike(lpath): + lpath.write(data) + else: + with open(lpath, "wb") as f: + f.write(data) + callback.absolute_update(len(data)) + + def get(self, rpath, lpath, recursive=False, **kwargs): + if recursive: + # trigger directory build + self.ls("") + rpath = self.expand_path(rpath, recursive=recursive) + fs = fsspec.filesystem("file", auto_mkdir=True) + targets = other_paths(rpath, lpath) + if recursive: + data = self.cat([r for r in rpath if not self.isdir(r)]) + else: + data = self.cat(rpath) + for remote, local in zip(rpath, targets): + if remote in data: + fs.pipe_file(local, data[remote]) + + def cat(self, path, recursive=False,
on_error="raise", **kwargs): + if isinstance(path, str) and recursive: + raise NotImplementedError + if isinstance(path, list) and (recursive or any("*" in p for p in path)): + raise NotImplementedError + # TODO: if references is lazy, pre-fetch all paths in batch before access + proto_dict = _protocol_groups(path, self.references) + out = {} + for proto, paths in proto_dict.items(): + fs = self.fss[proto] + urls, starts, ends, valid_paths = [], [], [], [] + for p in paths: + # find references or label not-found. Early exit if any not + # found and on_error is "raise" + try: + u, s, e = self._cat_common(p) + except FileNotFoundError as err: + if on_error == "raise": + raise + if on_error != "omit": + out[p] = err + else: + urls.append(u) + starts.append(s) + ends.append(e) + valid_paths.append(p) + + # process references into form for merging + urls2 = [] + starts2 = [] + ends2 = [] + paths2 = [] + whole_files = set() + for u, s, e, p in zip(urls, starts, ends, valid_paths): + if isinstance(u, bytes): + # data + out[p] = u + elif s is None: + # whole file - limits are None, None, but no further + # entries take for this file + whole_files.add(u) + urls2.append(u) + starts2.append(s) + ends2.append(e) + paths2.append(p) + for u, s, e, p in zip(urls, starts, ends, valid_paths): + # second run to account for files that are to be loaded whole + if s is not None and u not in whole_files: + urls2.append(u) + starts2.append(s) + ends2.append(e) + paths2.append(p) + + # merge and fetch consolidated ranges + new_paths, new_starts, new_ends = merge_offset_ranges( + list(urls2), + list(starts2), + list(ends2), + sort=True, + max_gap=self.max_gap, + max_block=self.max_block, + ) + bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends) + + # unbundle from merged bytes - simple approach + for u, s, e, p in zip(urls, starts, ends, valid_paths): + if p in out: + continue # was bytes, already handled + for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out): + if np == u and (ns is None or ne is None): + if isinstance(b, Exception): + out[p] = b + else: + out[p] = b[s:e] + elif np == u and s >= ns and e <= ne: + if isinstance(b, Exception): + out[p] = b + else: + out[p] = b[s - ns : (e - ne) or None] + + for k, v in out.copy().items(): + # these were valid references, but fetch failed, so transform exc + if isinstance(v, Exception) and k in self.references: + ex = out[k] + new_ex = ReferenceNotReachable(k, self.references[k]) + new_ex.__cause__ = ex + if on_error == "raise": + raise new_ex + elif on_error != "omit": + out[k] = new_ex + + if len(out) == 1 and isinstance(path, str) and "*" not in path: + return _first(out) + return out + + def _process_references(self, references, template_overrides=None): + vers = references.get("version", None) + if vers is None: + self._process_references0(references) + elif vers == 1: + self._process_references1(references, template_overrides=template_overrides) + else: + raise ValueError(f"Unknown reference spec version: {vers}") + # TODO: we make dircache by iterating over all entries, but for Spec >= 1, + # can replace with programmatic. Is it even needed for mapper interface? 
+ + def _process_references0(self, references): + """Make reference dict for Spec Version 0""" + self.references = references + + def _process_references1(self, references, template_overrides=None): + if not self.simple_templates or self.templates: + import jinja2 + self.references = {} + self._process_templates(references.get("templates", {})) + + @lru_cache(1000) + def _render_jinja(u): + return jinja2.Template(u).render(**self.templates) + + for k, v in references.get("refs", {}).items(): + if isinstance(v, str): + if v.startswith("base64:"): + self.references[k] = base64.b64decode(v[7:]) + self.references[k] = v + elif self.templates: + u = v[0] + if "{{" in u: + if self.simple_templates: + u = ( + u.replace("{{", "{") + .replace("}}", "}") + .format(**self.templates) + ) + else: + u = _render_jinja(u) + self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]] + else: + self.references[k] = v + self.references.update(self._process_gen(references.get("gen", []))) + + def _process_templates(self, tmp): + self.templates = {} + if self.template_overrides is not None: + tmp.update(self.template_overrides) + for k, v in tmp.items(): + if "{{" in v: + import jinja2 + + self.templates[k] = lambda temp=v, **kwargs: jinja2.Template( + temp + ).render(**kwargs) + else: + self.templates[k] = v + + def _process_gen(self, gens): + out = {} + for gen in gens: + dimension = { + k: v + if isinstance(v, list) + else range(v.get("start", 0), v["stop"], v.get("step", 1)) + for k, v in gen["dimensions"].items() + } + products = ( + dict(zip(dimension.keys(), values)) + for values in itertools.product(*dimension.values()) + ) + for pr in products: + import jinja2 + + key = jinja2.Template(gen["key"]).render(**pr, **self.templates) + url = jinja2.Template(gen["url"]).render(**pr, **self.templates) + if ("offset" in gen) and ("length" in gen): + offset = int( + jinja2.Template(gen["offset"]).render(**pr, **self.templates) + ) + length = int( + jinja2.Template(gen["length"]).render(**pr, **self.templates) + ) + out[key] = [url, offset, length] + elif ("offset" in gen) ^ ("length" in gen): + raise ValueError( + "Both 'offset' and 'length' are required for a " + "reference generator entry if either is provided." 
+ ) + else: + out[key] = [url] + return out + + def _dircache_from_items(self): + self.dircache = {"": []} + it = self.references.items() + for path, part in it: + if isinstance(part, (bytes, str)): + size = len(part) + elif len(part) == 1: + size = None + else: + _, _, size = part + par = path.rsplit("/", 1)[0] if "/" in path else "" + par0 = par + subdirs = [par0] + while par0 and par0 not in self.dircache: + # collect parent directories + par0 = self._parent(par0) + subdirs.append(par0) + + subdirs = subdirs[::-1] + for parent, child in zip(subdirs, subdirs[1:]): + # register newly discovered directories + assert child not in self.dircache + assert parent in self.dircache + self.dircache[parent].append( + {"name": child, "type": "directory", "size": 0} + ) + self.dircache[child] = [] + + self.dircache[par].append({"name": path, "type": "file", "size": size}) + + def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs): + data = self.cat_file(path) # load whole chunk into memory + return io.BytesIO(data) + + def ls(self, path, detail=True, **kwargs): + path = self._strip_protocol(path) + if isinstance(self.references, LazyReferenceMapper): + try: + return self.references.ls(path, detail) + except KeyError: + pass + raise FileNotFoundError(f"'{path}' is not a known key") + if not self.dircache: + self._dircache_from_items() + out = self._ls_from_cache(path) + if out is None: + raise FileNotFoundError(path) + if detail: + return out + return [o["name"] for o in out] + + def exists(self, path, **kwargs): # overwrite auto-sync version + return self.isdir(path) or self.isfile(path) + + def isdir(self, path): # overwrite auto-sync version + if self.dircache: + return path in self.dircache + elif isinstance(self.references, LazyReferenceMapper): + return path in self.references.listdir("") + else: + # this may be faster than building dircache for single calls, but + # by looping will be slow for many calls; could cache it? 
+ return any(_.startswith(f"{path}/") for _ in self.references) + + def isfile(self, path): # overwrite auto-sync version + return path in self.references + + async def _ls(self, path, detail=True, **kwargs): # calls fast sync code + return self.ls(path, detail, **kwargs) + + def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + if withdirs: + return super().find( + path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs + ) + if path: + path = self._strip_protocol(path) + r = sorted(k for k in self.references if k.startswith(path)) + else: + r = sorted(self.references) + if detail: + if not self.dircache: + self._dircache_from_items() + return {k: self._ls_from_cache(k)[0] for k in r} + else: + return r + + def info(self, path, **kwargs): + out = self.references.get(path) + if out is not None: + if isinstance(out, (str, bytes)): + # decode base64 here + return {"name": path, "type": "file", "size": len(out)} + elif len(out) > 1: + return {"name": path, "type": "file", "size": out[2]} + else: + out0 = [{"name": path, "type": "file", "size": None}] + else: + out = self.ls(path, True) + out0 = [o for o in out if o["name"] == path] + if not out0: + return {"name": path, "type": "directory", "size": 0} + if out0[0]["size"] is None: + # if this is a whole remote file, update size using remote FS + prot, _ = split_protocol(self.references[path][0]) + out0[0]["size"] = self.fss[prot].size(self.references[path][0]) + return out0[0] + + async def _info(self, path, **kwargs): # calls fast sync code + return self.info(path) + + async def _rm_file(self, path, **kwargs): + self.references.pop( + path, None + ) # ignores FileNotFound, just as well for directories + self.dircache.clear() # this is a bit heavy handed + + async def _pipe_file(self, path, data): + # can be str or bytes + self.references[path] = data + self.dircache.clear() # this is a bit heavy handed + + async def _put_file(self, lpath, rpath, **kwargs): + # puts binary + with open(lpath, "rb") as f: + self.references[rpath] = f.read() + self.dircache.clear() # this is a bit heavy handed + + def save_json(self, url, **storage_options): + """Write modified references into new location""" + out = {} + for k, v in self.references.items(): + if isinstance(v, bytes): + try: + out[k] = v.decode("ascii") + except UnicodeDecodeError: + out[k] = (b"base64:" + base64.b64encode(v)).decode() + else: + out[k] = v + with fsspec.open(url, "wb", **storage_options) as f: + f.write(json.dumps({"version": 1, "refs": out}).encode()) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py new file mode 100644 index 0000000000000000000000000000000000000000..02f9ede372a6e383387609df5593814e2e5f8296 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py @@ -0,0 +1,324 @@ +""" +This module contains SMBFileSystem class responsible for handling access to +Windows Samba network shares by using package smbprotocol +""" + +import datetime +import uuid +from stat import S_ISDIR, S_ISLNK + +import smbclient + +from .. import AbstractFileSystem +from ..utils import infer_storage_options + +# ! pylint: disable=bad-continuation + + +class SMBFileSystem(AbstractFileSystem): + """Allow reading and writing to Windows and Samba network shares. 
+
+    When using ``fsspec.open()`` to get a file-like object, the URI should
+    be specified in this format:
+    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.
+
+    Example::
+
+        >>> import fsspec
+        >>> with fsspec.open(
+        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
+        ... ) as smbfile:
+        ...     df = pd.read_csv(smbfile, sep='|', header=None)
+
+    Note that you need to pass in a valid hostname or IP address for the host
+    component of the URL. Do not use the Windows/NetBIOS machine name for the
+    host component.
+
+    The first component of the path in the URL points to the name of the shared
+    folder. Subsequent path components will point to the directory/folder/file.
+
+    The URL components ``workgroup``, ``user``, ``password`` and ``port`` are
+    optional.
+
+    .. note::
+
+        This filesystem requires the `smbprotocol`_ package to be
+        installed, e.g.::
+
+            $ pip install smbprotocol
+            # or
+            # pip install smbprotocol[kerberos]
+
+    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
+
+    Note: if using this with ``open`` or ``open_files``, with full URLs,
+    there is no way to tell if a path is relative, so all paths are assumed
+    to be absolute.
+    """
+
+    protocol = "smb"
+
+    # pylint: disable=too-many-arguments
+    def __init__(
+        self,
+        host,
+        port=None,
+        username=None,
+        password=None,
+        timeout=60,
+        encrypt=None,
+        share_access=None,
+        **kwargs,
+    ):
+        """
+        You can use _get_kwargs_from_urls to get some kwargs from
+        a reasonable SMB url.
+
+        Authentication will be anonymous or integrated if username/password are not
+        given.
+
+        Parameters
+        ----------
+        host: str
+            The remote server name/ip to connect to
+        port: int or None
+            Port to connect with. Usually 445, sometimes 139.
+        username: str or None
+            Username to connect with. Required if Kerberos auth is not being used.
+        password: str or None
+            User's password on the server, if using username
+        timeout: int
+            Connection timeout in seconds
+        encrypt: bool
+            Whether to force encryption or not, once this has been set to True
+            the session cannot be changed back to False.
+        share_access: str or None
+            Specifies the default access applied to file open operations
+            performed with this file system object.
+            This affects whether other processes can concurrently open a handle
+            to the same file.
+
+            - None (the default): exclusively locks the file until closed.
+            - 'r': Allow other handles to be opened with read access.
+            - 'w': Allow other handles to be opened with write access.
+            - 'd': Allow other handles to be opened with delete access.
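+
+        Examples
+        --------
+        A minimal connection sketch (illustrative server name and
+        credentials; the share/path layout follows the class docstring
+        above)::
+
+            fs = SMBFileSystem(
+                "myserver.com",          # hypothetical host
+                username="myuser",
+                password="mypassword",
+                share_access="r",        # let other processes read concurrently
+            )
+            with fs.open("/share/folder/file.csv", "rb") as f:
+                header = f.readline()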
+ """ + super().__init__(**kwargs) + self.host = host + self.port = port + self.username = username + self.password = password + self.timeout = timeout + self.encrypt = encrypt + self.temppath = kwargs.pop("temppath", "") + self.share_access = share_access + self._connect() + + @property + def _port(self): + return 445 if self.port is None else self.port + + def _connect(self): + smbclient.register_session( + self.host, + username=self.username, + password=self.password, + port=self._port, + encrypt=self.encrypt, + connection_timeout=self.timeout, + ) + + @classmethod + def _strip_protocol(cls, path): + return infer_storage_options(path)["path"] + + @staticmethod + def _get_kwargs_from_urls(path): + # smb://workgroup;user:password@host:port/share/folder/file.csv + out = infer_storage_options(path) + out.pop("path", None) + out.pop("protocol", None) + return out + + def mkdir(self, path, create_parents=True, **kwargs): + wpath = _as_unc_path(self.host, path) + if create_parents: + smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs) + else: + smbclient.mkdir(wpath, port=self._port, **kwargs) + + def makedirs(self, path, exist_ok=False): + if _share_has_path(path): + wpath = _as_unc_path(self.host, path) + smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port) + + def rmdir(self, path): + if _share_has_path(path): + wpath = _as_unc_path(self.host, path) + smbclient.rmdir(wpath, port=self._port) + + def info(self, path, **kwargs): + wpath = _as_unc_path(self.host, path) + stats = smbclient.stat(wpath, port=self._port, **kwargs) + if S_ISDIR(stats.st_mode): + stype = "directory" + elif S_ISLNK(stats.st_mode): + stype = "link" + else: + stype = "file" + res = { + "name": path + "/" if stype == "directory" else path, + "size": stats.st_size, + "type": stype, + "uid": stats.st_uid, + "gid": stats.st_gid, + "time": stats.st_atime, + "mtime": stats.st_mtime, + } + return res + + def created(self, path): + """Return the created timestamp of a file as a datetime.datetime""" + wpath = _as_unc_path(self.host, path) + stats = smbclient.stat(wpath, port=self._port) + return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc) + + def modified(self, path): + """Return the modified timestamp of a file as a datetime.datetime""" + wpath = _as_unc_path(self.host, path) + stats = smbclient.stat(wpath, port=self._port) + return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc) + + def ls(self, path, detail=True, **kwargs): + unc = _as_unc_path(self.host, path) + listed = smbclient.listdir(unc, port=self._port, **kwargs) + dirs = ["/".join([path.rstrip("/"), p]) for p in listed] + if detail: + dirs = [self.info(d) for d in dirs] + return dirs + + # pylint: disable=too-many-arguments + def _open( + self, + path, + mode="rb", + block_size=-1, + autocommit=True, + cache_options=None, + **kwargs, + ): + """ + block_size: int or None + If 0, no buffering, 1, line buffering, >1, buffer that many bytes + + Notes + ----- + By specifying 'share_access' in 'kwargs' it is possible to override the + default shared access setting applied in the constructor of this object. 
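+
+        For example, to open one file with shared read access while the
+        filesystem default remains exclusive (an illustrative call, reusing
+        the ``share_access`` values documented on the constructor)::
+
+            f = fs.open('/share/folder/file.csv', 'rb', share_access='r')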
+ """ + bls = block_size if block_size is not None and block_size >= 0 else -1 + wpath = _as_unc_path(self.host, path) + share_access = kwargs.pop("share_access", self.share_access) + if "w" in mode and autocommit is False: + temp = _as_temp_path(self.host, path, self.temppath) + return SMBFileOpener( + wpath, temp, mode, port=self._port, block_size=bls, **kwargs + ) + return smbclient.open_file( + wpath, + mode, + buffering=bls, + share_access=share_access, + port=self._port, + **kwargs, + ) + + def copy(self, path1, path2, **kwargs): + """Copy within two locations in the same filesystem""" + wpath1 = _as_unc_path(self.host, path1) + wpath2 = _as_unc_path(self.host, path2) + smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs) + + def _rm(self, path): + if _share_has_path(path): + wpath = _as_unc_path(self.host, path) + stats = smbclient.stat(wpath, port=self._port) + if S_ISDIR(stats.st_mode): + smbclient.rmdir(wpath, port=self._port) + else: + smbclient.remove(wpath, port=self._port) + + def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs): + wpath1 = _as_unc_path(self.host, path1) + wpath2 = _as_unc_path(self.host, path2) + smbclient.rename(wpath1, wpath2, port=self._port, **kwargs) + + +def _as_unc_path(host, path): + rpath = path.replace("/", "\\") + unc = f"\\\\{host}{rpath}" + return unc + + +def _as_temp_path(host, path, temppath): + share = path.split("/")[1] + temp_file = f"/{share}{temppath}/{uuid.uuid4()}" + unc = _as_unc_path(host, temp_file) + return unc + + +def _share_has_path(path): + parts = path.count("/") + if path.endswith("/"): + return parts > 2 + return parts > 1 + + +class SMBFileOpener: + """writes to remote temporary file, move on commit""" + + def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs): + self.path = path + self.temp = temp + self.mode = mode + self.block_size = block_size + self.kwargs = kwargs + self.smbfile = None + self._incontext = False + self.port = port + self._open() + + def _open(self): + if self.smbfile is None or self.smbfile.closed: + self.smbfile = smbclient.open_file( + self.temp, + self.mode, + port=self.port, + buffering=self.block_size, + **self.kwargs, + ) + + def commit(self): + """Move temp file to definitive on success.""" + # TODO: use transaction support in SMB protocol + smbclient.replace(self.temp, self.path, port=self.port) + + def discard(self): + """Remove the temp file on failure.""" + smbclient.remove(self.temp, port=self.port) + + def __fspath__(self): + return self.path + + def __iter__(self): + return self.smbfile.__iter__() + + def __getattr__(self, item): + return getattr(self.smbfile, item) + + def __enter__(self): + self._incontext = True + return self.smbfile.__enter__() + + def __exit__(self, exc_type, exc_value, traceback): + self._incontext = False + self.smbfile.__exit__(exc_type, exc_value, traceback) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py new file mode 100644 index 0000000000000000000000000000000000000000..473975df3c73fb50106bd9b9b36ed8a30c4bc322 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py @@ -0,0 +1,133 @@ +import zipfile + +import fsspec +from fsspec.archive import AbstractArchiveFileSystem + + +class ZipFileSystem(AbstractArchiveFileSystem): + """Read/Write contents of ZIP archive as a file-system + + Keeps file object open while 
instance lives.
+
+    This class is pickleable, but not necessarily thread-safe
+    """
+
+    root_marker = ""
+    protocol = "zip"
+    cachable = False
+
+    def __init__(
+        self,
+        fo="",
+        mode="r",
+        target_protocol=None,
+        target_options=None,
+        compression=zipfile.ZIP_STORED,
+        allowZip64=True,
+        compresslevel=None,
+        **kwargs,
+    ):
+        """
+        Parameters
+        ----------
+        fo: str or file-like
+            Contains ZIP, and must exist. If a str, will fetch file using
+            :meth:`~fsspec.open_files`, which must return one file exactly.
+        mode: str
+            Accept: "r", "w", "a"
+        target_protocol: str (optional)
+            If ``fo`` is a string, this value can be used to override the
+            FS protocol inferred from a URL
+        target_options: dict (optional)
+            Kwargs passed when instantiating the target FS, if ``fo`` is
+            a string.
+        compression, allowZip64, compresslevel: passed to ZipFile
+            Only relevant when creating a ZIP
+        """
+        super().__init__(**kwargs)
+        if mode not in set("rwa"):
+            raise ValueError(f"mode '{mode}' not understood")
+        self.mode = mode
+        if isinstance(fo, str):
+            if mode == "a":
+                m = "r+b"
+            else:
+                m = mode + "b"
+            fo = fsspec.open(
+                fo, mode=m, protocol=target_protocol, **(target_options or {})
+            )
+        self.of = fo
+        self.fo = fo.__enter__()  # the whole instance is a context
+        self.zip = zipfile.ZipFile(
+            self.fo,
+            mode=mode,
+            compression=compression,
+            allowZip64=allowZip64,
+            compresslevel=compresslevel,
+        )
+        self.dir_cache = None
+
+    @classmethod
+    def _strip_protocol(cls, path):
+        # zip file paths are always relative to the archive root
+        return super()._strip_protocol(path).lstrip("/")
+
+    def __del__(self):
+        if hasattr(self, "zip"):
+            self.close()
+            del self.zip
+
+    def close(self):
+        """Commits any write changes to the file. Done on ``del`` too."""
+        self.zip.close()
+
+    def _get_dirs(self):
+        if self.dir_cache is None or self.mode in set("wa"):
+            # when writing, dir_cache is always in the ZipFile's attributes,
+            # not read from the file.
+            files = self.zip.infolist()
+            self.dir_cache = {
+                dirname.rstrip("/"): {
+                    "name": dirname.rstrip("/"),
+                    "size": 0,
+                    "type": "directory",
+                }
+                for dirname in self._all_dirnames(self.zip.namelist())
+            }
+            for z in files:
+                f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
+                f.update(
+                    {
+                        "name": z.filename.rstrip("/"),
+                        "size": z.file_size,
+                        "type": ("directory" if z.is_dir() else "file"),
+                    }
+                )
+                self.dir_cache[f["name"]] = f
+
+    def pipe_file(self, path, value, **kwargs):
+        # override upstream, because we know the exact file size in this case
+        self.zip.writestr(path, value, **kwargs)
+
+    def _open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        autocommit=True,
+        cache_options=None,
+        **kwargs,
+    ):
+        path = self._strip_protocol(path)
+        if "r" in mode and self.mode in set("wa"):
+            if self.exists(path):
+                raise OSError("ZipFS can only be open for reading or writing, not both")
+            raise FileNotFoundError(path)
+        if "r" in self.mode and "w" in mode:
+            raise OSError("ZipFS can only be open for reading or writing, not both")
+        out = self.zip.open(path, mode.strip("b"))
+        if "r" in mode:
+            info = self.info(path)
+            out.size = info["size"]
+            out.name = info["name"]
+        return out
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9822ae1750fe3657aaec608b7434ccc832f86cb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py
@@ -0,0 +1,247 @@
+import array
+import posixpath
+import warnings
+from collections.abc import MutableMapping
+from functools import cached_property
+
+from .core import url_to_fs
+
+
+class FSMap(MutableMapping):
+    """Wrap a FileSystem instance as a mutable mapping.
+
+    The keys of the mapping become files under the given root, and the
+    values (which must be bytes) the contents of those files.
+
+    Parameters
+    ----------
+    root: string
+        prefix for all the files
+    fs: FileSystem instance
+    check: bool (=False)
+        performs a touch at the location, to check for write access.
+
+    Examples
+    --------
+    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
+    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
+    or, more likely
+    >>> d = fs.get_mapper('my-data/path/')
+
+    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
+    >>> list(d.keys())  # doctest: +SKIP
+    ['loc1']
+    >>> d['loc1']  # doctest: +SKIP
+    b'Hello World'
+    """
+
+    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
+        self.fs = fs
+        self.root = fs._strip_protocol(root).rstrip("/")
+        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
+        if missing_exceptions is None:
+            missing_exceptions = (
+                FileNotFoundError,
+                IsADirectoryError,
+                NotADirectoryError,
+            )
+        self.missing_exceptions = missing_exceptions
+        self.check = check
+        self.create = create
+        if create:
+            if not self.fs.exists(root):
+                self.fs.mkdir(root)
+        if check:
+            if not self.fs.exists(root):
+                raise ValueError(
+                    f"Path {root} does not exist. Create it "
+                    f"with the ``create=True`` keyword"
+                )
+            self.fs.touch(root + "/a")
+            self.fs.rm(root + "/a")
+
+    @cached_property
+    def dirfs(self):
+        """dirfs instance that can be used with the same keys as the mapper"""
+        from .implementations.dirfs import DirFileSystem
+
+        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
+
+    def clear(self):
+        """Remove all keys below root - empties out mapping"""
+        try:
+            self.fs.rm(self.root, True)
+            self.fs.mkdir(self.root)
+        except:  # noqa: E722
+            pass
+
+    def getitems(self, keys, on_error="raise"):
+        """Fetch multiple items from the store
+
+        If the backend is async-able, this might proceed concurrently
+
+        Parameters
+        ----------
+        keys: list(str)
+            The keys to be fetched
+        on_error : "raise", "omit", "return"
+            If raise, an underlying exception will be raised (converted to KeyError
+            if the type is in self.missing_exceptions); if omit, keys with exception
+            will simply not be included in the output; if "return", all keys are
+            included in the output, but the value will be bytes or an exception
+            instance.
+
+        Returns
+        -------
+        dict(key, bytes|exception)
+        """
+        keys2 = [self._key_to_str(k) for k in keys]
+        oe = on_error if on_error == "raise" else "return"
+        try:
+            out = self.fs.cat(keys2, on_error=oe)
+            if isinstance(out, bytes):
+                out = {keys2[0]: out}
+        except self.missing_exceptions as e:
+            raise KeyError from e
+        out = {
+            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
+            for k, v in out.items()
+        }
+        return {
+            key: out[k2]
+            for key, k2 in zip(keys, keys2)
+            if on_error == "return" or not isinstance(out[k2], BaseException)
+        }
+
+    def setitems(self, values_dict):
+        """Set the values of multiple items in the store
+
+        Parameters
+        ----------
+        values_dict: dict(str, bytes)
+        """
+        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
+        self.fs.pipe(values)
+
+    def delitems(self, keys):
+        """Remove multiple keys from the store"""
+        self.fs.rm([self._key_to_str(k) for k in keys])
+
+    def _key_to_str(self, key):
+        """Generate full path for the key"""
+        if not isinstance(key, str):
+            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
+            warnings.warn(
+                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
+                DeprecationWarning,
+            )
+            if isinstance(key, list):
+                key = tuple(key)
+            key = str(key)
+        return f"{self._root_key_to_str}{key}"
+
+    def _str_to_key(self, s):
+        """Strip the root path off to leave the key name"""
+        return s[len(self.root) :].lstrip("/")
+
+    def __getitem__(self, key, default=None):
+        """Retrieve data"""
+        k = self._key_to_str(key)
+        try:
+            result = self.fs.cat(k)
+        except self.missing_exceptions:
+            if default is not None:
+                return default
+            raise KeyError(key)
+        return result
+
+    def pop(self, key, default=None):
+        """Pop data"""
+        result = self.__getitem__(key, default)
+        try:
+            del self[key]
+        except KeyError:
+            pass
+        return result
+
+    def __setitem__(self, key, value):
+        """Store value in key"""
+        key = self._key_to_str(key)
+        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
+        self.fs.pipe_file(key, maybe_convert(value))
+
+    def __iter__(self):
+        return (self._str_to_key(x) for x in self.fs.find(self.root))
+
+    def __len__(self):
+        return len(self.fs.find(self.root))
+
+    def __delitem__(self, key):
+        """Remove key"""
+        try:
+            self.fs.rm(self._key_to_str(key))
+        except:  # noqa: E722
+            raise KeyError
+
+    def __contains__(self, key):
+        """Does key exist in mapping?"""
+        path = self._key_to_str(key)
+        return self.fs.exists(path) and self.fs.isfile(path)
+
+    def __reduce__(self):
+        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
+
+
+def maybe_convert(value):
+    if isinstance(value, array.array) or hasattr(value, "__array__"):
+        # bytes-like things
+        if hasattr(value, "dtype") and value.dtype.kind in "Mm":
+            # The buffer interface doesn't support datetime64/timedelta64 numpy
+            # arrays
+            value = value.view("int64")
+        value = bytes(memoryview(value))
+    return value
+
+
+def get_mapper(
+    url="",
+    check=False,
+    create=False,
+    missing_exceptions=None,
+    alternate_root=None,
+    **kwargs,
+):
+    """Create key-value interface for given URL and options
+
+    The URL will be of the form "protocol://location" and point to the root
+    of the mapper required. All keys will be file-names below this location,
+    and their values the contents of each key.
+
+    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.
+
+    Parameters
+    ----------
+    url: str
+        Root URL of mapping
+    check: bool
+        Whether to attempt to read from the location before instantiation, to
+        check that the mapping does exist
+    create: bool
+        Whether to make the directory corresponding to the root before
+        instantiating
+    missing_exceptions: None or tuple
+        If given, these exception types will be regarded as missing keys and
+        raise KeyError when trying to read data. By default, you get
+        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
+    alternate_root: None or str
+        In cases of complex URLs, the parser may fail to pick the correct part
+        for the mapper root, so this arg can override
+
+    Returns
+    -------
+    ``FSMap`` instance, the dict-like key-value store.
+    """
+    # Removing protocol here - could defer to each open() on the backend
+    fs, urlpath = url_to_fs(url, **kwargs)
+    root = alternate_root if alternate_root is not None else urlpath
+    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..543bd89d79751a1d88688677ac313f94896512e5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py
@@ -0,0 +1,742 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import math
+import os
+import pathlib
+import re
+import sys
+import tempfile
+from functools import partial
+from hashlib import md5
+from importlib.metadata import version
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    Sequence,
+    TypeVar,
+)
+from urllib.parse import urlsplit
+
+if TYPE_CHECKING:
+    from typing_extensions import TypeGuard
+
+    from fsspec.spec import AbstractFileSystem
+
+
+DEFAULT_BLOCK_SIZE = 5 * 2**20
+
+T = TypeVar("T")
+
+
+def infer_storage_options(
+    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
+) -> dict[str, Any]:
+    """Infer storage options from URL path and merge it with existing storage
+    options.
+
+    Parameters
+    ----------
+    urlpath: str or unicode
+        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
+    inherit_storage_options: dict (optional)
+        Its contents will get merged with the inferred information from the
+        given path
+
+    Returns
+    -------
+    Storage options dict.
+
+    Examples
+    --------
+    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
+    {"protocol": "file", "path": "/mnt/datasets/test.csv"}
+    >>> infer_storage_options(
+    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
+    ...     inherit_storage_options={'extra': 'value'},
+    ... )  # doctest: +SKIP
+    {"protocol": "hdfs", "username": "username", "password": "pwd",
+    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
+    "url_query": "q=1", "extra": "value"}
+    """
+    # Handle Windows paths including disk name in this special case
+    if (
+        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
+        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
+    ):
+        return {"protocol": "file", "path": urlpath}
+
+    parsed_path = urlsplit(urlpath)
+    protocol = parsed_path.scheme or "file"
+    if parsed_path.fragment:
+        path = "#".join([parsed_path.path, parsed_path.fragment])
+    else:
+        path = parsed_path.path
+    if protocol == "file":
+        # Special case parsing file protocol URL on Windows according to:
+        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
+        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
+        if windows_path:
+            path = "%s:%s" % windows_path.groups()
+
+    if protocol in ["http", "https"]:
+        # for HTTP, we don't want to parse, as requests will anyway
+        return {"protocol": protocol, "path": urlpath}
+
+    options: dict[str, Any] = {"protocol": protocol, "path": path}
+
+    if parsed_path.netloc:
+        # Parse `hostname` from netloc manually because `parsed_path.hostname`
+        # lowercases the hostname which is not always desirable (e.g. in S3):
+        # https://github.com/dask/dask/issues/1417
+        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
+
+        if protocol in ("s3", "s3a", "gcs", "gs"):
+            options["path"] = options["host"] + options["path"]
+        if parsed_path.port:
+            options["port"] = parsed_path.port
+        if parsed_path.username:
+            options["username"] = parsed_path.username
+        if parsed_path.password:
+            options["password"] = parsed_path.password
+
+    if parsed_path.query:
+        options["url_query"] = parsed_path.query
+    if parsed_path.fragment:
+        options["url_fragment"] = parsed_path.fragment
+
+    if inherit_storage_options:
+        update_storage_options(options, inherit_storage_options)
+
+    return options
+
+
+def update_storage_options(
+    options: dict[str, Any], inherited: dict[str, Any] | None = None
+) -> None:
+    if not inherited:
+        inherited = {}
+    collisions = set(options) & set(inherited)
+    if collisions:
+        for collision in collisions:
+            if options.get(collision) != inherited.get(collision):
+                raise KeyError(
+                    f"Collision between inferred and specified storage "
+                    f"option:\n{collision}"
+                )
+    options.update(inherited)
+
+
+# Compression extensions registered via fsspec.compression.register_compression
+compressions: dict[str, str] = {}
+
+
+def infer_compression(filename: str) -> str | None:
+    """Infer compression, if available, from filename.
+
+    Infer a named compression type, if registered and available, from filename
+    extension. This includes builtin (gz, bz2, zip) compressions, as well as
+    optional compressions. See fsspec.compression.register_compression.
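+
+    For example (illustrative; the result depends on which compressions have
+    been registered at runtime):
+
+    >>> infer_compression("data.csv.gz")  # doctest: +SKIP
+    'gzip'
+    >>> infer_compression("data.csv") is None  # doctest: +SKIP
+    True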
+ """ + extension = os.path.splitext(filename)[-1].strip(".").lower() + if extension in compressions: + return compressions[extension] + return None + + +def build_name_function(max_int: float) -> Callable[[int], str]: + """Returns a function that receives a single integer + and returns it as a string padded by enough zero characters + to align with maximum possible integer + + >>> name_f = build_name_function(57) + + >>> name_f(7) + '07' + >>> name_f(31) + '31' + >>> build_name_function(1000)(42) + '0042' + >>> build_name_function(999)(42) + '042' + >>> build_name_function(0)(0) + '0' + """ + # handle corner cases max_int is 0 or exact power of 10 + max_int += 1e-8 + + pad_length = int(math.ceil(math.log10(max_int))) + + def name_function(i: int) -> str: + return str(i).zfill(pad_length) + + return name_function + + +def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool: + r"""Seek current file to file start, file end, or byte after delimiter seq. + + Seeks file to next chunk delimiter, where chunks are defined on file start, + a delimiting sequence, and file end. Use file.tell() to see location afterwards. + Note that file start is a valid split, so must be at offset > 0 to seek for + delimiter. + + Parameters + ---------- + file: a file + delimiter: bytes + a delimiter like ``b'\n'`` or message sentinel, matching file .read() type + blocksize: int + Number of bytes to read from the file at once. + + + Returns + ------- + Returns True if a delimiter was found, False if at file start or end. + + """ + + if file.tell() == 0: + # beginning-of-file, return without seek + return False + + # Interface is for binary IO, with delimiter as bytes, but initialize last + # with result of file.read to preserve compatibility with text IO. + last: bytes | None = None + while True: + current = file.read(blocksize) + if not current: + # end-of-file without delimiter + return False + full = last + current if last else current + try: + if delimiter in full: + i = full.index(delimiter) + file.seek(file.tell() - (len(full) - i) + len(delimiter)) + return True + elif len(current) < blocksize: + # end-of-file without delimiter + return False + except (OSError, ValueError): + pass + last = full[-len(delimiter) :] + + +def read_block( + f: IO[bytes], + offset: int, + length: int | None, + delimiter: bytes | None = None, + split_before: bool = False, +) -> bytes: + """Read a block of bytes from a file + + Parameters + ---------- + f: File + Open file + offset: int + Byte offset to start read + length: int + Number of bytes to read, read through end of file if None + delimiter: bytes (optional) + Ensure reading starts and stops at delimiter bytestring + split_before: bool (optional) + Start/stop read *before* delimiter bytestring. + + + If using the ``delimiter=`` keyword argument we ensure that the read + starts and stops at delimiter boundaries that follow the locations + ``offset`` and ``offset + length``. If ``offset`` is zero then we + start at zero, regardless of delimiter. The bytestring returned WILL + include the terminating delimiter string. 
+ + Examples + -------- + + >>> from io import BytesIO # doctest: +SKIP + >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300') # doctest: +SKIP + >>> read_block(f, 0, 13) # doctest: +SKIP + b'Alice, 100\\nBo' + + >>> read_block(f, 0, 13, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\n' + + >>> read_block(f, 10, 10, delimiter=b'\\n') # doctest: +SKIP + b'Bob, 200\\nCharlie, 300' + """ + if delimiter: + f.seek(offset) + found_start_delim = seek_delimiter(f, delimiter, 2**16) + if length is None: + return f.read() + start = f.tell() + length -= start - offset + + f.seek(start + length) + found_end_delim = seek_delimiter(f, delimiter, 2**16) + end = f.tell() + + # Adjust split location to before delimiter iff seek found the + # delimiter sequence, not start or end of file. + if found_start_delim and split_before: + start -= len(delimiter) + + if found_end_delim and split_before: + end -= len(delimiter) + + offset = start + length = end - start + + f.seek(offset) + + # TODO: allow length to be None and read to the end of the file? + assert length is not None + b = f.read(length) + return b + + +def tokenize(*args: Any, **kwargs: Any) -> str: + """Deterministic token + + (modified from dask.base) + + >>> tokenize([1, 2, '3']) + '9d71491b50023b06fc76928e6eddb952' + + >>> tokenize('Hello') == tokenize('Hello') + True + """ + if kwargs: + args += (kwargs,) + try: + h = md5(str(args).encode()) + except ValueError: + # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380 + h = md5(str(args).encode(), usedforsecurity=False) + return h.hexdigest() + + +def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str: + """Attempt to convert a path-like object to a string. + + Parameters + ---------- + filepath: object to be converted + + Returns + ------- + filepath_str: maybe a string version of the object + + Notes + ----- + Objects supporting the fspath protocol are coerced according to its + __fspath__ method. + + For backwards compatibility with older Python version, pathlib.Path + objects are specially coerced. + + Any other object is passed through unchanged, which includes bytes, + strings, buffers, or anything else that's not even path-like. + """ + if isinstance(filepath, str): + return filepath + elif hasattr(filepath, "__fspath__"): + return filepath.__fspath__() + elif isinstance(filepath, pathlib.Path): + return str(filepath) + elif hasattr(filepath, "path"): + return filepath.path + else: + return filepath # type: ignore[return-value] + + +def make_instance( + cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any] +) -> T: + inst = cls(*args, **kwargs) + inst._determine_worker() # type: ignore[attr-defined] + return inst + + +def common_prefix(paths: Iterable[str]) -> str: + """For a list of paths, find the shortest prefix common to all""" + parts = [p.split("/") for p in paths] + lmax = min(len(p) for p in parts) + end = 0 + for i in range(lmax): + end = all(p[i] == parts[0][i] for p in parts) + if not end: + break + i += end + return "/".join(parts[0][:i]) + + +def other_paths( + paths: list[str], + path2: str | list[str], + exists: bool = False, + flatten: bool = False, +) -> list[str]: + """In bulk file operations, construct a new file tree from a list of files + + Parameters + ---------- + paths: list of str + The input file tree + path2: str or list of str + Root to construct the new list in. If this is already a list of str, we just + assert it has the right number of elements. 
+    exists: bool (optional)
+        For a str destination that already exists (and is a dir), files
+        should end up inside it.
+    flatten: bool (optional)
+        Whether to flatten the input directory tree structure so that the output files
+        are in the same directory.
+
+    Returns
+    -------
+    list of str
+    """
+
+    if isinstance(path2, str):
+        path2 = path2.rstrip("/")
+
+        if flatten:
+            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
+        else:
+            cp = common_prefix(paths)
+            if exists:
+                cp = cp.rsplit("/", 1)[0]
+            if not cp and all(not s.startswith("/") for s in paths):
+                path2 = ["/".join([path2, p]) for p in paths]
+            else:
+                path2 = [p.replace(cp, path2, 1) for p in paths]
+    else:
+        assert len(paths) == len(path2)
+    return path2
+
+
+def is_exception(obj: Any) -> bool:
+    return isinstance(obj, BaseException)
+
+
+def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
+    for attr in ["read", "close", "tell"]:
+        if not hasattr(f, attr):
+            return False
+    return True
+
+
+def get_protocol(url: str) -> str:
+    url = stringify_path(url)
+    parts = re.split(r"(\:\:|\://)", url, 1)
+    if len(parts) > 1:
+        return parts[0]
+    return "file"
+
+
+def can_be_local(path: str) -> bool:
+    """Can the given URL be used with open_local?"""
+    from fsspec import get_filesystem_class
+
+    try:
+        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
+    except (ValueError, ImportError):
+        # not in registry or import failed
+        return False
+
+
+def get_package_version_without_import(name: str) -> str | None:
+    """For a given package name, try to find the version without importing it
+
+    Importing the package and reading ``package.__version__`` is still the
+    fallback here, so an import *might* happen.
+
+    Returns either the version string, or None if the package
+    or the version was not readily found.
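+
+    For example (illustrative; the value depends on the installed
+    distribution):
+
+    >>> get_package_version_without_import("requests")  # doctest: +SKIP
+    '2.31.0'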
+ """ + if name in sys.modules: + mod = sys.modules[name] + if hasattr(mod, "__version__"): + return mod.__version__ + try: + return version(name) + except: # noqa: E722 + pass + try: + import importlib + + mod = importlib.import_module(name) + return mod.__version__ + except (ImportError, AttributeError): + return None + + +def setup_logging( + logger: logging.Logger | None = None, + logger_name: str | None = None, + level: str = "DEBUG", + clear: bool = True, +) -> logging.Logger: + if logger is None and logger_name is None: + raise ValueError("Provide either logger object or logger name") + logger = logger or logging.getLogger(logger_name) + handle = logging.StreamHandler() + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s" + ) + handle.setFormatter(formatter) + if clear: + logger.handlers.clear() + logger.addHandler(handle) + logger.setLevel(level) + return logger + + +def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str: + return fs.unstrip_protocol(name) + + +def mirror_from( + origin_name: str, methods: Iterable[str] +) -> Callable[[type[T]], type[T]]: + """Mirror attributes and methods from the given + origin_name attribute of the instance to the + decorated class""" + + def origin_getter(method: str, self: Any) -> Any: + origin = getattr(self, origin_name) + return getattr(origin, method) + + def wrapper(cls: type[T]) -> type[T]: + for method in methods: + wrapped_method = partial(origin_getter, method) + setattr(cls, method, property(wrapped_method)) + return cls + + return wrapper + + +@contextlib.contextmanager +def nullcontext(obj: T) -> Iterator[T]: + yield obj + + +def merge_offset_ranges( + paths: list[str], + starts: list[int] | int, + ends: list[int] | int, + max_gap: int = 0, + max_block: int | None = None, + sort: bool = True, +) -> tuple[list[str], list[int], list[int]]: + """Merge adjacent byte-offset ranges when the inter-range + gap is <= `max_gap`, and when the merged byte range does not + exceed `max_block` (if specified). By default, this function + will re-order the input paths and byte ranges to ensure sorted + order. If the user can guarantee that the inputs are already + sorted, passing `sort=False` will skip the re-ordering. + """ + # Check input + if not isinstance(paths, list): + raise TypeError + if not isinstance(starts, list): + starts = [starts] * len(paths) + if not isinstance(ends, list): + ends = [ends] * len(paths) + if len(starts) != len(paths) or len(ends) != len(paths): + raise ValueError + + # Early Return + if len(starts) <= 1: + return paths, starts, ends + + starts = [s or 0 for s in starts] + # Sort by paths and then ranges if `sort=True` + if sort: + paths, starts, ends = ( + list(v) + for v in zip( + *sorted( + zip(paths, starts, ends), + ) + ) + ) + + if paths: + # Loop through the coupled `paths`, `starts`, and + # `ends`, and merge adjacent blocks when appropriate + new_paths = paths[:1] + new_starts = starts[:1] + new_ends = ends[:1] + for i in range(1, len(paths)): + if paths[i] == paths[i - 1] and new_ends[-1] is None: + continue + elif ( + paths[i] != paths[i - 1] + or ((starts[i] - new_ends[-1]) > max_gap) + or (max_block is not None and (ends[i] - new_starts[-1]) > max_block) + ): + # Cannot merge with previous block. 
+ # Add new `paths`, `starts`, and `ends` elements + new_paths.append(paths[i]) + new_starts.append(starts[i]) + new_ends.append(ends[i]) + else: + # Merge with previous block by updating the + # last element of `ends` + new_ends[-1] = ends[i] + return new_paths, new_starts, new_ends + + # `paths` is empty. Just return input lists + return paths, starts, ends + + +def file_size(filelike: IO[bytes]) -> int: + """Find length of any open read-mode file-like""" + pos = filelike.tell() + try: + return filelike.seek(0, 2) + finally: + filelike.seek(pos) + + +@contextlib.contextmanager +def atomic_write(path: str, mode: str = "wb"): + """ + A context manager that opens a temporary file next to `path` and, on exit, + replaces `path` with the temporary file, thereby updating `path` + atomically. + """ + fd, fn = tempfile.mkstemp( + dir=os.path.dirname(path), prefix=os.path.basename(path) + "-" + ) + try: + with open(fd, mode) as fp: + yield fp + except BaseException: + with contextlib.suppress(FileNotFoundError): + os.unlink(fn) + raise + else: + os.replace(fn, path) + + +def _translate(pat, STAR, QUESTION_MARK): + # Copied from: https://github.com/python/cpython/pull/106703. + res: list[str] = [] + add = res.append + i, n = 0, len(pat) + while i < n: + c = pat[i] + i = i + 1 + if c == "*": + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) + elif c == "?": + add(QUESTION_MARK) + elif c == "[": + j = i + if j < n and pat[j] == "!": + j = j + 1 + if j < n and pat[j] == "]": + j = j + 1 + while j < n and pat[j] != "]": + j = j + 1 + if j >= n: + add("\\[") + else: + stuff = pat[i:j] + if "-" not in stuff: + stuff = stuff.replace("\\", r"\\") + else: + chunks = [] + k = i + 2 if pat[i] == "!" else i + 1 + while True: + k = pat.find("-", k, j) + if k < 0: + break + chunks.append(pat[i:k]) + i = k + 1 + k = k + 3 + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: + chunks[-1] += "-" + # Remove empty ranges -- invalid in RE. + for k in range(len(chunks) - 1, 0, -1): + if chunks[k - 1][-1] > chunks[k][0]: + chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:] + del chunks[k] + # Escape backslashes and hyphens for set difference (--). + # Hyphens that create ranges shouldn't be escaped. + stuff = "-".join( + s.replace("\\", r"\\").replace("-", r"\-") for s in chunks + ) + # Escape set operations (&&, ~~ and ||). + stuff = re.sub(r"([&~|])", r"\\\1", stuff) + i = j + 1 + if not stuff: + # Empty range: never match. + add("(?!)") + elif stuff == "!": + # Negated empty range: match any character. + add(".") + else: + if stuff[0] == "!": + stuff = "^" + stuff[1:] + elif stuff[0] in ("^", "["): + stuff = "\\" + stuff + add(f"[{stuff}]") + else: + add(re.escape(c)) + assert i == n + return res + + +def glob_translate(pat): + # Copied from: https://github.com/python/cpython/pull/106703. + # The keyword parameters' values are fixed to: + # recursive=True, include_hidden=True, seps=None + """Translate a pathname with shell wildcards to a regular expression.""" + if os.path.altsep: + seps = os.path.sep + os.path.altsep + else: + seps = os.path.sep + escaped_seps = "".join(map(re.escape, seps)) + any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps + not_sep = f"[^{escaped_seps}]" + one_last_segment = f"{not_sep}+" + one_segment = f"{one_last_segment}{any_sep}" + any_segments = f"(?:.+{any_sep})?" 
+ any_last_segments = ".*" + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == "*": + results.append(one_segment if idx < last_part_idx else one_last_segment) + continue + if part == "**": + results.append(any_segments if idx < last_part_idx else any_last_segments) + continue + elif "**" in part: + raise ValueError( + "Invalid pattern: '**' can only be an entire path component" + ) + if part: + results.extend(_translate(part, f"{not_sep}*", not_sep)) + if idx < last_part_idx: + results.append(any_sep) + res = "".join(results) + return rf"(?s:{res})\Z" diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..94f258df84ba8730208768fc44222bee4b3ebc33 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py @@ -0,0 +1,8 @@ +# This file has moved to under torch/_functorch. It is not public API. +# If you are not a PyTorch developer and you are relying on the following +# imports, please file an issue. +from torch._functorch.aot_autograd import ( + aot_autograd_decompositions, + KNOWN_TYPES, + PytreeThunk, +) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0775047752ca9dbf102d7883108950c62b419494 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3de7787df0c3304207b42b51e9fb62da9d33c7d0 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py @@ -0,0 +1,4 @@ +# This file has moved to under torch/_functorch. It is not public API. +# If you are not a PyTorch developer and you are relying on the following +# imports, please file an issue. 
+from torch._functorch.make_functional import _swap_state diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87dc17920eb5b524f1ffd4faa5bec51a9e562407 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dc90517753e50f92362ba954248e31f69f7cfcd5 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py @@ -0,0 +1,16 @@ +# This file has moved to under torch/_functorch. It is not public API. +# If you are not a PyTorch developer and you are relying on the following +# imports, please file an issue. +from torch._functorch.vmap import ( + _add_batch_dim, + _broadcast_to_and_flatten, + _create_batched_inputs, + _get_name, + _process_batched_inputs, + _remove_batch_dim, + _unwrap_batched, + _validate_and_get_batch_size, + Tensor, + tree_flatten, + tree_unflatten, +) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2477103026d9e70d7a182c7deccaacc520aa8015 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..96b853cd2e27e947bf25b63325741637a801426f --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py @@ -0,0 +1,31 @@ +from torch._functorch import config +from torch._functorch.aot_autograd import ( + aot_function, + aot_module, + aot_module_simplified, + compiled_function, + compiled_module, + get_aot_compilation_context, + get_aot_graph_name, + get_graph_being_compiled, + make_boxed_compiler, + make_boxed_func, +) +from torch._functorch.compilers import ( + debug_compile, + default_decompositions, + draw_graph_compile, + memory_efficient_fusion, + nnc_jit, + nop, + print_compile, + ts_compile, +) +from torch._functorch.fx_minifier import minifier +from torch._functorch.partitioners import ( + default_partition, + draw_graph, + draw_joint_graph, + min_cut_rematerialization_partition, +) +from torch._functorch.python_key import pythonkey_decompose diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..985cd770b5654a76b989999ce9834695761fd140 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c29ecd33c503f7d6e83e5d5b951f28b73e331934 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py new file mode 100644 index 0000000000000000000000000000000000000000..63adcb6e5a64c777a4e1a711da5ac9f3b7fe1dfd --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py @@ -0,0 +1,302 @@ +"""Adapted from https://github.com/arogozhnikov/einops/blob/36c7bb16e57d6e57f8f3050f9e07abdf3f00469f/einops/parsing.py. + +MIT License + +Copyright (c) 2018 Alex Rogozhnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from __future__ import annotations + +import keyword +import warnings +from typing import Collection, List, Mapping, Optional, Set, Tuple, Union + +_ellipsis: str = "…" # NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated + + +class AnonymousAxis: + """Used by `ParsedExpression` to represent an axis with a size (> 1), but no associated identifier. + + Note: Different instances of this class are not equal to each other, even if they have the same value. + """ + + def __init__(self, value: str) -> None: + self.value = int(value) + if self.value < 1: + raise ValueError( + f"Anonymous axis should have positive length, not {self.value}" + ) + + def __repr__(self) -> str: + return f"{self.value}-axis" + + +class ParsedExpression: + """Structure containing information about one side of an `einops`-style pattern (e.g. 'b c (h w)').""" + + def __init__( + self, + expression: str, + *, + allow_underscore: bool = False, + allow_duplicates: bool = False, + ) -> None: + """Parse the expression and store relevant metadata. 
+ + Args: + expression (str): the `einops`-pattern to parse + allow_underscore (bool): whether to allow axis identifier names to begin with an underscore + allow_duplicates (bool): whether to allow an identifier to appear more than once in the expression + """ + self.has_ellipsis: bool = False + self.has_ellipsis_parenthesized: Optional[bool] = None + self.identifiers: Set[Union[str, AnonymousAxis]] = set() + # that's axes like 2, 3, 4 or 5. Axes with size 1 are exceptional and replaced with empty composition + self.has_non_unitary_anonymous_axes: bool = False + # composition keeps structure of composite axes, see how different corner cases are handled in tests + self.composition: List[Union[List[Union[str, AnonymousAxis]], str]] = [] + if "." in expression: + if "..." not in expression: + raise ValueError( + "Expression may contain dots only inside ellipsis (...)" + ) + if str.count(expression, "...") != 1 or str.count(expression, ".") != 3: + raise ValueError( + "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor " + ) + expression = expression.replace("...", _ellipsis) + self.has_ellipsis = True + + bracket_group: Optional[List[Union[str, AnonymousAxis]]] = None + + def add_axis_name(x: str) -> None: + if x in self.identifiers: + if not (allow_underscore and x == "_") and not allow_duplicates: + raise ValueError( + f"Indexing expression contains duplicate dimension '{x}'" + ) + if x == _ellipsis: + self.identifiers.add(_ellipsis) + if bracket_group is None: + self.composition.append(_ellipsis) + self.has_ellipsis_parenthesized = False + else: + bracket_group.append(_ellipsis) + self.has_ellipsis_parenthesized = True + else: + is_number = str.isdecimal(x) + if is_number and int(x) == 1: + # handling the case of anonymous axis of length 1 + if bracket_group is None: + self.composition.append([]) + else: + pass # no need to think about 1s inside parenthesis + return + is_axis_name, reason = self.check_axis_name_return_reason( + x, allow_underscore=allow_underscore + ) + if not (is_number or is_axis_name): + raise ValueError(f"Invalid axis identifier: {x}\n{reason}") + axis_name: Union[str, AnonymousAxis] = ( + AnonymousAxis(x) if is_number else x + ) + self.identifiers.add(axis_name) + if is_number: + self.has_non_unitary_anonymous_axes = True + if bracket_group is None: + self.composition.append([axis_name]) + else: + bracket_group.append(axis_name) + + current_identifier = None + for char in expression: + if char in "() ": + if current_identifier is not None: + add_axis_name(current_identifier) + current_identifier = None + if char == "(": + if bracket_group is not None: + raise ValueError( + "Axis composition is one-level (brackets inside brackets not allowed)" + ) + bracket_group = [] + elif char == ")": + if bracket_group is None: + raise ValueError("Brackets are not balanced") + self.composition.append(bracket_group) + bracket_group = None + elif str.isalnum(char) or char in ["_", _ellipsis]: + if current_identifier is None: + current_identifier = char + else: + current_identifier += char + else: + raise ValueError(f"Unknown character '{char}'") + + if bracket_group is not None: + raise ValueError(f"Imbalanced parentheses in expression: '{expression}'") + if current_identifier is not None: + add_axis_name(current_identifier) + + @staticmethod + def check_axis_name_return_reason( + name: str, allow_underscore: bool = False + ) -> Tuple[bool, str]: + """Check if the given axis name is valid, and a message explaining why if not. 
+
+        Valid axes names are python identifiers except keywords, and should not start or end with an underscore.
+
+        Args:
+            name (str): the axis name to check
+            allow_underscore (bool): whether axis names are allowed to start with an underscore
+
+        Returns:
+            Tuple[bool, str]: whether the axis name is valid, a message explaining why if not
+        """
+        if not str.isidentifier(name):
+            return False, "not a valid python identifier"
+        elif name[0] == "_" or name[-1] == "_":
+            if name == "_" and allow_underscore:
+                return True, ""
+            return False, "axis name should not start or end with underscore"
+        else:
+            if keyword.iskeyword(name):
+                warnings.warn(
+                    f"It is discouraged to use axes names that are keywords: {name}",
+                    RuntimeWarning,
+                )
+            if name in ["axis"]:
+                warnings.warn(
+                    "It is discouraged to use 'axis' as an axis name and will raise an error in future",
+                    FutureWarning,
+                )
+            return True, ""
+
+    @staticmethod
+    def check_axis_name(name: str) -> bool:
+        """Check if the name is a valid axis name.
+
+        Args:
+            name (str): the axis name to check
+
+        Returns:
+            bool: whether the axis name is valid
+        """
+        is_valid, _ = ParsedExpression.check_axis_name_return_reason(name)
+        return is_valid
+
+
+def parse_pattern(
+    pattern: str, axes_lengths: Mapping[str, int]
+) -> Tuple[ParsedExpression, ParsedExpression]:
+    """Parse an `einops`-style pattern into a left-hand side and right-hand side `ParsedExpression` object.
+
+    Args:
+        pattern (str): the `einops`-style rearrangement pattern
+        axes_lengths (Mapping[str, int]): any additional length specifications for dimensions
+
+    Returns:
+        Tuple[ParsedExpression, ParsedExpression]: a tuple containing the left-hand side and right-hand side expressions
+    """
+    # adapted from einops.einops._prepare_transformation_recipe
+    # https://github.com/arogozhnikov/einops/blob/230ac1526c1f42c9e1f7373912c7f8047496df11/einops/einops.py
+    try:
+        left_str, right_str = pattern.split("->")
+    except ValueError:
+        raise ValueError("Pattern must contain a single '->' separator") from None
+
+    if _ellipsis in axes_lengths:
+        raise ValueError(f"'{_ellipsis}' is not an allowed axis identifier")
+
+    left = ParsedExpression(left_str)
+    right = ParsedExpression(right_str)
+
+    if not left.has_ellipsis and right.has_ellipsis:
+        raise ValueError(
+            f"Ellipsis found in right side, but not left side of a pattern {pattern}"
+        )
+    if left.has_ellipsis and left.has_ellipsis_parenthesized:
+        raise ValueError(
+            f"Ellipsis inside parenthesis on the left side is not allowed: {pattern}"
+        )
+
+    return left, right
+
+
+def validate_rearrange_expressions(
+    left: ParsedExpression, right: ParsedExpression, axes_lengths: Mapping[str, int]
+) -> None:
+    """Perform expression validations that are specific to the `rearrange` operation.
+ + Args: + left (ParsedExpression): left-hand side expression + right (ParsedExpression): right-hand side expression + axes_lengths (Mapping[str, int]): any additional length specifications for dimensions + """ + for length in axes_lengths.values(): + if (length_type := type(length)) is not int: + raise TypeError( + f"rearrange axis lengths must be integers, got: {length_type}" + ) + + if left.has_non_unitary_anonymous_axes or right.has_non_unitary_anonymous_axes: + raise ValueError("rearrange only supports unnamed axes of size 1") + + difference = set.symmetric_difference(left.identifiers, right.identifiers) + if len(difference) > 0: + raise ValueError( + f"Identifiers only on one side of rearrange expression (should be on both): {difference}" + ) + + unmatched_axes = axes_lengths.keys() - left.identifiers + if len(unmatched_axes) > 0: + raise ValueError( + f"Identifiers not found in rearrange expression: {unmatched_axes}" + ) + + +def comma_separate(collection: Collection[Union[str, Collection[str]]]) -> str: + """Convert a collection of strings representing first class dims into a comma-separated string. + + Args: + collection (Collection[Union[str, Collection[str]]]): the collection of strings to convert + + Returns: + str: the comma-separated string + + Examples: + >>> comma_separate(('d0',)) + 'd0' + + >>> comma_separate(('d0', 'd1', 'd2', 'd3')) + 'd0, d1, d2, d3' + + >>> comma_separate([('d1', 'd4')]) + '(d1, d4)' + + >>> comma_separate([('d0',), (), ('d1',), ('d2',), ('d3', 'd4')]) + '(d0,), (), (d1,), (d2,), (d3, d4)' + """ + return ", ".join( + item + if isinstance(item, str) + else f"({comma_separate(item)}{',' if len(item) == 1 else ''})" + for item in collection + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23fc8381cc2358880f935064edd7eeff9766fec6 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py @@ -0,0 +1,6 @@ +# PyTorch forward-mode is not mature yet +from torch._functorch.apis import chunk_vmap +from torch._functorch.batch_norm_replacement import replace_all_batch_norm_modules_ +from torch._functorch.eager_transforms import hessian, jacfwd, jvp + +from functorch import functionalize diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..088b2dc87976fa2936a9ccb334c109295444abc6 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD @@ -0,0 +1,20 @@ +nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/__pycache__/__init__.cpython-311.pyc,, +nvidia/cufft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/cufft/__pycache__/__init__.cpython-311.pyc,, +nvidia/cufft/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc,, +nvidia/cufft/include/cudalibxt.h,sha256=9GDuRiOzJuO61zRDhIpWpF7XHp8FXSOIlHJNoIMwOZQ,4105 +nvidia/cufft/include/cufft.h,sha256=Ui7ajKuYZcP-2bm9mpH96YN1igLKeDLgrttyc4jMQJE,12570 
+nvidia/cufft/include/cufftXt.h,sha256=LfRdibvAlaNQ35vYqI4n8YcMpPYROrIjpZu2L0tISi4,11463
+nvidia/cufft/include/cufftw.h,sha256=DBrJQf-dnCWD-OYgdhnEzn8OiAX0U3xdteEaNdhs7mU,19412
+nvidia/cufft/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc,,
+nvidia/cufft/lib/libcufft.so.10,sha256=ylxvgdWEkGtNMrmErYcE3WW_db2rQzTtIs5-73UBqVo,279161544
+nvidia/cufft/lib/libcufftw.so.10,sha256=GlkqWy81mpB3VQ7h_a3VjrLPnMC_q4_jl6N0-5SdoUM,1618440
+nvidia_cufft_cu11-10.9.0.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+nvidia_cufft_cu11-10.9.0.58.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
+nvidia_cufft_cu11-10.9.0.58.dist-info/METADATA,sha256=XITT6bPOjdOxPQa-kAVw4XjFf4_iU-JoLUXrOwPJ4JA,1503
+nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD,,
+nvidia_cufft_cu11-10.9.0.58.dist-info/WHEEL,sha256=-kQi_VMfvRQozZJT7HUPMfY-5vLo0LVTmAylNJ3Ft98,106
+nvidia_cufft_cu11-10.9.0.58.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt
@@ -0,0 +1,1568 @@
+End User License Agreement
+--------------------------
+
+
+Preface
+-------
+
+The Software License Agreement in Chapter 1 and the Supplement
+in Chapter 2 contain license terms and conditions that govern
+the use of NVIDIA software. By accepting this agreement, you
+agree to comply with all the terms and conditions applicable
+to the product(s) included herein.
+
+
+NVIDIA Driver
+
+
+Description
+
+This package contains the operating system driver and
+fundamental system software components for NVIDIA GPUs.
+
+
+NVIDIA CUDA Toolkit
+
+
+Description
+
+The NVIDIA CUDA Toolkit provides command-line and graphical
+tools for building, debugging and optimizing the performance
+of applications accelerated by NVIDIA GPUs, runtime and math
+libraries, and documentation including programming guides,
+user manuals, and API references.
+
+
+Default Install Location of CUDA Toolkit
+
+Windows platform:
+
+%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#
+
+
+NVIDIA CUDA Samples
+
+
+Description
+
+This package includes over 100 CUDA examples that demonstrate
+various CUDA programming principles and efficient CUDA
+implementations of algorithms in specific application domains.
+
+
+Default Install Location of CUDA Samples
+
+Windows platform:
+
+%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#/samples
+
+and
+
+$HOME/NVIDIA_CUDA-#.#_Samples
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#/samples
+
+
+NVIDIA Nsight Visual Studio Edition (Windows only)
+
+
+Description
+
+NVIDIA Nsight Development Platform, Visual Studio Edition is a
+development environment integrated into Microsoft Visual
+Studio that provides tools for debugging, profiling, analyzing
+and optimizing your GPU computing and graphics applications.
+
+
+Default Install Location of Nsight Visual Studio Edition
+
+Windows platform:
+
+%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
+
+
+1. License Agreement for NVIDIA Software Development Kits
+---------------------------------------------------------
+
+
+Release Date: July 26, 2018
+---------------------------
+
+
+Important Notice: Read before downloading, installing,
+copying or using the licensed software:
+-------------------------------------------------------
+
+This license agreement, including exhibits attached
+(“Agreement”) is a legal agreement between you and NVIDIA
+Corporation ("NVIDIA") and governs your use of an NVIDIA
+software development kit (“SDK”).
+
+Each SDK has its own set of software and materials, but here
+is a description of the types of items that may be included in
+an SDK: source code, header files, APIs, data sets and assets
+(examples include images, textures, models, scenes, videos,
+native API input/output files), binary software, sample code,
+libraries, utility programs, programming code and
+documentation.
+
+This Agreement can be accepted only by an adult of legal age
+of majority in the country in which the SDK is used.
+
+If you are entering into this Agreement on behalf of a company
+or other legal entity, you represent that you have the legal
+authority to bind the entity to this Agreement, in which case
+“you” will mean the entity you represent.
+
+If you don’t have the required age or authority to accept
+this Agreement, or if you don’t accept all the terms and
+conditions of this Agreement, do not download, install or use
+the SDK.
+
+You agree to use the SDK only for purposes that are permitted
+by (a) this Agreement, and (b) any applicable law, regulation
+or generally accepted practices or guidelines in the relevant
+jurisdictions.
+
+
+1.1. License
+
+
+1.1.1. License Grant
+
+Subject to the terms of this Agreement, NVIDIA hereby grants
+you a non-exclusive, non-transferable license, without the
+right to sublicense (except as expressly provided in this
+Agreement) to:
+
+  1. Install and use the SDK,
+
+  2. Modify and create derivative works of sample source code
+     delivered in the SDK, and
+
+  3. Distribute those portions of the SDK that are identified
+     in this Agreement as distributable, as incorporated in
+     object code format into a software application that meets
+     the distribution requirements indicated in this Agreement.
+
+
+1.1.2. Distribution Requirements
+
+These are the distribution requirements for you to exercise
+the distribution grant:
+
+  1. Your application must have material additional
+     functionality, beyond the included portions of the SDK.
+
+  2. The distributable portions of the SDK shall only be
+     accessed by your application.
+
+  3. The following notice shall be included in modifications
+     and derivative works of sample source code distributed:
+     “This software contains source code provided by NVIDIA
+     Corporation.”
+
+  4. Unless a developer tool is identified in this Agreement
+     as distributable, it is delivered for your internal use
+     only.
+
+  5. The terms under which you distribute your application
+     must be consistent with the terms of this Agreement,
+     including (without limitation) terms relating to the
+     license grant and license restrictions and protection of
+     NVIDIA’s intellectual property rights. Additionally, you
+     agree that you will protect the privacy, security and
+     legal rights of your application users.
+
+  6.
You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Third Party Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, third party software licensed by a NVIDIA +supplier and/or open source software provided under an open +source license. Use of third party software is subject to the +third-party license terms, or in the absence of third party +terms, the terms of this Agreement. Copyright to third party +software is held by the copyright holders indicated in the +third-party software or license. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. 
Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. + Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. 
No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. 
NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. 
+ +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + + +Release date: August 16, 2018 +----------------------------- + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. 
Attachment A + +The following portions of the SDK are distributable under the +Agreement: + +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +CUDA Accelerated Graph Library + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Linux + +libnvjpeg.so, 
libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + +Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-fatbinaryloader.so, +libnvidia-ptxjitcompiler.so + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet this criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. 
+ http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. + + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use (e.g. nvprof) of the PCRE third party + component is subject to the following terms and + conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. 
Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. 
Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. 
Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. 
The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license.
+ +----------------- diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..800bc51a684637144224fd4ae5456486098074e5 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..37359ee737dfa502b485238728323c98dc0b0937 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bed6b3fc858e3749f14dda064826f25de0477875 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py new file mode 100644 index 0000000000000000000000000000000000000000..3f9f896e632e929a63e9724ab80ecdfc9761b795 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py @@ -0,0 +1,81 @@ +import functools +import os +import site +import sys +import sysconfig +import typing + +from pip._internal.exceptions import InstallationError +from pip._internal.utils import appdirs +from pip._internal.utils.virtualenv import running_under_virtualenv + +# Application Directories +USER_CACHE_DIR = appdirs.user_cache_dir("pip") + +# FIXME doesn't account for venv linked to global site-packages +site_packages: str = sysconfig.get_path("purelib") + + +def get_major_minor_version() -> str: + """ + Return the major-minor version of the current Python as a string, e.g. + "3.7" or "3.10". + """ + return "{}.{}".format(*sys.version_info) + + +def change_root(new_root: str, pathname: str) -> str: + """Return 'pathname' with 'new_root' prepended. + + If 'pathname' is relative, this is equivalent to os.path.join(new_root, pathname). + Otherwise, it requires making 'pathname' relative and then joining the + two, which is tricky on DOS/Windows and Mac OS. + + This is borrowed from Python's standard library's distutils module. + """ + if os.name == "posix": + if not os.path.isabs(pathname): + return os.path.join(new_root, pathname) + else: + return os.path.join(new_root, pathname[1:]) + + elif os.name == "nt": + (drive, path) = os.path.splitdrive(pathname) + if path[0] == "\\": + path = path[1:] + return os.path.join(new_root, path) + + else: + raise InstallationError( + f"Unknown platform: {os.name}\n" + "Can not change root path prefix on unknown platform." 
+ ) + + +def get_src_prefix() -> str: + if running_under_virtualenv(): + src_prefix = os.path.join(sys.prefix, "src") + else: + # FIXME: keep src in cwd for now (it is not a temporary folder) + try: + src_prefix = os.path.join(os.getcwd(), "src") + except OSError: + # In case the current working directory has been renamed or deleted + sys.exit("The folder you are executing pip from can no longer be found.") + + # under macOS + virtualenv sys.prefix is not properly resolved + # it is something like /path/to/python/bin/.. + return os.path.abspath(src_prefix) + + +try: + # Use getusersitepackages if this is present, as it ensures that the + # value is initialised properly. + user_site: typing.Optional[str] = site.getusersitepackages() +except AttributeError: + user_site = site.USER_SITE + + +@functools.lru_cache(maxsize=None) +def is_osx_framework() -> bool: + return bool(sysconfig.get_config_var("PYTHONFRAMEWORK")) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3179f860914d3cac20e6eb8b4f168a4a51211519 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..c01dd1c678a083c17190fc9b7e214eca5c91f4f1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py @@ -0,0 +1,74 @@ +"""Metadata generation logic for legacy source distributions. +""" + +import logging +import os + +from pip._internal.build_env import BuildEnvironment +from pip._internal.cli.spinners import open_spinner +from pip._internal.exceptions import ( + InstallationError, + InstallationSubprocessError, + MetadataGenerationFailed, +) +from pip._internal.utils.setuptools_build import make_setuptools_egg_info_args +from pip._internal.utils.subprocess import call_subprocess +from pip._internal.utils.temp_dir import TempDirectory + +logger = logging.getLogger(__name__) + + +def _find_egg_info(directory: str) -> str: + """Find an .egg-info subdirectory in `directory`.""" + filenames = [f for f in os.listdir(directory) if f.endswith(".egg-info")] + + if not filenames: + raise InstallationError(f"No .egg-info directory found in {directory}") + + if len(filenames) > 1: + raise InstallationError( + f"More than one .egg-info directory found in {directory}" + ) + + return os.path.join(directory, filenames[0]) + + +def generate_metadata( + build_env: BuildEnvironment, + setup_py_path: str, + source_dir: str, + isolated: bool, + details: str, +) -> str: + """Generate metadata using setup.py-based defacto mechanisms. 
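+
+    Concretely, this shells out to ``setup.py egg_info`` (via
+    make_setuptools_egg_info_args below) inside a temporary directory.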
+ + Returns the generated metadata directory. + """ + logger.debug( + "Running setup.py (path:%s) egg_info for package %s", + setup_py_path, + details, + ) + + egg_info_dir = TempDirectory(kind="pip-egg-info", globally_managed=True).path + + args = make_setuptools_egg_info_args( + setup_py_path, + egg_info_dir=egg_info_dir, + no_user_config=isolated, + ) + + with build_env: + with open_spinner("Preparing metadata (setup.py)") as spinner: + try: + call_subprocess( + args, + cwd=source_dir, + command_desc="python setup.py egg_info", + spinner=spinner, + ) + except InstallationSubprocessError as error: + raise MetadataGenerationFailed(package_details=details) from error + + # Return the .egg-info directory. + return _find_egg_info(egg_info_dir) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py new file mode 100644 index 0000000000000000000000000000000000000000..4b6fbc4c37599588ad69da3dc8a4d9628dc89b96 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py @@ -0,0 +1,181 @@ +"""Validation of dependencies of packages +""" + +import logging +from contextlib import suppress +from email.parser import Parser +from functools import reduce +from typing import ( + Callable, + Dict, + FrozenSet, + Generator, + Iterable, + List, + NamedTuple, + Optional, + Set, + Tuple, +) + +from pip._vendor.packaging.requirements import Requirement +from pip._vendor.packaging.tags import Tag, parse_tag +from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version + +from pip._internal.distributions import make_distribution_for_install_requirement +from pip._internal.metadata import get_default_environment +from pip._internal.metadata.base import BaseDistribution +from pip._internal.req.req_install import InstallRequirement + +logger = logging.getLogger(__name__) + + +class PackageDetails(NamedTuple): + version: Version + dependencies: List[Requirement] + + +# Shorthands +PackageSet = Dict[NormalizedName, PackageDetails] +Missing = Tuple[NormalizedName, Requirement] +Conflicting = Tuple[NormalizedName, Version, Requirement] + +MissingDict = Dict[NormalizedName, List[Missing]] +ConflictingDict = Dict[NormalizedName, List[Conflicting]] +CheckResult = Tuple[MissingDict, ConflictingDict] +ConflictDetails = Tuple[PackageSet, CheckResult] + + +def create_package_set_from_installed() -> Tuple[PackageSet, bool]: + """Converts a list of distributions into a PackageSet.""" + package_set = {} + problems = False + env = get_default_environment() + for dist in env.iter_installed_distributions(local_only=False, skip=()): + name = dist.canonical_name + try: + dependencies = list(dist.iter_dependencies()) + package_set[name] = PackageDetails(dist.version, dependencies) + except (OSError, ValueError) as e: + # Don't crash on unreadable or broken metadata. + logger.warning("Error parsing dependencies of %s: %s", name, e) + problems = True + return package_set, problems + + +def check_package_set( + package_set: PackageSet, should_ignore: Optional[Callable[[str], bool]] = None +) -> CheckResult: + """Check if a package set is consistent + + If should_ignore is passed, it should be a callable that takes a + package name and returns a boolean. 
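+
+    Illustrative sketch (the ignore rule here is hypothetical)::
+
+        package_set, _ = create_package_set_from_installed()
+        missing, conflicting = check_package_set(
+            package_set, should_ignore=lambda name: name == "pip"
+        )
+        # missing: {package: [(dep_name, Requirement), ...]}
+        # conflicting: {package: [(dep_name, installed_version, Requirement), ...]}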
+ """ + + missing = {} + conflicting = {} + + for package_name, package_detail in package_set.items(): + # Info about dependencies of package_name + missing_deps: Set[Missing] = set() + conflicting_deps: Set[Conflicting] = set() + + if should_ignore and should_ignore(package_name): + continue + + for req in package_detail.dependencies: + name = canonicalize_name(req.name) + + # Check if it's missing + if name not in package_set: + missed = True + if req.marker is not None: + missed = req.marker.evaluate({"extra": ""}) + if missed: + missing_deps.add((name, req)) + continue + + # Check if there's a conflict + version = package_set[name].version + if not req.specifier.contains(version, prereleases=True): + conflicting_deps.add((name, version, req)) + + if missing_deps: + missing[package_name] = sorted(missing_deps, key=str) + if conflicting_deps: + conflicting[package_name] = sorted(conflicting_deps, key=str) + + return missing, conflicting + + +def check_install_conflicts(to_install: List[InstallRequirement]) -> ConflictDetails: + """For checking if the dependency graph would be consistent after \ + installing given requirements + """ + # Start from the current state + package_set, _ = create_package_set_from_installed() + # Install packages + would_be_installed = _simulate_installation_of(to_install, package_set) + + # Only warn about directly-dependent packages; create a whitelist of them + whitelist = _create_whitelist(would_be_installed, package_set) + + return ( + package_set, + check_package_set( + package_set, should_ignore=lambda name: name not in whitelist + ), + ) + + +def check_unsupported( + packages: Iterable[BaseDistribution], + supported_tags: Iterable[Tag], +) -> Generator[BaseDistribution, None, None]: + for p in packages: + with suppress(FileNotFoundError): + wheel_file = p.read_text("WHEEL") + wheel_tags: FrozenSet[Tag] = reduce( + frozenset.union, + map(parse_tag, Parser().parsestr(wheel_file).get_all("Tag", [])), + frozenset(), + ) + if wheel_tags.isdisjoint(supported_tags): + yield p + + +def _simulate_installation_of( + to_install: List[InstallRequirement], package_set: PackageSet +) -> Set[NormalizedName]: + """Computes the version of packages after installing to_install.""" + # Keep track of packages that were installed + installed = set() + + # Modify it as installing requirement_set would (assuming no errors) + for inst_req in to_install: + abstract_dist = make_distribution_for_install_requirement(inst_req) + dist = abstract_dist.get_metadata_distribution() + name = dist.canonical_name + package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies())) + + installed.add(name) + + return installed + + +def _create_whitelist( + would_be_installed: Set[NormalizedName], package_set: PackageSet +) -> Set[NormalizedName]: + packages_affected = set(would_be_installed) + + for package_name in package_set: + if package_name in packages_affected: + continue + + for req in package_set[package_name].dependencies: + if canonicalize_name(req.name) in packages_affected: + packages_affected.add(package_name) + break + + return packages_affected diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py new file mode 100644 index 0000000000000000000000000000000000000000..bb1039fb7766795059df227f13e37c54ce436618 --- /dev/null +++ 
b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py @@ -0,0 +1,258 @@ +import collections +import logging +import os +from typing import Container, Dict, Generator, Iterable, List, NamedTuple, Optional, Set + +from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.packaging.version import InvalidVersion + +from pip._internal.exceptions import BadCommand, InstallationError +from pip._internal.metadata import BaseDistribution, get_environment +from pip._internal.req.constructors import ( + install_req_from_editable, + install_req_from_line, +) +from pip._internal.req.req_file import COMMENT_RE +from pip._internal.utils.direct_url_helpers import direct_url_as_pep440_direct_reference + +logger = logging.getLogger(__name__) + + +class _EditableInfo(NamedTuple): + requirement: str + comments: List[str] + + +def freeze( + requirement: Optional[List[str]] = None, + local_only: bool = False, + user_only: bool = False, + paths: Optional[List[str]] = None, + isolated: bool = False, + exclude_editable: bool = False, + skip: Container[str] = (), +) -> Generator[str, None, None]: + installations: Dict[str, FrozenRequirement] = {} + + dists = get_environment(paths).iter_installed_distributions( + local_only=local_only, + skip=(), + user_only=user_only, + ) + for dist in dists: + req = FrozenRequirement.from_dist(dist) + if exclude_editable and req.editable: + continue + installations[req.canonical_name] = req + + if requirement: + # the options that don't get turned into an InstallRequirement + # should only be emitted once, even if the same option is in multiple + # requirements files, so we need to keep track of what has been emitted + # so that we don't emit it again if it's seen again + emitted_options: Set[str] = set() + # keep track of which files a requirement is in so that we can + # give an accurate warning if a requirement appears multiple times. 
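+        # (Illustrative, with hypothetical file names: if base.txt and
+        # dev.txt both list "requests", the "included multiple times"
+        # warning at the end of this block reports both paths.)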
+ req_files: Dict[str, List[str]] = collections.defaultdict(list) + for req_file_path in requirement: + with open(req_file_path) as req_file: + for line in req_file: + if ( + not line.strip() + or line.strip().startswith("#") + or line.startswith( + ( + "-r", + "--requirement", + "-f", + "--find-links", + "-i", + "--index-url", + "--pre", + "--trusted-host", + "--process-dependency-links", + "--extra-index-url", + "--use-feature", + ) + ) + ): + line = line.rstrip() + if line not in emitted_options: + emitted_options.add(line) + yield line + continue + + if line.startswith("-e") or line.startswith("--editable"): + if line.startswith("-e"): + line = line[2:].strip() + else: + line = line[len("--editable") :].strip().lstrip("=") + line_req = install_req_from_editable( + line, + isolated=isolated, + ) + else: + line_req = install_req_from_line( + COMMENT_RE.sub("", line).strip(), + isolated=isolated, + ) + + if not line_req.name: + logger.info( + "Skipping line in requirement file [%s] because " + "it's not clear what it would install: %s", + req_file_path, + line.strip(), + ) + logger.info( + " (add #egg=PackageName to the URL to avoid" + " this warning)" + ) + else: + line_req_canonical_name = canonicalize_name(line_req.name) + if line_req_canonical_name not in installations: + # either it's not installed, or it is installed + # but has been processed already + if not req_files[line_req.name]: + logger.warning( + "Requirement file [%s] contains %s, but " + "package %r is not installed", + req_file_path, + COMMENT_RE.sub("", line).strip(), + line_req.name, + ) + else: + req_files[line_req.name].append(req_file_path) + else: + yield str(installations[line_req_canonical_name]).rstrip() + del installations[line_req_canonical_name] + req_files[line_req.name].append(req_file_path) + + # Warn about requirements that were included multiple times (in a + # single requirements file or in different requirements files). + for name, files in req_files.items(): + if len(files) > 1: + logger.warning( + "Requirement %s included multiple times [%s]", + name, + ", ".join(sorted(set(files))), + ) + + yield ("## The following requirements were added by pip freeze:") + for installation in sorted(installations.values(), key=lambda x: x.name.lower()): + if installation.canonical_name not in skip: + yield str(installation).rstrip() + + +def _format_as_name_version(dist: BaseDistribution) -> str: + try: + dist_version = dist.version + except InvalidVersion: + # legacy version + return f"{dist.raw_name}==={dist.raw_version}" + else: + return f"{dist.raw_name}=={dist_version}" + + +def _get_editable_info(dist: BaseDistribution) -> _EditableInfo: + """ + Compute and return values (req, comments) for use in + FrozenRequirement.from_dist(). 
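+
+    A typical successful return is sketched below (the URL, revision and
+    project name are hypothetical)::
+
+        _EditableInfo(
+            requirement="git+https://example.com/proj@deadbeef#egg=proj",
+            comments=[],
+        )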
+ """ + editable_project_location = dist.editable_project_location + assert editable_project_location + location = os.path.normcase(os.path.abspath(editable_project_location)) + + from pip._internal.vcs import RemoteNotFoundError, RemoteNotValidError, vcs + + vcs_backend = vcs.get_backend_for_dir(location) + + if vcs_backend is None: + display = _format_as_name_version(dist) + logger.debug( + 'No VCS found for editable requirement "%s" in: %r', + display, + location, + ) + return _EditableInfo( + requirement=location, + comments=[f"# Editable install with no version control ({display})"], + ) + + vcs_name = type(vcs_backend).__name__ + + try: + req = vcs_backend.get_src_requirement(location, dist.raw_name) + except RemoteNotFoundError: + display = _format_as_name_version(dist) + return _EditableInfo( + requirement=location, + comments=[f"# Editable {vcs_name} install with no remote ({display})"], + ) + except RemoteNotValidError as ex: + display = _format_as_name_version(dist) + return _EditableInfo( + requirement=location, + comments=[ + f"# Editable {vcs_name} install ({display}) with either a deleted " + f"local remote or invalid URI:", + f"# '{ex.url}'", + ], + ) + except BadCommand: + logger.warning( + "cannot determine version of editable source in %s " + "(%s command not found in path)", + location, + vcs_backend.name, + ) + return _EditableInfo(requirement=location, comments=[]) + except InstallationError as exc: + logger.warning("Error when trying to get requirement for VCS system %s", exc) + else: + return _EditableInfo(requirement=req, comments=[]) + + logger.warning("Could not determine repository location of %s", location) + + return _EditableInfo( + requirement=location, + comments=["## !! Could not determine repository location"], + ) + + +class FrozenRequirement: + def __init__( + self, + name: str, + req: str, + editable: bool, + comments: Iterable[str] = (), + ) -> None: + self.name = name + self.canonical_name = canonicalize_name(name) + self.req = req + self.editable = editable + self.comments = comments + + @classmethod + def from_dist(cls, dist: BaseDistribution) -> "FrozenRequirement": + editable = dist.editable + if editable: + req, comments = _get_editable_info(dist) + else: + comments = [] + direct_url = dist.direct_url + if direct_url: + # if PEP 610 metadata is present, use it + req = direct_url_as_pep440_direct_reference(direct_url, dist.raw_name) + else: + # name==version requirement + req = _format_as_name_version(dist) + + return cls(dist.raw_name, req, editable, comments=comments) + + def __str__(self) -> str: + req = self.req + if self.editable: + req = f"-e {req}" + return "\n".join(list(self.comments) + [str(req)]) + "\n" diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24d6a5dd31fe33b03f90ed0f9ee465253686900c --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py @@ -0,0 +1,2 @@ +"""For modules related to installing packages. 
+""" diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c730d926512f6a5203b9c39347b1d3642becfbb Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py new file mode 100644 index 0000000000000000000000000000000000000000..aef42aa9eefae7cae4a3d877b6dab1c53d85ad5f --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py @@ -0,0 +1,741 @@ +"""Support for installing and building the "wheel" binary package format. +""" + +import collections +import compileall +import contextlib +import csv +import importlib +import logging +import os.path +import re +import shutil +import sys +import warnings +from base64 import urlsafe_b64encode +from email.message import Message +from itertools import chain, filterfalse, starmap +from typing import ( + IO, + TYPE_CHECKING, + Any, + BinaryIO, + Callable, + Dict, + Generator, + Iterable, + Iterator, + List, + NewType, + Optional, + Protocol, + Sequence, + Set, + Tuple, + Union, + cast, +) +from zipfile import ZipFile, ZipInfo + +from pip._vendor.distlib.scripts import ScriptMaker +from pip._vendor.distlib.util import get_export_entry +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.exceptions import InstallationError +from pip._internal.locations import get_major_minor_version +from pip._internal.metadata import ( + BaseDistribution, + FilesystemWheel, + get_wheel_distribution, +) +from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl +from pip._internal.models.scheme import SCHEME_KEYS, Scheme +from pip._internal.utils.filesystem import adjacent_tmp_file, replace +from pip._internal.utils.misc import StreamWrapper, ensure_dir, hash_file, partition +from pip._internal.utils.unpacking import ( + current_umask, + is_within_directory, + set_extracted_file_to_default_mode_plus_executable, + zip_item_is_executable, +) +from pip._internal.utils.wheel import parse_wheel + +if TYPE_CHECKING: + + class File(Protocol): + src_record_path: "RecordPath" + dest_path: str + changed: bool + + def save(self) -> None: + pass + + +logger = logging.getLogger(__name__) + +RecordPath = NewType("RecordPath", str) +InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]] + + +def rehash(path: str, blocksize: int = 1 << 20) -> Tuple[str, str]: + """Return (encoded_digest, length) for path using hashlib.sha256()""" + h, length = hash_file(path, blocksize) + digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") + return (digest, str(length)) + + +def csv_io_kwargs(mode: str) -> Dict[str, Any]: + """Return keyword arguments to properly open a CSV file + in the given mode. + """ + return {"mode": mode, "newline": "", "encoding": "utf-8"} + + +def fix_script(path: str) -> bool: + """Replace #!python with #!/path/to/python + Return True if file was changed. 
+ """ + # XXX RECORD hashes will need to be updated + assert os.path.isfile(path) + + with open(path, "rb") as script: + firstline = script.readline() + if not firstline.startswith(b"#!python"): + return False + exename = sys.executable.encode(sys.getfilesystemencoding()) + firstline = b"#!" + exename + os.linesep.encode("ascii") + rest = script.read() + with open(path, "wb") as script: + script.write(firstline) + script.write(rest) + return True + + +def wheel_root_is_purelib(metadata: Message) -> bool: + return metadata.get("Root-Is-Purelib", "").lower() == "true" + + +def get_entrypoints(dist: BaseDistribution) -> Tuple[Dict[str, str], Dict[str, str]]: + console_scripts = {} + gui_scripts = {} + for entry_point in dist.iter_entry_points(): + if entry_point.group == "console_scripts": + console_scripts[entry_point.name] = entry_point.value + elif entry_point.group == "gui_scripts": + gui_scripts[entry_point.name] = entry_point.value + return console_scripts, gui_scripts + + +def message_about_scripts_not_on_PATH(scripts: Sequence[str]) -> Optional[str]: + """Determine if any scripts are not on PATH and format a warning. + Returns a warning message if one or more scripts are not on PATH, + otherwise None. + """ + if not scripts: + return None + + # Group scripts by the path they were installed in + grouped_by_dir: Dict[str, Set[str]] = collections.defaultdict(set) + for destfile in scripts: + parent_dir = os.path.dirname(destfile) + script_name = os.path.basename(destfile) + grouped_by_dir[parent_dir].add(script_name) + + # We don't want to warn for directories that are on PATH. + not_warn_dirs = [ + os.path.normcase(os.path.normpath(i)).rstrip(os.sep) + for i in os.environ.get("PATH", "").split(os.pathsep) + ] + # If an executable sits with sys.executable, we don't warn for it. + # This covers the case of venv invocations without activating the venv. + not_warn_dirs.append( + os.path.normcase(os.path.normpath(os.path.dirname(sys.executable))) + ) + warn_for: Dict[str, Set[str]] = { + parent_dir: scripts + for parent_dir, scripts in grouped_by_dir.items() + if os.path.normcase(os.path.normpath(parent_dir)) not in not_warn_dirs + } + if not warn_for: + return None + + # Format a message + msg_lines = [] + for parent_dir, dir_scripts in warn_for.items(): + sorted_scripts: List[str] = sorted(dir_scripts) + if len(sorted_scripts) == 1: + start_text = f"script {sorted_scripts[0]} is" + else: + start_text = "scripts {} are".format( + ", ".join(sorted_scripts[:-1]) + " and " + sorted_scripts[-1] + ) + + msg_lines.append( + f"The {start_text} installed in '{parent_dir}' which is not on PATH." + ) + + last_line_fmt = ( + "Consider adding {} to PATH or, if you prefer " + "to suppress this warning, use --no-warn-script-location." + ) + if len(msg_lines) == 1: + msg_lines.append(last_line_fmt.format("this directory")) + else: + msg_lines.append(last_line_fmt.format("these directories")) + + # Add a note if any directory starts with ~ + warn_for_tilde = any( + i[0] == "~" for i in os.environ.get("PATH", "").split(os.pathsep) if i + ) + if warn_for_tilde: + tilde_warning_msg = ( + "NOTE: The current PATH contains path(s) starting with `~`, " + "which may not be expanded by all applications." + ) + msg_lines.append(tilde_warning_msg) + + # Returns the formatted multiline message + return "\n".join(msg_lines) + + +def _normalized_outrows( + outrows: Iterable[InstalledCSVRow], +) -> List[Tuple[str, str, str]]: + """Normalize the given rows of a RECORD file. + + Items in each row are converted into str. 
Rows are then sorted to make + the value more predictable for tests. + + Each row is a 3-tuple (path, hash, size) and corresponds to a record of + a RECORD file (see PEP 376 and PEP 427 for details). For the rows + passed to this function, the size can be an integer as an int or string, + or the empty string. + """ + # Normally, there should only be one row per path, in which case the + # second and third elements don't come into play when sorting. + # However, in cases in the wild where a path might happen to occur twice, + # we don't want the sort operation to trigger an error (but still want + # determinism). Since the third element can be an int or string, we + # coerce each element to a string to avoid a TypeError in this case. + # For additional background, see-- + # https://github.com/pypa/pip/issues/5868 + return sorted( + (record_path, hash_, str(size)) for record_path, hash_, size in outrows + ) + + +def _record_to_fs_path(record_path: RecordPath, lib_dir: str) -> str: + return os.path.join(lib_dir, record_path) + + +def _fs_to_record_path(path: str, lib_dir: str) -> RecordPath: + # On Windows, do not handle relative paths if they belong to different + # logical disks + if os.path.splitdrive(path)[0].lower() == os.path.splitdrive(lib_dir)[0].lower(): + path = os.path.relpath(path, lib_dir) + + path = path.replace(os.path.sep, "/") + return cast("RecordPath", path) + + +def get_csv_rows_for_installed( + old_csv_rows: List[List[str]], + installed: Dict[RecordPath, RecordPath], + changed: Set[RecordPath], + generated: List[str], + lib_dir: str, +) -> List[InstalledCSVRow]: + """ + :param installed: A map from archive RECORD path to installation RECORD + path. + """ + installed_rows: List[InstalledCSVRow] = [] + for row in old_csv_rows: + if len(row) > 3: + logger.warning("RECORD line has more than three elements: %s", row) + old_record_path = cast("RecordPath", row[0]) + new_record_path = installed.pop(old_record_path, old_record_path) + if new_record_path in changed: + digest, length = rehash(_record_to_fs_path(new_record_path, lib_dir)) + else: + digest = row[1] if len(row) > 1 else "" + length = row[2] if len(row) > 2 else "" + installed_rows.append((new_record_path, digest, length)) + for f in generated: + path = _fs_to_record_path(f, lib_dir) + digest, length = rehash(f) + installed_rows.append((path, digest, length)) + return installed_rows + [ + (installed_record_path, "", "") for installed_record_path in installed.values() + ] + + +def get_console_script_specs(console: Dict[str, str]) -> List[str]: + """ + Given the mapping from entrypoint name to callable, return the relevant + console script specs. + """ + # Don't mutate caller's version + console = console.copy() + + scripts_to_generate = [] + + # Special case pip and setuptools to generate versioned wrappers + # + # The issue is that some projects (specifically, pip and setuptools) use + # code in setup.py to create "versioned" entry points - pip2.7 on Python + # 2.7, pip3.3 on Python 3.3, etc. But these entry points are baked into + # the wheel metadata at build time, and so if the wheel is installed with + # a *different* version of Python the entry points will be wrong. The + # correct fix for this is to enhance the metadata to be able to describe + # such versioned entry points. + # Currently, projects using versioned entry points will either have + # incorrect versioned entry points, or they will not be able to distribute + # "universal" wheels (i.e., they will need a wheel per Python version). 
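+    #
+    # For example (illustrative values): on CPython 3.11, with no
+    # ENSUREPIP_OPTIONS set, an entry point of
+    # "pip = pip._internal.cli.main:main" expands to the three specs
+    # "pip", "pip3" and "pip3.11", all pointing at the same callable.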
+ # + # Because setuptools and pip are bundled with _ensurepip and virtualenv, + # we need to use universal wheels. As a workaround, we + # override the versioned entry points in the wheel and generate the + # correct ones. + # + # To add the level of hack in this section of code, in order to support + # ensurepip this code will look for an ``ENSUREPIP_OPTIONS`` environment + # variable which will control which version scripts get installed. + # + # ENSUREPIP_OPTIONS=altinstall + # - Only pipX.Y and easy_install-X.Y will be generated and installed + # ENSUREPIP_OPTIONS=install + # - pipX.Y, pipX, easy_install-X.Y will be generated and installed. Note + # that this option is technically if ENSUREPIP_OPTIONS is set and is + # not altinstall + # DEFAULT + # - The default behavior is to install pip, pipX, pipX.Y, easy_install + # and easy_install-X.Y. + pip_script = console.pop("pip", None) + if pip_script: + if "ENSUREPIP_OPTIONS" not in os.environ: + scripts_to_generate.append("pip = " + pip_script) + + if os.environ.get("ENSUREPIP_OPTIONS", "") != "altinstall": + scripts_to_generate.append(f"pip{sys.version_info[0]} = {pip_script}") + + scripts_to_generate.append(f"pip{get_major_minor_version()} = {pip_script}") + # Delete any other versioned pip entry points + pip_ep = [k for k in console if re.match(r"pip(\d+(\.\d+)?)?$", k)] + for k in pip_ep: + del console[k] + easy_install_script = console.pop("easy_install", None) + if easy_install_script: + if "ENSUREPIP_OPTIONS" not in os.environ: + scripts_to_generate.append("easy_install = " + easy_install_script) + + scripts_to_generate.append( + f"easy_install-{get_major_minor_version()} = {easy_install_script}" + ) + # Delete any other versioned easy_install entry points + easy_install_ep = [ + k for k in console if re.match(r"easy_install(-\d+\.\d+)?$", k) + ] + for k in easy_install_ep: + del console[k] + + # Generate the console entry points specified in the wheel + scripts_to_generate.extend(starmap("{} = {}".format, console.items())) + + return scripts_to_generate + + +class ZipBackedFile: + def __init__( + self, src_record_path: RecordPath, dest_path: str, zip_file: ZipFile + ) -> None: + self.src_record_path = src_record_path + self.dest_path = dest_path + self._zip_file = zip_file + self.changed = False + + def _getinfo(self) -> ZipInfo: + return self._zip_file.getinfo(self.src_record_path) + + def save(self) -> None: + # When we open the output file below, any existing file is truncated + # before we start writing the new contents. This is fine in most + # cases, but can cause a segfault if pip has loaded a shared + # object (e.g. from pyopenssl through its vendored urllib3) + # Since the shared object is mmap'd an attempt to call a + # symbol in it will then cause a segfault. Unlinking the file + # allows writing of new contents while allowing the process to + # continue to use the old copy. + if os.path.exists(self.dest_path): + os.unlink(self.dest_path) + + zipinfo = self._getinfo() + + # optimization: the file is created by open(), + # skip the decompression when there is 0 bytes to decompress. 
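+        # The copy below streams the archive member in chunks of at most
+        # 1 MiB (shutil.copyfileobj with a bounded blocksize) rather than
+        # reading the whole file into memory.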
+ with open(self.dest_path, "wb") as dest: + if zipinfo.file_size > 0: + with self._zip_file.open(zipinfo) as f: + blocksize = min(zipinfo.file_size, 1024 * 1024) + shutil.copyfileobj(f, dest, blocksize) + + if zip_item_is_executable(zipinfo): + set_extracted_file_to_default_mode_plus_executable(self.dest_path) + + +class ScriptFile: + def __init__(self, file: "File") -> None: + self._file = file + self.src_record_path = self._file.src_record_path + self.dest_path = self._file.dest_path + self.changed = False + + def save(self) -> None: + self._file.save() + self.changed = fix_script(self.dest_path) + + +class MissingCallableSuffix(InstallationError): + def __init__(self, entry_point: str) -> None: + super().__init__( + f"Invalid script entry point: {entry_point} - A callable " + "suffix is required. Cf https://packaging.python.org/" + "specifications/entry-points/#use-for-scripts for more " + "information." + ) + + +def _raise_for_invalid_entrypoint(specification: str) -> None: + entry = get_export_entry(specification) + if entry is not None and entry.suffix is None: + raise MissingCallableSuffix(str(entry)) + + +class PipScriptMaker(ScriptMaker): + def make( + self, specification: str, options: Optional[Dict[str, Any]] = None + ) -> List[str]: + _raise_for_invalid_entrypoint(specification) + return super().make(specification, options) + + +def _install_wheel( # noqa: C901, PLR0915 function is too long + name: str, + wheel_zip: ZipFile, + wheel_path: str, + scheme: Scheme, + pycompile: bool = True, + warn_script_location: bool = True, + direct_url: Optional[DirectUrl] = None, + requested: bool = False, +) -> None: + """Install a wheel. + + :param name: Name of the project to install + :param wheel_zip: open ZipFile for wheel being installed + :param scheme: Distutils scheme dictating the install directories + :param req_description: String used in place of the requirement, for + logging + :param pycompile: Whether to byte-compile installed Python files + :param warn_script_location: Whether to check that scripts are installed + into a directory on PATH + :raises UnsupportedWheel: + * when the directory holds an unpacked wheel with incompatible + Wheel-Version + * when the .dist-info dir does not match the wheel + """ + info_dir, metadata = parse_wheel(wheel_zip, name) + + if wheel_root_is_purelib(metadata): + lib_dir = scheme.purelib + else: + lib_dir = scheme.platlib + + # Record details of the files moved + # installed = files copied from the wheel to the destination + # changed = files changed while installing (scripts #! 
line typically)
+    #   generated = files newly generated during the install (script wrappers)
+    installed: Dict[RecordPath, RecordPath] = {}
+    changed: Set[RecordPath] = set()
+    generated: List[str] = []
+
+    def record_installed(
+        srcfile: RecordPath, destfile: str, modified: bool = False
+    ) -> None:
+        """Map archive RECORD paths to installation RECORD paths."""
+        newpath = _fs_to_record_path(destfile, lib_dir)
+        installed[srcfile] = newpath
+        if modified:
+            changed.add(newpath)
+
+    def is_dir_path(path: RecordPath) -> bool:
+        return path.endswith("/")
+
+    def assert_no_path_traversal(dest_dir_path: str, target_path: str) -> None:
+        if not is_within_directory(dest_dir_path, target_path):
+            message = (
+                "The wheel {!r} has a file {!r} trying to install"
+                " outside the target directory {!r}"
+            )
+            raise InstallationError(
+                message.format(wheel_path, target_path, dest_dir_path)
+            )
+
+    def root_scheme_file_maker(
+        zip_file: ZipFile, dest: str
+    ) -> Callable[[RecordPath], "File"]:
+        def make_root_scheme_file(record_path: RecordPath) -> "File":
+            normed_path = os.path.normpath(record_path)
+            dest_path = os.path.join(dest, normed_path)
+            assert_no_path_traversal(dest, dest_path)
+            return ZipBackedFile(record_path, dest_path, zip_file)
+
+        return make_root_scheme_file
+
+    def data_scheme_file_maker(
+        zip_file: ZipFile, scheme: Scheme
+    ) -> Callable[[RecordPath], "File"]:
+        scheme_paths = {key: getattr(scheme, key) for key in SCHEME_KEYS}
+
+        def make_data_scheme_file(record_path: RecordPath) -> "File":
+            normed_path = os.path.normpath(record_path)
+            try:
+                _, scheme_key, dest_subpath = normed_path.split(os.path.sep, 2)
+            except ValueError:
+                message = (
+                    f"Unexpected file in {wheel_path}: {record_path!r}. .data directory"
+                    " contents should be named like: '<scheme key>/<path>'."
+                )
+                raise InstallationError(message)
+
+            try:
+                scheme_path = scheme_paths[scheme_key]
+            except KeyError:
+                valid_scheme_keys = ", ".join(sorted(scheme_paths))
+                message = (
+                    f"Unknown scheme key used in {wheel_path}: {scheme_key} "
+                    f"(for file {record_path!r}). 
.data directory contents " + f"should be in subdirectories named with a valid scheme " + f"key ({valid_scheme_keys})" + ) + raise InstallationError(message) + + dest_path = os.path.join(scheme_path, dest_subpath) + assert_no_path_traversal(scheme_path, dest_path) + return ZipBackedFile(record_path, dest_path, zip_file) + + return make_data_scheme_file + + def is_data_scheme_path(path: RecordPath) -> bool: + return path.split("/", 1)[0].endswith(".data") + + paths = cast(List[RecordPath], wheel_zip.namelist()) + file_paths = filterfalse(is_dir_path, paths) + root_scheme_paths, data_scheme_paths = partition(is_data_scheme_path, file_paths) + + make_root_scheme_file = root_scheme_file_maker(wheel_zip, lib_dir) + files: Iterator[File] = map(make_root_scheme_file, root_scheme_paths) + + def is_script_scheme_path(path: RecordPath) -> bool: + parts = path.split("/", 2) + return len(parts) > 2 and parts[0].endswith(".data") and parts[1] == "scripts" + + other_scheme_paths, script_scheme_paths = partition( + is_script_scheme_path, data_scheme_paths + ) + + make_data_scheme_file = data_scheme_file_maker(wheel_zip, scheme) + other_scheme_files = map(make_data_scheme_file, other_scheme_paths) + files = chain(files, other_scheme_files) + + # Get the defined entry points + distribution = get_wheel_distribution( + FilesystemWheel(wheel_path), + canonicalize_name(name), + ) + console, gui = get_entrypoints(distribution) + + def is_entrypoint_wrapper(file: "File") -> bool: + # EP, EP.exe and EP-script.py are scripts generated for + # entry point EP by setuptools + path = file.dest_path + name = os.path.basename(path) + if name.lower().endswith(".exe"): + matchname = name[:-4] + elif name.lower().endswith("-script.py"): + matchname = name[:-10] + elif name.lower().endswith(".pya"): + matchname = name[:-4] + else: + matchname = name + # Ignore setuptools-generated scripts + return matchname in console or matchname in gui + + script_scheme_files: Iterator[File] = map( + make_data_scheme_file, script_scheme_paths + ) + script_scheme_files = filterfalse(is_entrypoint_wrapper, script_scheme_files) + script_scheme_files = map(ScriptFile, script_scheme_files) + files = chain(files, script_scheme_files) + + existing_parents = set() + for file in files: + # directory creation is lazy and after file filtering + # to ensure we don't install empty dirs; empty dirs can't be + # uninstalled. + parent_dir = os.path.dirname(file.dest_path) + if parent_dir not in existing_parents: + ensure_dir(parent_dir) + existing_parents.add(parent_dir) + file.save() + record_installed(file.src_record_path, file.dest_path, file.changed) + + def pyc_source_file_paths() -> Generator[str, None, None]: + # We de-duplicate installation paths, since there can be overlap (e.g. + # file in .data maps to same location as file in wheel root). + # Sorting installation paths makes it easier to reproduce and debug + # issues related to permissions on existing files. 
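+        # (Illustrative: an installed "pkg/mod.py" is later byte-compiled to
+        # "pkg/__pycache__/mod.cpython-311.pyc"; the exact location comes
+        # from importlib.util.cache_from_source in pyc_output_path below,
+        # and the "311" tag depends on the running interpreter.)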
+ for installed_path in sorted(set(installed.values())): + full_installed_path = os.path.join(lib_dir, installed_path) + if not os.path.isfile(full_installed_path): + continue + if not full_installed_path.endswith(".py"): + continue + yield full_installed_path + + def pyc_output_path(path: str) -> str: + """Return the path the pyc file would have been written to.""" + return importlib.util.cache_from_source(path) + + # Compile all of the pyc files for the installed files + if pycompile: + with contextlib.redirect_stdout( + StreamWrapper.from_stream(sys.stdout) + ) as stdout: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + for path in pyc_source_file_paths(): + success = compileall.compile_file(path, force=True, quiet=True) + if success: + pyc_path = pyc_output_path(path) + assert os.path.exists(pyc_path) + pyc_record_path = cast( + "RecordPath", pyc_path.replace(os.path.sep, "/") + ) + record_installed(pyc_record_path, pyc_path) + logger.debug(stdout.getvalue()) + + maker = PipScriptMaker(None, scheme.scripts) + + # Ensure old scripts are overwritten. + # See https://github.com/pypa/pip/issues/1800 + maker.clobber = True + + # Ensure we don't generate any variants for scripts because this is almost + # never what somebody wants. + # See https://bitbucket.org/pypa/distlib/issue/35/ + maker.variants = {""} + + # This is required because otherwise distlib creates scripts that are not + # executable. + # See https://bitbucket.org/pypa/distlib/issue/32/ + maker.set_mode = True + + # Generate the console and GUI entry points specified in the wheel + scripts_to_generate = get_console_script_specs(console) + + gui_scripts_to_generate = list(starmap("{} = {}".format, gui.items())) + + generated_console_scripts = maker.make_multiple(scripts_to_generate) + generated.extend(generated_console_scripts) + + generated.extend(maker.make_multiple(gui_scripts_to_generate, {"gui": True})) + + if warn_script_location: + msg = message_about_scripts_not_on_PATH(generated_console_scripts) + if msg is not None: + logger.warning(msg) + + generated_file_mode = 0o666 & ~current_umask() + + @contextlib.contextmanager + def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]: + with adjacent_tmp_file(path, **kwargs) as f: + yield f + os.chmod(f.name, generated_file_mode) + replace(f.name, path) + + dest_info_dir = os.path.join(lib_dir, info_dir) + + # Record pip as the installer + installer_path = os.path.join(dest_info_dir, "INSTALLER") + with _generate_file(installer_path) as installer_file: + installer_file.write(b"pip\n") + generated.append(installer_path) + + # Record the PEP 610 direct URL reference + if direct_url is not None: + direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME) + with _generate_file(direct_url_path) as direct_url_file: + direct_url_file.write(direct_url.to_json().encode("utf-8")) + generated.append(direct_url_path) + + # Record the REQUESTED file + if requested: + requested_path = os.path.join(dest_info_dir, "REQUESTED") + with open(requested_path, "wb"): + pass + generated.append(requested_path) + + record_text = distribution.read_text("RECORD") + record_rows = list(csv.reader(record_text.splitlines())) + + rows = get_csv_rows_for_installed( + record_rows, + installed=installed, + changed=changed, + generated=generated, + lib_dir=lib_dir, + ) + + # Record details of all files installed + record_path = os.path.join(dest_info_dir, "RECORD") + + with _generate_file(record_path, **csv_io_kwargs("w")) as record_file: + # 
Explicitly cast to typing.IO[str] as a workaround for the mypy error: + # "writer" has incompatible type "BinaryIO"; expected "_Writer" + writer = csv.writer(cast("IO[str]", record_file)) + writer.writerows(_normalized_outrows(rows)) + + +@contextlib.contextmanager +def req_error_context(req_description: str) -> Generator[None, None, None]: + try: + yield + except InstallationError as e: + message = f"For req: {req_description}. {e.args[0]}" + raise InstallationError(message) from e + + +def install_wheel( + name: str, + wheel_path: str, + scheme: Scheme, + req_description: str, + pycompile: bool = True, + warn_script_location: bool = True, + direct_url: Optional[DirectUrl] = None, + requested: bool = False, +) -> None: + with ZipFile(wheel_path, allowZip64=True) as z: + with req_error_context(req_description): + _install_wheel( + name=name, + wheel_zip=z, + wheel_path=wheel_path, + scheme=scheme, + pycompile=pycompile, + warn_script_location=warn_script_location, + direct_url=direct_url, + requested=requested, + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py new file mode 100644 index 0000000000000000000000000000000000000000..e6aa344720028f422840a720d26f5cfab358062c --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py @@ -0,0 +1,732 @@ +"""Prepares a distribution for installation +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +import mimetypes +import os +import shutil +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Optional + +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.distributions import make_distribution_for_install_requirement +from pip._internal.distributions.installed import InstalledDistribution +from pip._internal.exceptions import ( + DirectoryUrlHashUnsupported, + HashMismatch, + HashUnpinned, + InstallationError, + MetadataInconsistent, + NetworkConnectionError, + VcsHashUnsupported, +) +from pip._internal.index.package_finder import PackageFinder +from pip._internal.metadata import BaseDistribution, get_metadata_distribution +from pip._internal.models.direct_url import ArchiveInfo +from pip._internal.models.link import Link +from pip._internal.models.wheel import Wheel +from pip._internal.network.download import BatchDownloader, Downloader +from pip._internal.network.lazy_wheel import ( + HTTPRangeRequestUnsupported, + dist_from_wheel_url, +) +from pip._internal.network.session import PipSession +from pip._internal.operations.build.build_tracker import BuildTracker +from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils._log import getLogger +from pip._internal.utils.direct_url_helpers import ( + direct_url_for_editable, + direct_url_from_link, +) +from pip._internal.utils.hashes import Hashes, MissingHashes +from pip._internal.utils.logging import indent_log +from pip._internal.utils.misc import ( + display_path, + hash_file, + hide_url, + redact_auth_from_requirement, +) +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.unpacking import unpack_file +from pip._internal.vcs import vcs + +logger = getLogger(__name__) + + +def _get_prepared_distribution( + req: InstallRequirement, + build_tracker: BuildTracker, + finder: 
PackageFinder, + build_isolation: bool, + check_build_deps: bool, +) -> BaseDistribution: + """Prepare a distribution for installation.""" + abstract_dist = make_distribution_for_install_requirement(req) + tracker_id = abstract_dist.build_tracker_id + if tracker_id is not None: + with build_tracker.track(req, tracker_id): + abstract_dist.prepare_distribution_metadata( + finder, build_isolation, check_build_deps + ) + return abstract_dist.get_metadata_distribution() + + +def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None: + vcs_backend = vcs.get_backend_for_scheme(link.scheme) + assert vcs_backend is not None + vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity) + + +@dataclass +class File: + path: str + content_type: Optional[str] = None + + def __post_init__(self) -> None: + if self.content_type is None: + self.content_type = mimetypes.guess_type(self.path)[0] + + +def get_http_url( + link: Link, + download: Downloader, + download_dir: Optional[str] = None, + hashes: Optional[Hashes] = None, +) -> File: + temp_dir = TempDirectory(kind="unpack", globally_managed=True) + # If a download dir is specified, is the file already downloaded there? + already_downloaded_path = None + if download_dir: + already_downloaded_path = _check_download_dir(link, download_dir, hashes) + + if already_downloaded_path: + from_path = already_downloaded_path + content_type = None + else: + # let's download to a tmp dir + from_path, content_type = download(link, temp_dir.path) + if hashes: + hashes.check_against_path(from_path) + + return File(from_path, content_type) + + +def get_file_url( + link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None +) -> File: + """Get file and optionally check its hash.""" + # If a download dir is specified, is the file already there and valid? + already_downloaded_path = None + if download_dir: + already_downloaded_path = _check_download_dir(link, download_dir, hashes) + + if already_downloaded_path: + from_path = already_downloaded_path + else: + from_path = link.file_path + + # If --require-hashes is off, `hashes` is either empty, the + # link's embedded hash, or MissingHashes; it is required to + # match. If --require-hashes is on, we are satisfied by any + # hash in `hashes` matching: a URL-based or an option-based + # one; no internet-sourced hash will be in `hashes`. + if hashes: + hashes.check_against_path(from_path) + return File(from_path, None) + + +def unpack_url( + link: Link, + location: str, + download: Downloader, + verbosity: int, + download_dir: Optional[str] = None, + hashes: Optional[Hashes] = None, +) -> Optional[File]: + """Unpack link into location, downloading if required. + + :param hashes: A Hashes object, one of whose embedded hashes must match, + or HashMismatch will be raised. If the Hashes is empty, no matches are + required, and unhashable types of requirements (like VCS ones, which + would ordinarily raise HashUnsupported) are allowed. + """ + # non-editable vcs urls + if link.is_vcs: + unpack_vcs_link(link, location, verbosity=verbosity) + return None + + assert not link.is_existing_dir() + + # file urls + if link.is_file: + file = get_file_url(link, download_dir, hashes=hashes) + + # http urls + else: + file = get_http_url( + link, + download, + download_dir, + hashes=hashes, + ) + + # unpack the archive to the build dir location. 
Even when only downloading + # archives, they have to be unpacked to parse dependencies, except wheels. + if not link.is_wheel: + unpack_file(file.path, location, file.content_type) + + return file + + +def _check_download_dir( + link: Link, + download_dir: str, + hashes: Optional[Hashes], + warn_on_hash_mismatch: bool = True, +) -> Optional[str]: + """Check download_dir for a previously downloaded file with the correct hash. + If a correct file is found, return its path; otherwise return None. + """ + download_path = os.path.join(download_dir, link.filename) + + if not os.path.exists(download_path): + return None + + # If already downloaded, does its hash match? + logger.info("File was already downloaded %s", download_path) + if hashes: + try: + hashes.check_against_path(download_path) + except HashMismatch: + if warn_on_hash_mismatch: + logger.warning( + "Previously-downloaded file %s has bad hash. Re-downloading.", + download_path, + ) + os.unlink(download_path) + return None + return download_path + + +class RequirementPreparer: + """Prepares a Requirement""" + + def __init__( + self, + build_dir: str, + download_dir: Optional[str], + src_dir: str, + build_isolation: bool, + check_build_deps: bool, + build_tracker: BuildTracker, + session: PipSession, + progress_bar: str, + finder: PackageFinder, + require_hashes: bool, + use_user_site: bool, + lazy_wheel: bool, + verbosity: int, + legacy_resolver: bool, + ) -> None: + super().__init__() + + self.src_dir = src_dir + self.build_dir = build_dir + self.build_tracker = build_tracker + self._session = session + self._download = Downloader(session, progress_bar) + self._batch_download = BatchDownloader(session, progress_bar) + self.finder = finder + + # Where still-packed archives should be written to. If None, they are + # not saved, and are deleted immediately after unpacking. + self.download_dir = download_dir + + # Is build isolation allowed? + self.build_isolation = build_isolation + + # Should build dependencies be checked? + self.check_build_deps = check_build_deps + + # Should hash-checking be required? + self.require_hashes = require_hashes + + # Should install in user site-packages? + self.use_user_site = use_user_site + + # Should wheels be downloaded lazily? + self.use_lazy_wheel = lazy_wheel + + # How verbose should underlying tooling be? + self.verbosity = verbosity + + # Are we using the legacy resolver? + self.legacy_resolver = legacy_resolver + + # Memoized downloaded files, as mapping of url: path.
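+        # Illustrative entry only (hypothetical URL and path), e.g. + # {"https://example.org/foo-1.0-py3-none-any.whl": "/tmp/pip-unpack-x/foo-1.0-py3-none-any.whl"}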
+ self._downloaded: Dict[str, str] = {} + + # Previous "header" printed for a link-based InstallRequirement + self._previous_requirement_header = ("", "") + + def _log_preparing_link(self, req: InstallRequirement) -> None: + """Provide context for the requirement being prepared.""" + if req.link.is_file and not req.is_wheel_from_cache: + message = "Processing %s" + information = str(display_path(req.link.file_path)) + else: + message = "Collecting %s" + information = redact_auth_from_requirement(req.req) if req.req else str(req) + + # If we used req.req, inject requirement source if available (this + # would already be included if we used req directly) + if req.req and req.comes_from: + if isinstance(req.comes_from, str): + comes_from: Optional[str] = req.comes_from + else: + comes_from = req.comes_from.from_path() + if comes_from: + information += f" (from {comes_from})" + + if (message, information) != self._previous_requirement_header: + self._previous_requirement_header = (message, information) + logger.info(message, information) + + if req.is_wheel_from_cache: + with indent_log(): + logger.info("Using cached %s", req.link.filename) + + def _ensure_link_req_src_dir( + self, req: InstallRequirement, parallel_builds: bool + ) -> None: + """Ensure source_dir of a linked InstallRequirement.""" + # source_dir is only set for editable requirements. + if req.link.is_wheel: + # We don't need to unpack wheels, so no need for a source + # directory. + return + assert req.source_dir is None + if req.link.is_existing_dir(): + # build local directories in-tree + req.source_dir = req.link.file_path + return + + # We always delete unpacked sdists after pip runs. + req.ensure_has_source_dir( + self.build_dir, + autodelete=True, + parallel_builds=parallel_builds, + ) + req.ensure_pristine_source_checkout() + + def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: + # By the time this is called, the requirement's link should have + # been checked so we can tell what kind of requirement req is + # and raise some more informative errors than otherwise. + # (For example, we can raise VcsHashUnsupported for a VCS URL + # rather than HashMissing.) + if not self.require_hashes: + return req.hashes(trust_internet=True) + + # We could check these first 2 conditions inside unpack_url + # and save repetition of conditions, but then we would + # report less-useful error messages for unhashable + # requirements, complaining that there's no hash provided. + if req.link.is_vcs: + raise VcsHashUnsupported() + if req.link.is_existing_dir(): + raise DirectoryUrlHashUnsupported() + + # Unpinned packages are asking for trouble when a new version + # is uploaded. This isn't a security check, but it saves users + # a surprising hash mismatch in the future. + # file:/// URLs aren't pinnable, so don't complain about them + # not being pinned. + if not req.is_direct and not req.is_pinned: + raise HashUnpinned() + + # If known-good hashes are missing for this requirement, + # shim it with a facade object that will provoke hash + # computation and then raise a HashMissing exception + # showing the user what the hash should be.
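+        # (Descriptive note, not pip API documentation: MissingHashes acts as an + # "empty" Hashes whose check_against_path() computes the file's sha256 and + # raises HashMissing with that digest, so the error can show the hash to pin.)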
+ return req.hashes(trust_internet=False) or MissingHashes() + + def _fetch_metadata_only( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + if self.legacy_resolver: + logger.debug( + "Metadata-only fetching is not used in the legacy resolver", + ) + return None + if self.require_hashes: + logger.debug( + "Metadata-only fetching is not used as hash checking is required", + ) + return None + # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. + return self._fetch_metadata_using_link_data_attr( + req + ) or self._fetch_metadata_using_lazy_wheel(req.link) + + def _fetch_metadata_using_link_data_attr( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + """Fetch metadata from the data-dist-info-metadata attribute, if possible.""" + # (1) Get the link to the metadata file, if provided by the backend. + metadata_link = req.link.metadata_link() + if metadata_link is None: + return None + assert req.req is not None + logger.verbose( + "Obtaining dependency information for %s from %s", + req.req, + metadata_link, + ) + # (2) Download the contents of the METADATA file, separate from the dist itself. + metadata_file = get_http_url( + metadata_link, + self._download, + hashes=metadata_link.as_hashes(), + ) + with open(metadata_file.path, "rb") as f: + metadata_contents = f.read() + # (3) Generate a dist just from those file contents. + metadata_dist = get_metadata_distribution( + metadata_contents, + req.link.filename, + req.req.name, + ) + # (4) Ensure the Name: field from the METADATA file matches the name from the + # install requirement. + # + # NB: raw_name will fall back to the name from the install requirement if + # the Name: field is not present, but it's noted in the raw_name docstring + # that that should NEVER happen anyway. + if canonicalize_name(metadata_dist.raw_name) != canonicalize_name(req.req.name): + raise MetadataInconsistent( + req, "Name", req.req.name, metadata_dist.raw_name + ) + return metadata_dist + + def _fetch_metadata_using_lazy_wheel( + self, + link: Link, + ) -> Optional[BaseDistribution]: + """Fetch metadata using lazy wheel, if possible.""" + # --use-feature=fast-deps must be provided. + if not self.use_lazy_wheel: + return None + if link.is_file or not link.is_wheel: + logger.debug( + "Lazy wheel is not used as %r does not point to a remote wheel", + link, + ) + return None + + wheel = Wheel(link.filename) + name = canonicalize_name(wheel.name) + logger.info( + "Obtaining dependency information from %s %s", + name, + wheel.version, + ) + url = link.url.split("#", 1)[0] + try: + return dist_from_wheel_url(name, url, self._session) + except HTTPRangeRequestUnsupported: + logger.debug("%s does not support range requests", url) + return None + + def _complete_partial_requirements( + self, + partially_downloaded_reqs: Iterable[InstallRequirement], + parallel_builds: bool = False, + ) -> None: + """Download any requirements which were only fetched by metadata.""" + # Download to a temporary directory. These will be copied over as + # needed for downstream 'download', 'wheel', and 'install' commands. + temp_dir = TempDirectory(kind="unpack", globally_managed=True).path + + # Map each link to the requirement that owns it. This allows us to set + # `req.local_file_path` on the appropriate requirement after passing + # all the links at once into BatchDownloader. 
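+        # A hypothetical entry, purely for illustration: + # {Link("https://example.org/bar-2.0.tar.gz"): <InstallRequirement bar==2.0>}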
links_to_fully_download: Dict[Link, InstallRequirement] = {} + for req in partially_downloaded_reqs: + assert req.link + links_to_fully_download[req.link] = req + + batch_download = self._batch_download( + links_to_fully_download.keys(), + temp_dir, + ) + for link, (filepath, _) in batch_download: + logger.debug("Downloading link %s to %s", link, filepath) + req = links_to_fully_download[link] + # Record the downloaded file path so wheel reqs can extract a Distribution + # in .get_dist(). + req.local_file_path = filepath + # Record that the file is downloaded so we don't do it again in + # _prepare_linked_requirement(). + self._downloaded[req.link.url] = filepath + + # If this is an sdist, we need to unpack it after downloading, but the + # .source_dir won't be set up until we are in _prepare_linked_requirement(). + # Add the downloaded archive to the install requirement to unpack after + # preparing the source dir. + if not req.is_wheel: + req.needs_unpacked_archive(Path(filepath)) + + # This step is necessary to ensure all lazy wheels are processed + # successfully by the 'download', 'wheel', and 'install' commands. + for req in partially_downloaded_reqs: + self._prepare_linked_requirement(req, parallel_builds) + + def prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool = False + ) -> BaseDistribution: + """Prepare a requirement to be obtained from req.link.""" + assert req.link + self._log_preparing_link(req) + with indent_log(): + # Check if the relevant file is already available + # in the download directory + file_path = None + if self.download_dir is not None and req.link.is_wheel: + hashes = self._get_linked_req_hashes(req) + file_path = _check_download_dir( + req.link, + self.download_dir, + hashes, + # When a locally built wheel has been found in cache, we don't warn + # about re-downloading when the already downloaded wheel hash does + # not match. This is because the hash must be checked against the + # original link, not the cached link. In that case the already + # downloaded file will be removed and re-fetched from cache (which + # implies a hash check against the cache entry's origin.json). + warn_on_hash_mismatch=not req.is_wheel_from_cache, + ) + + if file_path is not None: + # The file is already available, so mark it as downloaded + self._downloaded[req.link.url] = file_path + else: + # The file is not available, attempt to fetch only metadata + metadata_dist = self._fetch_metadata_only(req) + if metadata_dist is not None: + req.needs_more_preparation = True + return metadata_dist + + # None of the optimizations worked, fully prepare the requirement + return self._prepare_linked_requirement(req, parallel_builds) + + def prepare_linked_requirements_more( + self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False + ) -> None: + """Further prepare linked requirements, if needed.""" + reqs = [req for req in reqs if req.needs_more_preparation] + for req in reqs: + # Determine if any of these requirements were already downloaded. + if self.download_dir is not None and req.link.is_wheel: + hashes = self._get_linked_req_hashes(req) + file_path = _check_download_dir(req.link, self.download_dir, hashes) + if file_path is not None: + self._downloaded[req.link.url] = file_path + req.needs_more_preparation = False + + # Prepare requirements we found were already downloaded for some + # reason. The other downloads will be completed separately.
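+        # That is: reqs whose file is already present locally are fully prepared + # in the loop below, while the rest are handed to + # _complete_partial_requirements() for a batch download.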
+ partially_downloaded_reqs: List[InstallRequirement] = [] + for req in reqs: + if req.needs_more_preparation: + partially_downloaded_reqs.append(req) + else: + self._prepare_linked_requirement(req, parallel_builds) + + # TODO: separate this part out from RequirementPreparer when the v1 + # resolver can be removed! + self._complete_partial_requirements( + partially_downloaded_reqs, + parallel_builds=parallel_builds, + ) + + def _prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool + ) -> BaseDistribution: + assert req.link + link = req.link + + hashes = self._get_linked_req_hashes(req) + + if hashes and req.is_wheel_from_cache: + assert req.download_info is not None + assert link.is_wheel + assert link.is_file + # We need to verify hashes, and we have found the requirement in the cache + # of locally built wheels. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and req.download_info.info.hashes + and hashes.has_one_of(req.download_info.info.hashes) + ): + # At this point we know the requirement was built from a hashable source + # artifact, and we verified that the cache entry's hash of the original + # artifact matches one of the hashes we expect. We don't verify hashes + # against the cached wheel, because the wheel is not the original. + hashes = None + else: + logger.warning( + "The hashes of the source archive found in cache entry " + "don't match, ignoring cached built wheel " + "and re-downloading source." + ) + req.link = req.cached_wheel_source_link + link = req.link + + self._ensure_link_req_src_dir(req, parallel_builds) + + if link.is_existing_dir(): + local_file = None + elif link.url not in self._downloaded: + try: + local_file = unpack_url( + link, + req.source_dir, + self._download, + self.verbosity, + self.download_dir, + hashes, + ) + except NetworkConnectionError as exc: + raise InstallationError( + f"Could not install requirement {req} because of HTTP " + f"error {exc} for URL {link}" + ) + else: + file_path = self._downloaded[link.url] + if hashes: + hashes.check_against_path(file_path) + local_file = File(file_path, content_type=None) + + # If download_info is set, we got it from the wheel cache. + if req.download_info is None: + # Editables don't go through this function (see + # prepare_editable_requirement). + assert not req.editable + req.download_info = direct_url_from_link(link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. + # FIXME: https://github.com/pypa/pip/issues/11943 + if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hashes + and local_file + ): + hash = hash_file(local_file.path)[0].hexdigest() + # We populate info.hash for backward compatibility. + # This will automatically populate info.hashes. + req.download_info.info.hash = f"sha256={hash}" + + # For use in later processing, + # preserve the file path on the requirement. + if local_file: + req.local_file_path = local_file.path + + dist = _get_prepared_distribution( + req, + self.build_tracker, + self.finder, + self.build_isolation, + self.check_build_deps, + ) + return dist + + def save_linked_requirement(self, req: InstallRequirement) -> None: + assert self.download_dir is not None + assert req.link is not None + link = req.link + if link.is_vcs or (link.is_existing_dir() and req.editable): + # Make a .zip of the source_dir we already created. 
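+            # (Note: the archive is typically written as <name>-<version>.zip + # inside download_dir; see InstallRequirement.archive.)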
+ req.archive(self.download_dir) + return + + if link.is_existing_dir(): + logger.debug( + "Not copying link to destination directory " + "since it is a directory: %s", + link, + ) + return + if req.local_file_path is None: + # No distribution was downloaded for this requirement. + return + + download_location = os.path.join(self.download_dir, link.filename) + if not os.path.exists(download_location): + shutil.copy(req.local_file_path, download_location) + download_path = display_path(download_location) + logger.info("Saved %s", download_path) + + def prepare_editable_requirement( + self, + req: InstallRequirement, + ) -> BaseDistribution: + """Prepare an editable requirement.""" + assert req.editable, "cannot prepare a non-editable req as editable" + + logger.info("Obtaining %s", req) + + with indent_log(): + if self.require_hashes: + raise InstallationError( + f"The editable requirement {req} cannot be installed when " + "requiring hashes, because there is no single file to " + "hash." + ) + req.ensure_has_source_dir(self.src_dir) + req.update_editable() + assert req.source_dir + req.download_info = direct_url_for_editable(req.unpacked_source_directory) + + dist = _get_prepared_distribution( + req, + self.build_tracker, + self.finder, + self.build_isolation, + self.check_build_deps, + ) + + req.check_if_exists(self.use_user_site) + + return dist + + def prepare_installed_requirement( + self, + req: InstallRequirement, + skip_reason: str, + ) -> BaseDistribution: + """Prepare an already-installed requirement.""" + assert req.satisfied_by, "req should have been satisfied but isn't" + assert skip_reason is not None, ( + "did not get a skip reason even though req.satisfied_by " + f"is set to {req.satisfied_by}" + ) + logger.info( + "Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version + ) + with indent_log(): + if self.require_hashes: + logger.debug( + "Since it is already installed, we are trusting this " + "package without checking its hash. To ensure a " + "completely repeatable environment, install into an " + "empty virtualenv."
+ ) + return InstalledDistribution(req).get_metadata_distribution() diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b86d6fad0d209c4ed01ae84e7b494dd492aecc68 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f95cb4485051bddf7bf1e7abaa8c3d35e54fd114 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/direct_url_helpers.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/direct_url_helpers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..074796d3db3394bda0ddcc493b7df279a2fb3956 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/direct_url_helpers.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f367f67c43cb5caa579b7e908fb6ddfd8172894e Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46b1b88635e67181f2cf6c01078c388ef1ee6b89 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/logging.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/logging.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ff70f0196a13a40496fb257b6487d9d61538ff6 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/logging.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/retry.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/retry.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..8be358eb560ca21b03c975b610f04b0914f5c822 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/retry.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/urls.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/urls.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12b9b692529346f1b6d4c556bf47dadd051688dc Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/urls.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..478e030ba1c7b8be93789bd929798266f628b50b Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..811313a9e198ef085cec7ef69b0951ffe0a9e0a8 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af01f0d819e8e00c1ef8c068afca364314a9fbb1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3c574fc34b8ec2da244845f12b242ce9b16fd4b9c42be6296bb74798712499 +size 151555 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe new file mode 100644 index 0000000000000000000000000000000000000000..daebd1c30353b2d9b016069da83ad5b4c8ee86bd --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a319ffaba23a017d7b1e18ba726ba6c54c53d6446db55f92af53c279894f8ad +size 101888 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/INSTALLER b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git 
a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/REQUESTED b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/entry_points.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..b429cbd8469abc2f6a5b716d8963fb41f1c479b1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/entry_points.txt @@ -0,0 +1,56 @@ +[distutils.commands] +alias = setuptools.command.alias:alias +bdist_egg = setuptools.command.bdist_egg:bdist_egg +bdist_rpm = setuptools.command.bdist_rpm:bdist_rpm +build = setuptools.command.build:build +build_clib = setuptools.command.build_clib:build_clib +build_ext = setuptools.command.build_ext:build_ext +build_py = setuptools.command.build_py:build_py +develop = setuptools.command.develop:develop +dist_info = setuptools.command.dist_info:dist_info +easy_install = setuptools.command.easy_install:easy_install +editable_wheel = setuptools.command.editable_wheel:editable_wheel +egg_info = setuptools.command.egg_info:egg_info +install = setuptools.command.install:install +install_egg_info = setuptools.command.install_egg_info:install_egg_info +install_lib = setuptools.command.install_lib:install_lib +install_scripts = setuptools.command.install_scripts:install_scripts +rotate = setuptools.command.rotate:rotate +saveopts = setuptools.command.saveopts:saveopts +sdist = setuptools.command.sdist:sdist +setopt = setuptools.command.setopt:setopt +test = setuptools.command.test:test +upload_docs = setuptools.command.upload_docs:upload_docs + +[distutils.setup_keywords] +dependency_links = setuptools.dist:assert_string_list +eager_resources = setuptools.dist:assert_string_list +entry_points = setuptools.dist:check_entry_points +exclude_package_data = setuptools.dist:check_package_data +extras_require = setuptools.dist:check_extras +include_package_data = setuptools.dist:assert_bool +install_requires = setuptools.dist:check_requirements +namespace_packages = setuptools.dist:check_nsp +package_data = setuptools.dist:check_package_data +packages = setuptools.dist:check_packages +python_requires = setuptools.dist:check_specifier +setup_requires = setuptools.dist:check_requirements +test_loader = setuptools.dist:check_importable +test_runner = setuptools.dist:check_importable +test_suite = setuptools.dist:check_test_suite +tests_require = setuptools.dist:check_requirements +use_2to3 = setuptools.dist:invalid_unless_false +zip_safe = setuptools.dist:assert_bool + +[egg_info.writers] +PKG-INFO = setuptools.command.egg_info:write_pkg_info +dependency_links.txt = setuptools.command.egg_info:overwrite_arg +eager_resources.txt = setuptools.command.egg_info:overwrite_arg +entry_points.txt = setuptools.command.egg_info:write_entries +namespace_packages.txt = setuptools.command.egg_info:overwrite_arg +requires.txt = setuptools.command.egg_info:write_requirements +top_level.txt = setuptools.command.egg_info:write_toplevel_names + +[setuptools.finalize_distribution_options] +keywords = setuptools.dist:Distribution._finalize_setup_keywords +parent_finalize = setuptools.dist:_Distribution.finalize_options diff --git 
a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/top_level.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..b5ac1070294b478b7cc2ce677207ee08813bfa37 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/setuptools-69.5.1.dist-info/top_level.txt @@ -0,0 +1,3 @@ +_distutils_hack +pkg_resources +setuptools diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ab8f72d8b2e31b1cb8456ee1c4b8e62d4aba252 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +__version__ = "0.45.1" diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_setuptools_logging.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_setuptools_logging.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00f983218920c35564852a1b19fe880321b50de6 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_setuptools_logging.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/macosx_libfile.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/macosx_libfile.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a796c2cc9924fb47808e1b4d5880f59a971848f Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/macosx_libfile.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/metadata.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/metadata.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43051f5293156bc8069eba86c9b12ac0e8d025cb Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/metadata.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_setuptools_logging.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_setuptools_logging.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a2482ba29ac5290c8f7d7688452ec3faf59332 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_setuptools_logging.py @@ -0,0 +1,26 @@ +# copied from setuptools.logging, omitting monkeypatching +from __future__ import annotations + +import logging +import sys + + +def _not_warning(record: logging.LogRecord) -> bool: + return record.levelno < logging.WARNING + + +def configure() -> None: + """ + Configure logging to emit warning and above to stderr + and everything else to stdout. This behavior is provided + for compatibility with distutils.log but may change in + the future. 
+ """ + err_handler = logging.StreamHandler() + err_handler.setLevel(logging.WARNING) + out_handler = logging.StreamHandler(sys.stdout) + out_handler.addFilter(_not_warning) + handlers = err_handler, out_handler + logging.basicConfig( + format="{message}", style="{", handlers=handlers, level=logging.DEBUG + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6ba1217f5bdb6106c37ecc2be74d53ef2237b717 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__init__.py @@ -0,0 +1,155 @@ +""" +Wheel command-line utility. +""" + +from __future__ import annotations + +import argparse +import os +import sys +from argparse import ArgumentTypeError + + +class WheelError(Exception): + pass + + +def unpack_f(args: argparse.Namespace) -> None: + from .unpack import unpack + + unpack(args.wheelfile, args.dest) + + +def pack_f(args: argparse.Namespace) -> None: + from .pack import pack + + pack(args.directory, args.dest_dir, args.build_number) + + +def convert_f(args: argparse.Namespace) -> None: + from .convert import convert + + convert(args.files, args.dest_dir, args.verbose) + + +def tags_f(args: argparse.Namespace) -> None: + from .tags import tags + + names = ( + tags( + wheel, + args.python_tag, + args.abi_tag, + args.platform_tag, + args.build, + args.remove, + ) + for wheel in args.wheel + ) + + for name in names: + print(name) + + +def version_f(args: argparse.Namespace) -> None: + from .. import __version__ + + print(f"wheel {__version__}") + + +def parse_build_tag(build_tag: str) -> str: + if build_tag and not build_tag[0].isdigit(): + raise ArgumentTypeError("build tag must begin with a digit") + elif "-" in build_tag: + raise ArgumentTypeError("invalid character ('-') in build tag") + + return build_tag + + +TAGS_HELP = """\ +Make a new wheel with given tags. Any tags unspecified will remain the same. +Starting the tags with a "+" will append to the existing tags. Starting with a +"-" will remove a tag (use --option=-TAG syntax). Multiple tags can be +separated by ".". The original file will remain unless --remove is given. The +output filename(s) will be displayed on stdout for further processing. +""" + + +def parser(): + p = argparse.ArgumentParser() + s = p.add_subparsers(help="commands") + + unpack_parser = s.add_parser("unpack", help="Unpack wheel") + unpack_parser.add_argument( + "--dest", "-d", help="Destination directory", default="." 
+ ) + unpack_parser.add_argument("wheelfile", help="Wheel file") + unpack_parser.set_defaults(func=unpack_f) + + repack_parser = s.add_parser("pack", help="Repack wheel") + repack_parser.add_argument("directory", help="Root directory of the unpacked wheel") + repack_parser.add_argument( + "--dest-dir", + "-d", + default=os.path.curdir, + help="Directory to store the wheel (default %(default)s)", + ) + repack_parser.add_argument( + "--build-number", help="Build tag to use in the wheel name" + ) + repack_parser.set_defaults(func=pack_f) + + convert_parser = s.add_parser("convert", help="Convert egg or wininst to wheel") + convert_parser.add_argument("files", nargs="*", help="Files to convert") + convert_parser.add_argument( + "--dest-dir", + "-d", + default=os.path.curdir, + help="Directory to store wheels (default %(default)s)", + ) + convert_parser.add_argument("--verbose", "-v", action="store_true") + convert_parser.set_defaults(func=convert_f) + + tags_parser = s.add_parser( + "tags", help="Add or replace the tags on a wheel", description=TAGS_HELP + ) + tags_parser.add_argument("wheel", nargs="*", help="Existing wheel(s) to retag") + tags_parser.add_argument( + "--remove", + action="store_true", + help="Remove the original files, keeping only the renamed ones", + ) + tags_parser.add_argument( + "--python-tag", metavar="TAG", help="Specify an interpreter tag(s)" + ) + tags_parser.add_argument("--abi-tag", metavar="TAG", help="Specify an ABI tag(s)") + tags_parser.add_argument( + "--platform-tag", metavar="TAG", help="Specify a platform tag(s)" + ) + tags_parser.add_argument( + "--build", type=parse_build_tag, metavar="BUILD", help="Specify a build tag" + ) + tags_parser.set_defaults(func=tags_f) + + version_parser = s.add_parser("version", help="Print version and exit") + version_parser.set_defaults(func=version_f) + + help_parser = s.add_parser("help", help="Show this help") + help_parser.set_defaults(func=lambda args: p.print_help()) + + return p + + +def main(): + p = parser() + args = p.parse_args() + if not hasattr(args, "func"): + p.print_help() + else: + try: + args.func(args) + return 0 + except WheelError as e: + print(e, file=sys.stderr) + + return 1 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/convert.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/convert.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0419a5a479b81db02b78ee572009281613a499b8 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/convert.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/pack.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/pack.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3fd7a590fc49e088e4e72fb4e51972c4105dbba Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/pack.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/tags.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/tags.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef2c75bab3d2ea17d3109ba65b317a8f320b9005 Binary files /dev/null and 
b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/__pycache__/tags.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/tags.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/tags.py new file mode 100644 index 0000000000000000000000000000000000000000..88da72e9ec4a31e2427bdb2bcf2b3e0ce3c0beb0 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/cli/tags.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import email.policy +import itertools +import os +from collections.abc import Iterable +from email.parser import BytesParser + +from ..wheelfile import WheelFile + + +def _compute_tags(original_tags: Iterable[str], new_tags: str | None) -> set[str]: + """Add or replace tags. Supports dot-separated tags.""" + if new_tags is None: + return set(original_tags) + + if new_tags.startswith("+"): + return {*original_tags, *new_tags[1:].split(".")} + + if new_tags.startswith("-"): + return set(original_tags) - set(new_tags[1:].split(".")) + + return set(new_tags.split(".")) + + +def tags( + wheel: str, + python_tags: str | None = None, + abi_tags: str | None = None, + platform_tags: str | None = None, + build_tag: str | None = None, + remove: bool = False, +) -> str: + """Change the tags on a wheel file. + + The tags are left unchanged if they are not specified. To specify "none", + use ["none"]. To append to the previous tags, a tag should start with a + "+". If a tag starts with "-", it will be removed from existing tags. + Processing is done left to right. + + :param wheel: The path to the wheel file + :param python_tags: The Python tags to set + :param abi_tags: The ABI tags to set + :param platform_tags: The platform tags to set + :param build_tag: The build tag to set + :param remove: Remove the original wheel + """ + with WheelFile(wheel, "r") as f: + assert f.filename, f"{f.filename} must be available" + + wheel_info = f.read(f.dist_info_path + "/WHEEL") + info = BytesParser(policy=email.policy.compat32).parsebytes(wheel_info) + + original_wheel_name = os.path.basename(f.filename) + namever = f.parsed_filename.group("namever") + build = f.parsed_filename.group("build") + original_python_tags = f.parsed_filename.group("pyver").split(".") + original_abi_tags = f.parsed_filename.group("abi").split(".") + original_plat_tags = f.parsed_filename.group("plat").split(".") + + tags: list[str] = info.get_all("Tag", []) + existing_build_tag = info.get("Build") + + impls = {tag.split("-")[0] for tag in tags} + abivers = {tag.split("-")[1] for tag in tags} + platforms = {tag.split("-")[2] for tag in tags} + + if impls != set(original_python_tags): + msg = f"Wheel internal tags {impls!r} != filename tags {original_python_tags!r}" + raise AssertionError(msg) + + if abivers != set(original_abi_tags): + msg = f"Wheel internal tags {abivers!r} != filename tags {original_abi_tags!r}" + raise AssertionError(msg) + + if platforms != set(original_plat_tags): + msg = ( + f"Wheel internal tags {platforms!r} != filename tags {original_plat_tags!r}" + ) + raise AssertionError(msg) + + if existing_build_tag != build: + msg = ( + f"Incorrect filename '{build}' " + f"& *.dist-info/WHEEL '{existing_build_tag}' build numbers" + ) + raise AssertionError(msg) + + # Start changing as needed + if build_tag is not None: + build = build_tag + + final_python_tags = sorted(_compute_tags(original_python_tags, python_tags)) + final_abi_tags = sorted(_compute_tags(original_abi_tags,
abi_tags)) + final_plat_tags = sorted(_compute_tags(original_plat_tags, platform_tags)) + + final_tags = [ + namever, + ".".join(final_python_tags), + ".".join(final_abi_tags), + ".".join(final_plat_tags), + ] + if build: + final_tags.insert(1, build) + + final_wheel_name = "-".join(final_tags) + ".whl" + + if original_wheel_name != final_wheel_name: + del info["Tag"], info["Build"] + for a, b, c in itertools.product( + final_python_tags, final_abi_tags, final_plat_tags + ): + info["Tag"] = f"{a}-{b}-{c}" + if build: + info["Build"] = build + + original_wheel_path = os.path.join( + os.path.dirname(f.filename), original_wheel_name + ) + final_wheel_path = os.path.join(os.path.dirname(f.filename), final_wheel_name) + + with WheelFile(original_wheel_path, "r") as fin, WheelFile( + final_wheel_path, "w" + ) as fout: + fout.comment = fin.comment # preserve the comment + for item in fin.infolist(): + if item.is_dir(): + continue + if item.filename == f.dist_info_path + "/RECORD": + continue + if item.filename == f.dist_info_path + "/WHEEL": + fout.writestr(item, info.as_bytes()) + else: + fout.writestr(item, fin.read(item)) + + if remove: + os.remove(original_wheel_path) + + return final_wheel_name diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..343d54b651261d177d45515b150c9eef030ae3b8 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/_tokenizer.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/_tokenizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..853fa5dbeb6b51339de4bf8160dcae6bf64cb7ce Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/_tokenizer.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/markers.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/markers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dc0076455debe38fcb049edbd20b4b7ac8fcd42 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/markers.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/requirements.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/requirements.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f4ba5d473a4efc2f9a256350b05c954ae9e2510 Binary files /dev/null and 
b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/requirements.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/utils.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a73d70afa723ca7dcbe062df060ab1fd7ae4376 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/vendored/packaging/__pycache__/utils.cpython-311.pyc differ
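A minimal sketch (not part of the diff above) restating the tag-merging rules implemented by _compute_tags in wheel/cli/tags.py: a bare value replaces the tags, a leading "+" appends, a leading "-" removes, and "." separates multiple tags. The function and values below are illustrative only.

def compute_tags(original, new):
    # Mirrors wheel's _compute_tags; None leaves the tags unchanged.
    if new is None:
        return set(original)
    if new.startswith("+"):
        return {*original, *new[1:].split(".")}
    if new.startswith("-"):
        return set(original) - set(new[1:].split("."))
    return set(new.split("."))

# Hypothetical tag values, demonstrating each rule:
assert compute_tags(["cp311"], None) == {"cp311"}
assert compute_tags(["cp311"], "+cp312") == {"cp311", "cp312"}
assert compute_tags(["cp311", "cp312"], "-cp311") == {"cp312"}
assert compute_tags(["cp311"], "py2.py3") == {"py2", "py3"}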