koichi12 commited on
Commit
4344bb0
·
verified ·
1 Parent(s): 7d0a722

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc +3 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so +3 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py +69 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py +21 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc +0 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py +306 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py +882 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py +152 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py +364 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py +127 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py +868 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py +124 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py +1160 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py +324 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py +133 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/mapping.py +247 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py +742 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py +302 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py +6 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD +20 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt +1568 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc +0 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py +81 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc +0 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py +74 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py +181 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py +258 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py +2 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc +0 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py +741 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py +732 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc +0 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -42,3 +42,7 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/F
42
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
44
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Parsing.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
42
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
44
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Parsing.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
45
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
46
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
47
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
48
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Optimize.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e280cea1d8a888db57222b560cb97e18e09c1c2613b47587acc78c908ac6124
3
+ size 243464
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Utils.cpython-311-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3aab0e85d88c1a18bbe9b720f126c115c22a16b91ee8006dbd1d6fa5de099eb
3
+ size 396760
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from importlib.metadata import entry_points
2
+
3
+ from . import _version, caching
4
+ from .callbacks import Callback
5
+ from .compression import available_compressions
6
+ from .core import get_fs_token_paths, open, open_files, open_local
7
+ from .exceptions import FSTimeoutError
8
+ from .mapping import FSMap, get_mapper
9
+ from .registry import (
10
+ available_protocols,
11
+ filesystem,
12
+ get_filesystem_class,
13
+ register_implementation,
14
+ registry,
15
+ )
16
+ from .spec import AbstractFileSystem
17
+
18
+ __version__ = _version.get_versions()["version"]
19
+
20
+ __all__ = [
21
+ "AbstractFileSystem",
22
+ "FSTimeoutError",
23
+ "FSMap",
24
+ "filesystem",
25
+ "register_implementation",
26
+ "get_filesystem_class",
27
+ "get_fs_token_paths",
28
+ "get_mapper",
29
+ "open",
30
+ "open_files",
31
+ "open_local",
32
+ "registry",
33
+ "caching",
34
+ "Callback",
35
+ "available_protocols",
36
+ "available_compressions",
37
+ ]
38
+
39
+
40
def process_entries():
    """Register filesystem implementations advertised via entry points.

    Scans the ``fsspec.specs`` entry-point group and registers each
    implementation under its entry-point name, skipping duplicate names
    within the group.
    """
    if entry_points is None:
        return
    try:
        eps = entry_points()
    except TypeError:
        # importlib-metadata < 0.8
        return
    # Python 3.10+ / importlib_metadata >= 3.9.0 expose ``select``;
    # older versions behave like a dict keyed by group name.
    if hasattr(eps, "select"):
        specs = eps.select(group="fsspec.specs")
    else:
        specs = eps.get("fsspec.specs", [])
    seen = set()
    for spec in specs:
        name = spec.name
        if name in seen:
            continue
        seen.add(name)
        register_implementation(
            name,
            spec.value.replace(":", "."),
            errtxt=f"Unable to load filesystem from {spec}",
            # We take our implementations as the ones to overload with if
            # for some reason we encounter some, may be the same, already
            # registered
            clobber=True,
        )


process_entries()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # This file was generated by 'versioneer.py' (0.29) from
3
+ # revision-control system data, or from the parent directory name of an
4
+ # unpacked source archive. Distribution tarballs contain a pre-generated copy
5
+ # of this file.
6
+
7
import json

# Static version metadata baked in by versioneer at release time.
version_json = '''
{
 "date": "2024-02-04T20:21:42-0500",
 "dirty": false,
 "error": null,
 "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
 "version": "2024.2.0"
}
'''  # END VERSION_JSON


def get_versions():
    """Decode and return the embedded version metadata as a dict."""
    info = json.loads(version_json)
    return info
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc ADDED
Binary file (20.1 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc ADDED
Binary file (19.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc ADDED
Binary file (6.17 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc ADDED
Binary file (5.39 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/arrow.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import errno
2
+ import io
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ from contextlib import suppress
7
+ from functools import cached_property, wraps
8
+ from urllib.parse import parse_qs
9
+
10
+ from fsspec.spec import AbstractFileSystem
11
+ from fsspec.utils import (
12
+ get_package_version_without_import,
13
+ infer_storage_options,
14
+ mirror_from,
15
+ tokenize,
16
+ )
17
+
18
+
19
def wrap_exceptions(func):
    """Decorator translating "does not exist" OSErrors to FileNotFoundError.

    The wrapped backend raises plain ``OSError`` for missing paths; fsspec
    callers expect ``FileNotFoundError``, so matching errors are re-wrapped
    while every other exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except OSError as exc:
            exc_args = exc.args
            if exc_args:
                message = exc_args[0]
                if isinstance(message, str) and "does not exist" in message:
                    raise FileNotFoundError(errno.ENOENT, message) from exc
            raise

    return wrapper
35
+
36
+
37
+ PYARROW_VERSION = None
38
+
39
+
40
class ArrowFSWrapper(AbstractFileSystem):
    """FSSpec-compatible wrapper of pyarrow.fs.FileSystem.

    Parameters
    ----------
    fs : pyarrow.fs.FileSystem

    """

    root_marker = "/"

    def __init__(self, fs, **kwargs):
        global PYARROW_VERSION
        # Probed without importing pyarrow, so the version gate in _open
        # works even before pyarrow itself is imported.
        PYARROW_VERSION = get_package_version_without_import("pyarrow")
        self.fs = fs
        super().__init__(**kwargs)

    @property
    def protocol(self):
        # Report whatever the wrapped pyarrow filesystem calls itself.
        return self.fs.type_name

    @cached_property
    def fsid(self):
        return "hdfs_" + tokenize(self.fs.host, self.fs.port)

    @classmethod
    def _strip_protocol(cls, path):
        """Return the bare path portion of a possibly-qualified URL."""
        ops = infer_storage_options(path)
        path = ops["path"]
        if path.startswith("//"):
            # special case for "hdfs://path" (without the triple slash)
            path = path[1:]
        return path

    def ls(self, path, detail=False, **kwargs):
        """List entries directly under ``path``.

        Returns full info dicts when ``detail`` is true, else just names.
        """
        path = self._strip_protocol(path)
        from pyarrow.fs import FileSelector

        entries = [
            self._make_entry(entry)
            for entry in self.fs.get_file_info(FileSelector(path))
        ]
        if detail:
            return entries
        else:
            return [entry["name"] for entry in entries]

    def info(self, path, **kwargs):
        """Return the fsspec-style info dict for a single path."""
        path = self._strip_protocol(path)
        [info] = self.fs.get_file_info([path])
        return self._make_entry(info)

    def exists(self, path):
        path = self._strip_protocol(path)
        try:
            self.info(path)
        except FileNotFoundError:
            return False
        else:
            return True

    def _make_entry(self, info):
        """Convert a pyarrow FileInfo into an fsspec info dict.

        Raises FileNotFoundError for ``FileType.NotFound`` entries.
        """
        from pyarrow.fs import FileType

        if info.type is FileType.Directory:
            kind = "directory"
        elif info.type is FileType.File:
            kind = "file"
        elif info.type is FileType.NotFound:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), info.path)
        else:
            kind = "other"

        return {
            "name": info.path,
            "size": info.size,
            "type": kind,
            "mtime": info.mtime,
        }

    @wrap_exceptions
    def cp_file(self, path1, path2, **kwargs):
        """Copy a single file by streaming through a temporary name.

        Writing to a temp name and then moving makes the destination appear
        atomically; the temp file is removed on any failure.
        """
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")

        with self._open(path1, "rb") as lstream:
            tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}"
            try:
                with self.open(tmp_fname, "wb") as rstream:
                    shutil.copyfileobj(lstream, rstream)
                self.fs.move(tmp_fname, path2)
            except BaseException:  # noqa
                with suppress(FileNotFoundError):
                    self.fs.delete_file(tmp_fname)
                raise

    @wrap_exceptions
    def mv(self, path1, path2, **kwargs):
        """Rename/move a file within the wrapped filesystem."""
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        self.fs.move(path1, path2)

    mv_file = mv

    @wrap_exceptions
    def rm_file(self, path):
        path = self._strip_protocol(path)
        self.fs.delete_file(path)

    @wrap_exceptions
    def rm(self, path, recursive=False, maxdepth=None):
        """Delete a file, or a directory tree when ``recursive=True``."""
        path = self._strip_protocol(path).rstrip("/")
        if self.isdir(path):
            if recursive:
                self.fs.delete_dir(path)
            else:
                # Fixed message: deleting a directory requires recursive=True
                # (previous text said "recursive=False", contradicting the check).
                raise ValueError("Can't delete directories without recursive=True")
        else:
            self.fs.delete_file(path)

    @wrap_exceptions
    def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs):
        """Open ``path``; ``seekable=False`` selects a streaming reader."""
        if mode == "rb":
            if seekable:
                method = self.fs.open_input_file
            else:
                method = self.fs.open_input_stream
        elif mode == "wb":
            method = self.fs.open_output_stream
        elif mode == "ab":
            method = self.fs.open_append_stream
        else:
            raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}")

        _kwargs = {}
        if mode != "rb" or not seekable:
            if int(PYARROW_VERSION.split(".")[0]) >= 4:
                # disable compression auto-detection
                _kwargs["compression"] = None
        stream = method(path, **_kwargs)

        return ArrowFile(self, stream, path, mode, block_size, **kwargs)

    @wrap_exceptions
    def mkdir(self, path, create_parents=True, **kwargs):
        path = self._strip_protocol(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            self.fs.create_dir(path, recursive=False)

    @wrap_exceptions
    def makedirs(self, path, exist_ok=False):
        path = self._strip_protocol(path)
        self.fs.create_dir(path, recursive=True)

    @wrap_exceptions
    def rmdir(self, path):
        path = self._strip_protocol(path)
        self.fs.delete_dir(path)

    @wrap_exceptions
    def modified(self, path):
        """Return the last-modified timestamp of ``path``."""
        path = self._strip_protocol(path)
        return self.fs.get_file_info(path).mtime

    def cat_file(self, path, start=None, end=None, **kwargs):
        # A ranged read needs a seekable handle; whole-file reads can stream.
        kwargs["seekable"] = start not in [None, 0]
        # Fixed: forward the requested byte range. Previously start/end were
        # re-passed as None, so ranged reads silently returned the whole file.
        return super().cat_file(path, start=start, end=end, **kwargs)

    def get_file(self, rpath, lpath, **kwargs):
        # Sequential download: a streaming (non-seekable) handle suffices.
        kwargs["seekable"] = False
        super().get_file(rpath, lpath, **kwargs)
213
+
214
+
215
@mirror_from(
    "stream",
    [
        "read",
        "seek",
        "tell",
        "write",
        "readable",
        "writable",
        "close",
        "size",
        "seekable",
    ],
)
class ArrowFile(io.IOBase):
    """File-like wrapper whose I/O methods are mirrored from a pyarrow stream."""

    def __init__(self, fs, stream, path, mode, block_size=None, **kwargs):
        # Bookkeeping only — actual I/O is delegated to ``stream`` via
        # the ``mirror_from`` decorator above.
        self.fs = fs
        self.stream = stream
        self.path = path
        self.mode = mode
        self.kwargs = kwargs
        # Keep both spellings in sync, as callers use either attribute name.
        self.blocksize = self.block_size = block_size

    def __enter__(self):
        return self

    def __exit__(self, *args):
        return self.close()
245
+
246
+
247
class HadoopFileSystem(ArrowFSWrapper):
    """A wrapper on top of the pyarrow.fs.HadoopFileSystem
    to connect its interface with fsspec"""

    protocol = "hdfs"

    def __init__(
        self,
        host="default",
        port=0,
        user=None,
        kerb_ticket=None,
        replication=3,
        extra_conf=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        host: str
            Hostname, IP or "default" to try to read from Hadoop config
        port: int
            Port to connect on, or default from Hadoop config if 0
        user: str or None
            If given, connect as this username
        kerb_ticket: str or None
            If given, use this ticket for authentication
        replication: int
            set replication factor of file for write operations. default value is 3.
        extra_conf: None or dict
            Passed on to HadoopFileSystem
        """
        # Imported locally (and aliased so it does not shadow this class).
        from pyarrow.fs import HadoopFileSystem as PyArrowHDFS

        backend = PyArrowHDFS(
            host=host,
            port=port,
            user=user,
            kerb_ticket=kerb_ticket,
            replication=replication,
            extra_conf=extra_conf,
        )
        super().__init__(fs=backend, **kwargs)

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Derive constructor kwargs (host/user/port/replication) from a URL."""
        opts = infer_storage_options(path)
        out = {}
        # Map URL components onto the constructor's parameter names.
        for src_key, dest_key in (("host", "host"), ("username", "user"), ("port", "port")):
            if opts.get(src_key):
                out[dest_key] = opts[src_key]
        query = opts.get("url_query")
        if query:
            replication = parse_qs(query).get("replication")
            if replication:
                out["replication"] = int(replication[0])
        return out
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py ADDED
@@ -0,0 +1,882 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import tempfile
7
+ import time
8
+ import weakref
9
+ from shutil import rmtree
10
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
11
+
12
+ from fsspec import AbstractFileSystem, filesystem
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.compression import compr
15
+ from fsspec.core import BaseCache, MMapCache
16
+ from fsspec.exceptions import BlocksizeMismatchError
17
+ from fsspec.implementations.cache_mapper import create_cache_mapper
18
+ from fsspec.implementations.cache_metadata import CacheMetadata
19
+ from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
21
+ from fsspec.utils import infer_compression
22
+
23
+ if TYPE_CHECKING:
24
+ from fsspec.implementations.cache_mapper import AbstractCacheMapper
25
+
26
+ logger = logging.getLogger("fsspec.cached")
27
+
28
+
29
+ class WriteCachedTransaction(Transaction):
30
+ def complete(self, commit=True):
31
+ rpaths = [f.path for f in self.files]
32
+ lpaths = [f.fn for f in self.files]
33
+ if commit:
34
+ self.fs.put(lpaths, rpaths)
35
+ # else remove?
36
+ self.fs._intrans = False
37
+
38
+
39
+ class CachingFileSystem(AbstractFileSystem):
40
+ """Locally caching filesystem, layer over any other FS
41
+
42
+ This class implements chunk-wise local storage of remote files, for quick
43
+ access after the initial download. The files are stored in a given
44
+ directory with hashes of URLs for the filenames. If no directory is given,
45
+ a temporary one is used, which should be cleaned up by the OS after the
46
+ process ends. The files themselves are sparse (as implemented in
47
+ :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
48
+ takes up space.
49
+
50
+ Restrictions:
51
+
52
+ - the block-size must be the same for each access of a given file, unless
53
+ all blocks of the file have already been read
54
+ - caching can only be applied to file-systems which produce files
55
+ derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
56
+ allowed, for testing
57
+ """
58
+
59
+ protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
60
+
61
+ def __init__(
62
+ self,
63
+ target_protocol=None,
64
+ cache_storage="TMP",
65
+ cache_check=10,
66
+ check_files=False,
67
+ expiry_time=604800,
68
+ target_options=None,
69
+ fs=None,
70
+ same_names: bool | None = None,
71
+ compression=None,
72
+ cache_mapper: AbstractCacheMapper | None = None,
73
+ **kwargs,
74
+ ):
75
+ """
76
+
77
+ Parameters
78
+ ----------
79
+ target_protocol: str (optional)
80
+ Target filesystem protocol. Provide either this or ``fs``.
81
+ cache_storage: str or list(str)
82
+ Location to store files. If "TMP", this is a temporary directory,
83
+ and will be cleaned up by the OS when this process ends (or later).
84
+ If a list, each location will be tried in the order given, but
85
+ only the last will be considered writable.
86
+ cache_check: int
87
+ Number of seconds between reload of cache metadata
88
+ check_files: bool
89
+ Whether to explicitly see if the UID of the remote file matches
90
+ the stored one before using. Warning: some file systems such as
91
+ HTTP cannot reliably give a unique hash of the contents of some
92
+ path, so be sure to set this option to False.
93
+ expiry_time: int
94
+ The time in seconds after which a local copy is considered useless.
95
+ Set to falsy to prevent expiry. The default is equivalent to one
96
+ week.
97
+ target_options: dict or None
98
+ Passed to the instantiation of the FS, if fs is None.
99
+ fs: filesystem instance
100
+ The target filesystem to run against. Provide this or ``protocol``.
101
+ same_names: bool (optional)
102
+ By default, target URLs are hashed using a ``HashCacheMapper`` so
103
+ that files from different backends with the same basename do not
104
+ conflict. If this argument is ``true``, a ``BasenameCacheMapper``
105
+ is used instead. Other cache mapper options are available by using
106
+ the ``cache_mapper`` keyword argument. Only one of this and
107
+ ``cache_mapper`` should be specified.
108
+ compression: str (optional)
109
+ To decompress on download. Can be 'infer' (guess from the URL name),
110
+ one of the entries in ``fsspec.compression.compr``, or None for no
111
+ decompression.
112
+ cache_mapper: AbstractCacheMapper (optional)
113
+ The object use to map from original filenames to cached filenames.
114
+ Only one of this and ``same_names`` should be specified.
115
+ """
116
+ super().__init__(**kwargs)
117
+ if fs is None and target_protocol is None:
118
+ raise ValueError(
119
+ "Please provide filesystem instance(fs) or target_protocol"
120
+ )
121
+ if not (fs is None) ^ (target_protocol is None):
122
+ raise ValueError(
123
+ "Both filesystems (fs) and target_protocol may not be both given."
124
+ )
125
+ if cache_storage == "TMP":
126
+ tempdir = tempfile.mkdtemp()
127
+ storage = [tempdir]
128
+ weakref.finalize(self, self._remove_tempdir, tempdir)
129
+ else:
130
+ if isinstance(cache_storage, str):
131
+ storage = [cache_storage]
132
+ else:
133
+ storage = cache_storage
134
+ os.makedirs(storage[-1], exist_ok=True)
135
+ self.storage = storage
136
+ self.kwargs = target_options or {}
137
+ self.cache_check = cache_check
138
+ self.check_files = check_files
139
+ self.expiry = expiry_time
140
+ self.compression = compression
141
+
142
+ # Size of cache in bytes. If None then the size is unknown and will be
143
+ # recalculated the next time cache_size() is called. On writes to the
144
+ # cache this is reset to None.
145
+ self._cache_size = None
146
+
147
+ if same_names is not None and cache_mapper is not None:
148
+ raise ValueError(
149
+ "Cannot specify both same_names and cache_mapper in "
150
+ "CachingFileSystem.__init__"
151
+ )
152
+ if cache_mapper is not None:
153
+ self._mapper = cache_mapper
154
+ else:
155
+ self._mapper = create_cache_mapper(
156
+ same_names if same_names is not None else False
157
+ )
158
+
159
+ self.target_protocol = (
160
+ target_protocol
161
+ if isinstance(target_protocol, str)
162
+ else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
163
+ )
164
+ self._metadata = CacheMetadata(self.storage)
165
+ self.load_cache()
166
+ self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)
167
+
168
+ def _strip_protocol(path):
169
+ # acts as a method, since each instance has a difference target
170
+ return self.fs._strip_protocol(type(self)._strip_protocol(path))
171
+
172
+ self._strip_protocol: Callable = _strip_protocol
173
+
174
+ @staticmethod
175
+ def _remove_tempdir(tempdir):
176
+ try:
177
+ rmtree(tempdir)
178
+ except Exception:
179
+ pass
180
+
181
+ def _mkcache(self):
182
+ os.makedirs(self.storage[-1], exist_ok=True)
183
+
184
+ def cache_size(self):
185
+ """Return size of cache in bytes.
186
+
187
+ If more than one cache directory is in use, only the size of the last
188
+ one (the writable cache directory) is returned.
189
+ """
190
+ if self._cache_size is None:
191
+ cache_dir = self.storage[-1]
192
+ self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
193
+ return self._cache_size
194
+
195
+ def load_cache(self):
196
+ """Read set of stored blocks from file"""
197
+ self._metadata.load()
198
+ self._mkcache()
199
+ self.last_cache = time.time()
200
+
201
+ def save_cache(self):
202
+ """Save set of stored blocks from file"""
203
+ self._mkcache()
204
+ self._metadata.save()
205
+ self.last_cache = time.time()
206
+ self._cache_size = None
207
+
208
+ def _check_cache(self):
209
+ """Reload caches if time elapsed or any disappeared"""
210
+ self._mkcache()
211
+ if not self.cache_check:
212
+ # explicitly told not to bother checking
213
+ return
214
+ timecond = time.time() - self.last_cache > self.cache_check
215
+ existcond = all(os.path.exists(storage) for storage in self.storage)
216
+ if timecond or not existcond:
217
+ self.load_cache()
218
+
219
+ def _check_file(self, path):
220
+ """Is path in cache and still valid"""
221
+ path = self._strip_protocol(path)
222
+ self._check_cache()
223
+ return self._metadata.check_file(path, self)
224
+
225
+ def clear_cache(self):
226
+ """Remove all files and metadata from the cache
227
+
228
+ In the case of multiple cache locations, this clears only the last one,
229
+ which is assumed to be the read/write one.
230
+ """
231
+ rmtree(self.storage[-1])
232
+ self.load_cache()
233
+ self._cache_size = None
234
+
235
+ def clear_expired_cache(self, expiry_time=None):
236
+ """Remove all expired files and metadata from the cache
237
+
238
+ In the case of multiple cache locations, this clears only the last one,
239
+ which is assumed to be the read/write one.
240
+
241
+ Parameters
242
+ ----------
243
+ expiry_time: int
244
+ The time in seconds after which a local copy is considered useless.
245
+ If not defined the default is equivalent to the attribute from the
246
+ file caching instantiation.
247
+ """
248
+
249
+ if not expiry_time:
250
+ expiry_time = self.expiry
251
+
252
+ self._check_cache()
253
+
254
+ expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
255
+ for fn in expired_files:
256
+ if os.path.exists(fn):
257
+ os.remove(fn)
258
+
259
+ if writable_cache_empty:
260
+ rmtree(self.storage[-1])
261
+ self.load_cache()
262
+
263
+ self._cache_size = None
264
+
265
+ def pop_from_cache(self, path):
266
+ """Remove cached version of given file
267
+
268
+ Deletes local copy of the given (remote) path. If it is found in a cache
269
+ location which is not the last, it is assumed to be read-only, and
270
+ raises PermissionError
271
+ """
272
+ path = self._strip_protocol(path)
273
+ fn = self._metadata.pop_file(path)
274
+ if fn is not None:
275
+ os.remove(fn)
276
+ self._cache_size = None
277
+
278
    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS, and make it have a mmap
        cache pointing to the location which we determine, in our cache.
        The ``blocks`` instance is shared, so as the mmap cache instance
        updates, so does the entry in our ``cached_files`` attribute.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        path = self._strip_protocol(path)

        path = self.fs._strip_protocol(path)
        if "r" not in mode:
            # writing is passed straight through to the target filesystem
            return self.fs._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs,
            )
        detail = self._check_file(path)
        if detail:
            # file is in cache
            detail, fn = detail
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                # stored file is complete
                logger.debug("Opening local copy of %s", path)
                return open(fn, mode)
            # TODO: action where partial file exists in read-only cache
            logger.debug("Opening partially cached copy of %s", path)
        else:
            # not cached yet: register metadata for a new sparse local file
            hash = self._mapper(path)
            fn = os.path.join(self.storage[-1], hash)
            blocks = set()
            detail = {
                "original": path,
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self._metadata.update_file(path, detail)
            logger.debug("Creating local sparse file for %s", path)

        # call target filesystems open
        # cache_type="none" so blocks are only cached by our MMapCache below
        self._mkcache()
        f = self.fs._open(
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_options=cache_options,
            cache_type="none",
            **kwargs,
        )
        if self.compression:
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
        if "blocksize" in detail:
            # a partially cached file must be reopened with the block size it
            # was first written with, or the stored block set is meaningless
            if detail["blocksize"] != f.blocksize:
                raise BlocksizeMismatchError(
                    f"Cached file must be reopened with same block"
                    f" size as original (old: {detail['blocksize']},"
                    f" new {f.blocksize})"
                )
        else:
            detail["blocksize"] = f.blocksize
        # the ``blocks`` set is shared between the mmap cache and metadata
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
        close = f.close
        f.close = lambda: self.close_and_update(f, close)
        self.save_cache()
        return f
368
+
369
    def _parent(self, path):
        # delegate to the wrapped filesystem's notion of a parent path
        return self.fs._parent(path)
371
+
372
    def hash_name(self, path: str, *args: Any) -> str:
        """Map a remote path to its cache file name."""
        # Kept for backward compatibility with downstream libraries.
        # Ignores extra arguments, previously same_name boolean.
        return self._mapper(path)
376
+
377
    def close_and_update(self, f, close):
        """Called when a file is closing, so store the set of blocks

        Monkey-patched in as ``f.close`` by ``_open``; ``close`` is the
        file's original close method.
        """
        # idempotent: the patched close may be invoked more than once
        if f.closed:
            return
        path = self._strip_protocol(f.path)
        self._metadata.on_close_cached_file(f, path)
        try:
            logger.debug("going to save")
            self.save_cache()
            logger.debug("saved")
        except OSError:
            logger.debug("Cache saving failed while closing file")
        except NameError:
            # module globals may already be torn down at interpreter exit
            logger.debug("Cache save failed due to interpreter shutdown")
        close()
        f.closed = True
393
+
394
+ def __getattribute__(self, item):
395
+ if item in [
396
+ "load_cache",
397
+ "_open",
398
+ "save_cache",
399
+ "close_and_update",
400
+ "__init__",
401
+ "__getattribute__",
402
+ "__reduce__",
403
+ "_make_local_details",
404
+ "open",
405
+ "cat",
406
+ "cat_file",
407
+ "cat_ranges",
408
+ "get",
409
+ "read_block",
410
+ "tail",
411
+ "head",
412
+ "_check_file",
413
+ "_check_cache",
414
+ "_mkcache",
415
+ "clear_cache",
416
+ "clear_expired_cache",
417
+ "pop_from_cache",
418
+ "_mkcache",
419
+ "local_file",
420
+ "_paths_from_path",
421
+ "get_mapper",
422
+ "open_many",
423
+ "commit_many",
424
+ "hash_name",
425
+ "__hash__",
426
+ "__eq__",
427
+ "to_json",
428
+ "cache_size",
429
+ "pipe_file",
430
+ "pipe",
431
+ "start_transaction",
432
+ "end_transaction",
433
+ ]:
434
+ # all the methods defined in this class. Note `open` here, since
435
+ # it calls `_open`, but is actually in superclass
436
+ return lambda *args, **kw: getattr(type(self), item).__get__(self)(
437
+ *args, **kw
438
+ )
439
+ if item in ["__reduce_ex__"]:
440
+ raise AttributeError
441
+ if item in ["transaction"]:
442
+ # property
443
+ return type(self).transaction.__get__(self)
444
+ if item in ["_cache", "transaction_type"]:
445
+ # class attributes
446
+ return getattr(type(self), item)
447
+ if item == "__class__":
448
+ return type(self)
449
+ d = object.__getattribute__(self, "__dict__")
450
+ fs = d.get("fs", None) # fs is not immediately defined
451
+ if item in d:
452
+ return d[item]
453
+ elif fs is not None:
454
+ if item in fs.__dict__:
455
+ # attribute of instance
456
+ return fs.__dict__[item]
457
+ # attributed belonging to the target filesystem
458
+ cls = type(fs)
459
+ m = getattr(cls, item)
460
+ if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
461
+ not hasattr(m, "__self__") or m.__self__ is None
462
+ ):
463
+ # instance method
464
+ return m.__get__(fs, cls)
465
+ return m # class method or attribute
466
+ else:
467
+ # attributes of the superclass, while target is being set up
468
+ return super().__getattribute__(item)
469
+
470
+ def __eq__(self, other):
471
+ """Test for equality."""
472
+ if self is other:
473
+ return True
474
+ if not isinstance(other, type(self)):
475
+ return False
476
+ return (
477
+ self.storage == other.storage
478
+ and self.kwargs == other.kwargs
479
+ and self.cache_check == other.cache_check
480
+ and self.check_files == other.check_files
481
+ and self.expiry == other.expiry
482
+ and self.compression == other.compression
483
+ and self._mapper == other._mapper
484
+ and self.target_protocol == other.target_protocol
485
+ )
486
+
487
+ def __hash__(self):
488
+ """Calculate hash."""
489
+ return (
490
+ hash(tuple(self.storage))
491
+ ^ hash(str(self.kwargs))
492
+ ^ hash(self.cache_check)
493
+ ^ hash(self.check_files)
494
+ ^ hash(self.expiry)
495
+ ^ hash(self.compression)
496
+ ^ hash(self._mapper)
497
+ ^ hash(self.target_protocol)
498
+ )
499
+
500
+ def to_json(self):
501
+ """Calculate JSON representation.
502
+
503
+ Not implemented yet for CachingFileSystem.
504
+ """
505
+ raise NotImplementedError(
506
+ "CachingFileSystem JSON representation not implemented"
507
+ )
508
+
509
+
510
class WholeFileCacheFileSystem(CachingFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This is similar to ``CachingFileSystem``, but without
    the block-wise functionality and so can work even when sparse files
    are not allowed. See its docstring for definition of the init
    arguments.

    The class still needs access to the remote store for listing files,
    and may refresh cached files.
    """

    protocol = "filecache"
    local_file = True

    def open_many(self, open_files, **kwargs):
        """Open several files at once, bulk-downloading any not yet cached.

        Write modes return ``LocalTempFile`` instances, uploaded on commit.
        """
        paths = [of.path for of in open_files]
        if "r" in open_files.mode:
            self._mkcache()
        else:
            return [
                LocalTempFile(
                    self.fs,
                    path,
                    mode=open_files.mode,
                    fn=os.path.join(self.storage[-1], self._mapper(path)),
                    **kwargs,
                )
                for path in paths
            ]

        if self.compression:
            raise NotImplementedError
        details = [self._check_file(sp) for sp in paths]
        # only paths with falsy detail (not cached / invalid) are downloaded
        downpath = [p for p, d in zip(paths, details) if not d]
        downfn0 = [
            os.path.join(self.storage[-1], self._mapper(p))
            for p, d in zip(paths, details)
        ]  # keep these path names for opening later
        downfn = [fn for fn, d in zip(downfn0, details) if not d]
        if downpath:
            # skip if all files are already cached and up to date
            self.fs.get(downpath, downfn)

            # update metadata - only happens when downloads are successful
            newdetail = [
                {
                    "original": path,
                    "fn": self._mapper(path),
                    "blocks": True,
                    "time": time.time(),
                    "uid": self.fs.ukey(path),
                }
                for path in downpath
            ]
            for path, detail in zip(downpath, newdetail):
                self._metadata.update_file(path, detail)
            self.save_cache()

        def firstpart(fn):
            # helper to adapt both whole-file and simple-cache
            return fn[1] if isinstance(fn, tuple) else fn

        return [
            open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
            for fn0, fn1 in zip(details, downfn0)
        ]

    def commit_many(self, open_files):
        # upload all temp files in one batch, then remove the local copies
        self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
        [f.close() for f in open_files]
        for f in open_files:
            # in case autocommit is off, and so close did not already delete
            try:
                os.remove(f.name)
            except FileNotFoundError:
                pass
        self._cache_size = None

    def _make_local_details(self, path):
        """Register cache metadata for ``path``; return its local file name."""
        hash = self._mapper(path)
        fn = os.path.join(self.storage[-1], hash)
        detail = {
            "original": path,
            "fn": hash,
            "blocks": True,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self._metadata.update_file(path, detail)
        logger.debug("Copying %s to local cache", path)
        return fn

    def cat(
        self,
        path,
        recursive=False,
        on_error="raise",
        callback=DEFAULT_CALLBACK,
        **kwargs,
    ):
        """Fetch (potentially multiple) file contents, caching whole files.

        Missing files are downloaded in a single bulk ``get`` before the
        contents are read back from the local copies.
        """
        paths = self.expand_path(
            path, recursive=recursive, maxdepth=kwargs.get("maxdepth", None)
        )
        getpaths = []
        storepaths = []
        fns = []
        out = {}
        # iterate a copy so failed paths can be removed from ``paths``
        for p in paths.copy():
            try:
                detail = self._check_file(p)
                if not detail:
                    fn = self._make_local_details(p)
                    getpaths.append(p)
                    storepaths.append(fn)
                else:
                    detail, fn = detail if isinstance(detail, tuple) else (None, detail)
                fns.append(fn)
            except Exception as e:
                if on_error == "raise":
                    raise
                if on_error == "return":
                    out[p] = e
                paths.remove(p)

        if getpaths:
            self.fs.get(getpaths, storepaths)
            self.save_cache()

        callback.set_size(len(paths))
        for p, fn in zip(paths, fns):
            with open(fn, "rb") as f:
                out[p] = f.read()
            callback.relative_update(1)
        if isinstance(path, str) and len(paths) == 1 and recursive is False:
            # single-path call: return bytes rather than a dict
            out = out[paths[0]]
        return out

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if "r" not in mode:
            # write mode: local temp file, uploaded on commit
            fn = self._make_local_details(path)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                # those kwargs were added by open(), we don't want them
                if k not in ["autocommit", "block_size", "cache_options"]
            }
            return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
        detail = self._check_file(path)
        if detail:
            detail, fn = detail
            _, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s", path)

                # In order to support downstream filesystems to be able to
                # infer the compression from the original filename, like
                # the `TarFileSystem`, let's extend the `io.BufferedReader`
                # fileobject protocol by adding a dedicated attribute
                # `original`.
                f = open(fn, mode)
                f.original = detail.get("original")
                return f
            else:
                raise ValueError(
                    f"Attempt to open partially cached file {path}"
                    f" as a wholly cached file"
                )
        else:
            fn = self._make_local_details(path)
        kwargs["mode"] = mode

        # call target filesystems open
        self._mkcache()
        if self.compression:
            # decompress while streaming the remote file into the cache copy
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        self.save_cache()
        # recurse: the file is now wholly cached, first branch will hit
        return self._open(path, mode)
707
+
708
+
709
class SimpleCacheFileSystem(WholeFileCacheFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This implementation only copies whole files, and
    does not keep any metadata about the download time or file details.
    It is therefore safer to use in multi-threaded/concurrent situations.

    This is the only of the caching filesystems that supports write: you will
    be given a real local open file, and upon close and commit, it will be
    uploaded to the target filesystem; the writability or the target URL is
    not checked until that time.

    """

    protocol = "simplecache"
    local_file = True
    transaction_type = WriteCachedTransaction

    def __init__(self, **kwargs):
        kw = kwargs.copy()
        # metadata-free cache: force-disable all expiry/validity machinery
        for key in ["cache_check", "expiry_time", "check_files"]:
            kw[key] = False
        super().__init__(**kw)
        for storage in self.storage:
            if not os.path.exists(storage):
                os.makedirs(storage, exist_ok=True)

    def _check_file(self, path):
        """Return the local cache path for ``path`` if present, else None."""
        self._check_cache()
        sha = self._mapper(path)
        for storage in self.storage:
            fn = os.path.join(storage, sha)
            if os.path.exists(fn):
                return fn

    def save_cache(self):
        # no metadata kept, nothing to persist
        pass

    def load_cache(self):
        # no metadata kept, nothing to load
        pass

    def pipe_file(self, path, value=None, **kwargs):
        if self._intrans:
            # defer the upload until the transaction commits
            with self.open(path, "wb") as f:
                f.write(value)
        else:
            super().pipe_file(path, value)

    def pipe(self, path, value=None, **kwargs):
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        # NOTE(review): ``_check_file`` returns a local path or None, never
        # False, so the ``l is False`` filters below never select anything
        # and no pre-download occurs here -- presumably uncached paths were
        # meant to be fetched first; verify against upstream fsspec.
        lpaths = [self._check_file(p) for p in paths]
        rpaths = [p for l, p in zip(lpaths, paths) if l is False]
        lpaths = [l for l, p in zip(lpaths, paths) if l is False]
        self.fs.get(rpaths, lpaths)
        return super().cat_ranges(
            paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
        )

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        sha = self._mapper(path)

        if "r" not in mode:
            # write mode: local temp file, uploaded on commit
            fn = os.path.join(self.storage[-1], sha)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                if k not in ["autocommit", "block_size", "cache_options"]
            }  # those were added by open()
            return LocalTempFile(
                self,
                path,
                mode=mode,
                autocommit=not self._intrans,
                fn=fn,
                **user_specified_kwargs,
            )
        fn = self._check_file(path)
        if fn:
            return open(fn, mode)

        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache", path)
        kwargs["mode"] = mode

        self._mkcache()
        self._cache_size = None
        if self.compression:
            # decompress while streaming the remote file into the cache copy
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        # recurse: the file is now cached, the fast path above will hit
        return self._open(path, mode)
827
+
828
+
829
class LocalTempFile:
    """A temporary local file, which will be uploaded on commit

    Wraps a real local file handle (``fh``); attribute access is
    delegated to it. On ``close`` (with ``autocommit``) or ``commit``,
    the local file is uploaded to ``path`` on the target filesystem
    ``fs`` and then removed.
    """

    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
        # fn: local backing file; path: eventual remote destination
        self.fn = fn
        self.fh = open(fn, mode)
        self.mode = mode
        if seek:
            self.fh.seek(seek)
        self.path = path
        self.fs = fs
        self.closed = False
        self.autocommit = autocommit
        self.kwargs = kwargs

    def __reduce__(self):
        # always open in r+b to allow continuing writing at a location
        return (
            LocalTempFile,
            (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
        )

    def __enter__(self):
        return self.fh

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        # idempotent; the upload (commit) happens here when autocommit is on
        if self.closed:
            return
        self.fh.close()
        self.closed = True
        if self.autocommit:
            self.commit()

    def discard(self):
        """Abandon the temporary file without uploading it."""
        self.fh.close()
        # Fix: mark closed so a later close()/__exit__ does not try to
        # commit (upload) the file we are about to delete
        self.closed = True
        try:
            os.remove(self.fn)
        except FileNotFoundError:
            # already gone (e.g., discard called twice)
            pass

    def commit(self):
        """Upload the local file to the target filesystem and clean up."""
        self.fs.put(self.fn, self.path, **self.kwargs)
        try:
            os.remove(self.fn)
        except (PermissionError, FileNotFoundError):
            # file path may be held by new version of the file on windows
            pass

    @property
    def name(self):
        return self.fn

    def __getattr__(self, item):
        # delegate everything else (read/write/seek/...) to the real handle
        return getattr(self.fh, item)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dask.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dask
2
+ from distributed.client import Client, _get_global_client
3
+ from distributed.worker import Worker
4
+
5
+ from fsspec import filesystem
6
+ from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
7
+ from fsspec.utils import infer_storage_options
8
+
9
+
10
def _get_client(client):
    """Resolve *client* to a distributed ``Client`` instance."""
    if isinstance(client, Client):
        return client
    if client is None:
        return _get_global_client()
    # e.g., connection string
    return Client(client)
18
+
19
+
20
def _in_worker():
    """True when this code runs inside a dask ``Worker`` process."""
    return len(Worker._instances) > 0
22
+
23
+
24
class DaskWorkerFileSystem(AbstractFileSystem):
    """View files accessible to a worker as any other remote file-system

    When instances are run on the worker, uses the real filesystem. When
    run on the client, they call the worker to provide information or data.

    **Warning** this implementation is experimental, and read-only for now.
    """

    def __init__(
        self, target_protocol=None, target_options=None, fs=None, client=None, **kwargs
    ):
        super().__init__(**kwargs)
        # exactly one of fs / target_protocol must be given
        if not (fs is None) ^ (target_protocol is None):
            raise ValueError(
                "Please provide one of filesystem instance (fs) or"
                " target_protocol, not both"
            )
        self.target_protocol = target_protocol
        self.target_options = target_options
        self.worker = None
        self.client = client
        self.fs = fs
        self._determine_worker()

    @staticmethod
    def _get_kwargs_from_urls(path):
        # host:port in the URL becomes the scheduler connection string
        so = infer_storage_options(path)
        if "host" in so and "port" in so:
            return {"client": f"{so['host']}:{so['port']}"}
        else:
            return {}

    def _determine_worker(self):
        """Decide whether we run on a worker (direct fs access) or on the
        client (operations proxied through dask via ``self.rfs``)."""
        if _in_worker():
            self.worker = True
            if self.fs is None:
                self.fs = filesystem(
                    self.target_protocol, **(self.target_options or {})
                )
        else:
            self.worker = False
            self.client = _get_client(self.client)
            # delayed proxy: method calls on rfs execute on a worker
            self.rfs = dask.delayed(self)

    def mkdir(self, *args, **kwargs):
        if self.worker:
            self.fs.mkdir(*args, **kwargs)
        else:
            self.rfs.mkdir(*args, **kwargs).compute()

    def rm(self, *args, **kwargs):
        if self.worker:
            self.fs.rm(*args, **kwargs)
        else:
            self.rfs.rm(*args, **kwargs).compute()

    def copy(self, *args, **kwargs):
        if self.worker:
            self.fs.copy(*args, **kwargs)
        else:
            self.rfs.copy(*args, **kwargs).compute()

    def mv(self, *args, **kwargs):
        if self.worker:
            self.fs.mv(*args, **kwargs)
        else:
            self.rfs.mv(*args, **kwargs).compute()

    def ls(self, *args, **kwargs):
        if self.worker:
            return self.fs.ls(*args, **kwargs)
        else:
            return self.rfs.ls(*args, **kwargs).compute()

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        if self.worker:
            # on the worker: open the real file directly
            return self.fs._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs,
            )
        else:
            # on the client: a proxy file that fetches ranges via dask
            return DaskFile(
                fs=self,
                path=path,
                mode=mode,
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs,
            )

    def fetch_range(self, path, mode, start, end):
        """Read bytes ``[start, end)`` of ``path``, remotely if on the client."""
        if self.worker:
            with self._open(path, mode) as f:
                f.seek(start)
                return f.read(end - start)
        else:
            return self.rfs.fetch_range(path, mode, start, end).compute()
135
+
136
+
137
class DaskFile(AbstractBufferedFile):
    # Read-only proxy file: byte ranges are served by the parent
    # DaskWorkerFileSystem (via dask when running on the client).
    def __init__(self, mode="rb", **kwargs):
        if mode != "rb":
            raise ValueError('Remote dask files can only be opened in "rb" mode')
        super().__init__(**kwargs)

    def _upload_chunk(self, final=False):
        # read-only filesystem: uploads are no-ops
        pass

    def _initiate_upload(self):
        """Create remote file/upload"""
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.fs.fetch_range(self.path, self.mode, start, end)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/dirfs.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .. import filesystem
2
+ from ..asyn import AsyncFileSystem
3
+
4
+
5
class DirFileSystem(AsyncFileSystem):
    """Directory prefix filesystem

    The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with
    is relative to the `path`. After performing the necessary paths operation it
    delegates everything to the wrapped filesystem.
    """

    protocol = "dir"

    def __init__(
        self,
        path=None,
        fs=None,
        fo=None,
        target_protocol=None,
        target_options=None,
        **storage_options,
    ):
        """
        Parameters
        ----------
        path: str
            Path to the directory.
        fs: AbstractFileSystem
            An instantiated filesystem to wrap.
        target_protocol, target_options:
            if fs is none, construct it from these
        fo: str
            Alternate for path; do not provide both
        """
        super().__init__(**storage_options)
        if fs is None:
            fs = filesystem(protocol=target_protocol, **(target_options or {}))
        # exactly one of path / fo must be supplied
        if (path is not None) ^ (fo is not None) is False:
            raise ValueError("Provide path or fo, not both")
        path = path or fo

        if self.asynchronous and not fs.async_impl:
            raise ValueError("can't use asynchronous with non-async fs")

        if fs.async_impl and self.asynchronous != fs.asynchronous:
            raise ValueError("both dirfs and fs should be in the same sync/async mode")

        self.path = fs._strip_protocol(path)
        self.fs = fs

    def _join(self, path):
        """Prepend the directory prefix to a relative path (or list of paths)."""
        if isinstance(path, str):
            if not self.path:
                return path
            if not path:
                return self.path
            return self.fs.sep.join((self.path, self._strip_protocol(path)))
        return [self._join(_path) for _path in path]

    def _relpath(self, path):
        """Strip the directory prefix from an absolute path (or list of paths)."""
        if isinstance(path, str):
            if not self.path:
                return path
            if path == self.path:
                return ""
            prefix = self.path + self.fs.sep
            assert path.startswith(prefix)
            return path[len(prefix) :]
        return [self._relpath(_path) for _path in path]

    # Wrappers below: each sync/async pair joins the prefix onto incoming
    # paths and strips it from any paths in the result.

    @property
    def sep(self):
        return self.fs.sep

    async def set_session(self, *args, **kwargs):
        return await self.fs.set_session(*args, **kwargs)

    async def _rm_file(self, path, **kwargs):
        return await self.fs._rm_file(self._join(path), **kwargs)

    def rm_file(self, path, **kwargs):
        return self.fs.rm_file(self._join(path), **kwargs)

    async def _rm(self, path, *args, **kwargs):
        return await self.fs._rm(self._join(path), *args, **kwargs)

    def rm(self, path, *args, **kwargs):
        return self.fs.rm(self._join(path), *args, **kwargs)

    async def _cp_file(self, path1, path2, **kwargs):
        return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs)

    def cp_file(self, path1, path2, **kwargs):
        return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs)

    async def _copy(
        self,
        path1,
        path2,
        *args,
        **kwargs,
    ):
        return await self.fs._copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    def copy(self, path1, path2, *args, **kwargs):
        return self.fs.copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    async def _pipe(self, path, *args, **kwargs):
        return await self.fs._pipe(self._join(path), *args, **kwargs)

    def pipe(self, path, *args, **kwargs):
        return self.fs.pipe(self._join(path), *args, **kwargs)

    async def _pipe_file(self, path, *args, **kwargs):
        return await self.fs._pipe_file(self._join(path), *args, **kwargs)

    def pipe_file(self, path, *args, **kwargs):
        return self.fs.pipe_file(self._join(path), *args, **kwargs)

    async def _cat_file(self, path, *args, **kwargs):
        return await self.fs._cat_file(self._join(path), *args, **kwargs)

    def cat_file(self, path, *args, **kwargs):
        return self.fs.cat_file(self._join(path), *args, **kwargs)

    async def _cat(self, path, *args, **kwargs):
        ret = await self.fs._cat(
            self._join(path),
            *args,
            **kwargs,
        )

        if isinstance(ret, dict):
            # multi-path result: keys are paths and must lose the prefix
            return {self._relpath(key): value for key, value in ret.items()}

        return ret

    def cat(self, path, *args, **kwargs):
        ret = self.fs.cat(
            self._join(path),
            *args,
            **kwargs,
        )

        if isinstance(ret, dict):
            # multi-path result: keys are paths and must lose the prefix
            return {self._relpath(key): value for key, value in ret.items()}

        return ret

    async def _put_file(self, lpath, rpath, **kwargs):
        return await self.fs._put_file(lpath, self._join(rpath), **kwargs)

    def put_file(self, lpath, rpath, **kwargs):
        return self.fs.put_file(lpath, self._join(rpath), **kwargs)

    async def _put(
        self,
        lpath,
        rpath,
        *args,
        **kwargs,
    ):
        return await self.fs._put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    def put(self, lpath, rpath, *args, **kwargs):
        return self.fs.put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    async def _get_file(self, rpath, lpath, **kwargs):
        return await self.fs._get_file(self._join(rpath), lpath, **kwargs)

    def get_file(self, rpath, lpath, **kwargs):
        return self.fs.get_file(self._join(rpath), lpath, **kwargs)

    async def _get(self, rpath, *args, **kwargs):
        return await self.fs._get(self._join(rpath), *args, **kwargs)

    def get(self, rpath, *args, **kwargs):
        return self.fs.get(self._join(rpath), *args, **kwargs)

    async def _isfile(self, path):
        return await self.fs._isfile(self._join(path))

    def isfile(self, path):
        return self.fs.isfile(self._join(path))

    async def _isdir(self, path):
        return await self.fs._isdir(self._join(path))

    def isdir(self, path):
        return self.fs.isdir(self._join(path))

    async def _size(self, path):
        return await self.fs._size(self._join(path))

    def size(self, path):
        return self.fs.size(self._join(path))

    async def _exists(self, path):
        return await self.fs._exists(self._join(path))

    def exists(self, path):
        return self.fs.exists(self._join(path))

    async def _info(self, path, **kwargs):
        return await self.fs._info(self._join(path), **kwargs)

    def info(self, path, **kwargs):
        return self.fs.info(self._join(path), **kwargs)

    async def _ls(self, path, detail=True, **kwargs):
        ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
        if detail:
            # copy each entry before rewriting its name, to avoid mutating
            # any listing cached by the wrapped filesystem
            out = []
            for entry in ret:
                entry = entry.copy()
                entry["name"] = self._relpath(entry["name"])
                out.append(entry)
            return out

        return self._relpath(ret)

    def ls(self, path, detail=True, **kwargs):
        ret = self.fs.ls(self._join(path), detail=detail, **kwargs).copy()
        if detail:
            # copy each entry before rewriting its name, to avoid mutating
            # any listing cached by the wrapped filesystem
            out = []
            for entry in ret:
                entry = entry.copy()
                entry["name"] = self._relpath(entry["name"])
                out.append(entry)
            return out

        return self._relpath(ret)

    async def _walk(self, path, *args, **kwargs):
        async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    def walk(self, path, *args, **kwargs):
        for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    async def _glob(self, path, **kwargs):
        detail = kwargs.get("detail", False)
        ret = await self.fs._glob(self._join(path), **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    def glob(self, path, **kwargs):
        detail = kwargs.get("detail", False)
        ret = self.fs.glob(self._join(path), **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    async def _du(self, path, *args, **kwargs):
        total = kwargs.get("total", True)
        ret = await self.fs._du(self._join(path), *args, **kwargs)
        if total:
            # scalar total needs no path rewriting
            return ret

        return {self._relpath(path): size for path, size in ret.items()}

    def du(self, path, *args, **kwargs):
        total = kwargs.get("total", True)
        ret = self.fs.du(self._join(path), *args, **kwargs)
        if total:
            # scalar total needs no path rewriting
            return ret

        return {self._relpath(path): size for path, size in ret.items()}

    async def _find(self, path, *args, **kwargs):
        detail = kwargs.get("detail", False)
        ret = await self.fs._find(self._join(path), *args, **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    def find(self, path, *args, **kwargs):
        detail = kwargs.get("detail", False)
        ret = self.fs.find(self._join(path), *args, **kwargs)
        if detail:
            return {self._relpath(path): info for path, info in ret.items()}
        return self._relpath(ret)

    async def _expand_path(self, path, *args, **kwargs):
        return self._relpath(
            await self.fs._expand_path(self._join(path), *args, **kwargs)
        )

    def expand_path(self, path, *args, **kwargs):
        return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs))

    async def _mkdir(self, path, *args, **kwargs):
        return await self.fs._mkdir(self._join(path), *args, **kwargs)

    def mkdir(self, path, *args, **kwargs):
        return self.fs.mkdir(self._join(path), *args, **kwargs)

    async def _makedirs(self, path, *args, **kwargs):
        return await self.fs._makedirs(self._join(path), *args, **kwargs)

    def makedirs(self, path, *args, **kwargs):
        return self.fs.makedirs(self._join(path), *args, **kwargs)

    def rmdir(self, path):
        return self.fs.rmdir(self._join(path))

    def mv_file(self, path1, path2, **kwargs):
        return self.fs.mv_file(
            self._join(path1),
            self._join(path2),
            **kwargs,
        )

    def touch(self, path, **kwargs):
        return self.fs.touch(self._join(path), **kwargs)

    def created(self, path):
        return self.fs.created(self._join(path))

    def modified(self, path):
        return self.fs.modified(self._join(path))

    def sign(self, path, *args, **kwargs):
        return self.fs.sign(self._join(path), *args, **kwargs)

    def __repr__(self):
        return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})"

    def open(
        self,
        path,
        *args,
        **kwargs,
    ):
        return self.fs.open(
            self._join(path),
            *args,
            **kwargs,
        )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/git.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import pygit2
4
+
5
+ from fsspec.spec import AbstractFileSystem
6
+
7
+ from .memory import MemoryFile
8
+
9
+
10
class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)
    """

    root_marker = ""
    cachable = True

    def __init__(self, path=None, fo=None, ref=None, **kwargs):
        """

        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given).
            May be deprecated in favour of ``fo``. When used with a higher
            level function such as fsspec.open(), may be of the form
            "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
            file path should not contain "@" or ":").
        fo: str (optional)
            Same as ``path``, but passed as part of a chained URL. This one
            takes precedence if both are given.
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name. Defaults
            to current working tree. Note that ``ls`` and ``open`` also take hash,
            so this becomes the default for those operations
        kwargs
        """
        super().__init__(**kwargs)
        # ``fo`` (from a chained URL) takes precedence over ``path``;
        # fall back to the current working directory
        self.repo = pygit2.Repository(fo or path or os.getcwd())
        self.ref = ref or "master"

    @classmethod
    def _strip_protocol(cls, path):
        # Strip the optional repo-location (before ":") and ref (before "@")
        # components of a chained URL, leaving only the in-repo file path
        path = super()._strip_protocol(path).lstrip("/")
        if ":" in path:
            path = path.split(":", 1)[1]
        if "@" in path:
            path = path.split("@", 1)[1]
        return path.lstrip("/")

    def _path_to_object(self, path, ref):
        """Resolve *path* at *ref* to a pygit2 object (Tree for a directory,
        Blob for a file), walking the commit's tree one path component at a time."""
        comm, ref = self.repo.resolve_refish(ref or self.ref)
        parts = path.split("/")
        tree = comm.tree
        for part in parts:
            if part and isinstance(tree, pygit2.Tree):
                tree = tree[part]
        return tree

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Parse "git://[path-to-repo[:]][ref@]path" into constructor kwargs."""
        if path.startswith("git://"):
            path = path[6:]
        out = {}
        if ":" in path:
            out["path"], path = path.split(":", 1)
        if "@" in path:
            out["ref"], path = path.split("@", 1)
        return out

    def ls(self, path, detail=True, ref=None, **kwargs):
        """List the contents of *path* at *ref* (or the instance default ref)."""
        path = self._strip_protocol(path)
        tree = self._path_to_object(path, ref)
        if isinstance(tree, pygit2.Tree):
            out = []
            for obj in tree:
                if isinstance(obj, pygit2.Tree):
                    out.append(
                        {
                            "type": "directory",
                            "name": "/".join([path, obj.name]).lstrip("/"),
                            "hex": obj.hex,
                            "mode": f"{obj.filemode:o}",
                            "size": 0,
                        }
                    )
                else:
                    out.append(
                        {
                            "type": "file",
                            "name": "/".join([path, obj.name]).lstrip("/"),
                            "hex": obj.hex,
                            "mode": f"{obj.filemode:o}",
                            "size": obj.size,
                        }
                    )
        else:
            # path resolved to a single blob: report it as one file
            obj = tree
            out = [
                {
                    "type": "file",
                    "name": obj.name,
                    "hex": obj.hex,
                    "mode": f"{obj.filemode:o}",
                    "size": obj.size,
                }
            ]
        if detail:
            return out
        return [o["name"] for o in out]

    def ukey(self, path, ref=None):
        # the git object hash uniquely identifies the file content
        return self.info(path, ref=ref)["hex"]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        ref=None,
        **kwargs,
    ):
        # blob bytes are already in memory, so serve them via MemoryFile;
        # only "rb" semantics are meaningful here (repo content is read-only)
        obj = self._path_to_object(path, ref or self.ref)
        return MemoryFile(data=obj.data)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/http.py ADDED
@@ -0,0 +1,868 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import io
3
+ import logging
4
+ import re
5
+ import weakref
6
+ from copy import copy
7
+ from urllib.parse import urlparse
8
+
9
+ import aiohttp
10
+ import yarl
11
+
12
+ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.exceptions import FSTimeoutError
15
+ from fsspec.spec import AbstractBufferedFile
16
+ from fsspec.utils import (
17
+ DEFAULT_BLOCK_SIZE,
18
+ glob_translate,
19
+ isfilelike,
20
+ nullcontext,
21
+ tokenize,
22
+ )
23
+
24
+ from ..caching import AllBytes
25
+
26
+ # https://stackoverflow.com/a/15926317/3821154
27
+ ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
28
+ ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
29
+ logger = logging.getLogger("fsspec.http")
30
+
31
+
32
+ async def get_client(**kwargs):
33
+ return aiohttp.ClientSession(**kwargs)
34
+
35
+
36
class HTTPFileSystem(AsyncFileSystem):
    """
    Simple File-System for fetching data via HTTP(S)

    ``ls()`` is implemented by loading the parent page and doing a regex
    match on the result. If simple_links=True, anything of the form
    "http(s)://server.com/stuff?thing=other"; otherwise only links within
    HTML href tags will be used.
    """

    sep = "/"

    def __init__(
        self,
        simple_links=True,
        block_size=None,
        same_scheme=True,
        size_policy=None,
        cache_type="bytes",
        cache_options=None,
        asynchronous=False,
        loop=None,
        client_kwargs=None,
        get_client=get_client,
        encoded=False,
        **storage_options,
    ):
        """
        NB: if this is called async, you must await set_client

        Parameters
        ----------
        block_size: int
            Blocks to read bytes; if 0, will default to raw requests file-like
            objects instead of HTTPFile instances
        simple_links: bool
            If True, will consider both HTML <a> tags and anything that looks
            like a URL; if False, will consider only the former.
        same_scheme: True
            When doing ls/glob, if this is True, only consider paths that have
            http/https matching the input URLs.
        size_policy: this argument is deprecated
        client_kwargs: dict
            Passed to aiohttp.ClientSession, see
            https://docs.aiohttp.org/en/stable/client_reference.html
            For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
        get_client: Callable[..., aiohttp.ClientSession]
            A callable which takes keyword arguments and constructs
            an aiohttp.ClientSession. It's state will be managed by
            the HTTPFileSystem class.
        storage_options: key-value
            Any other parameters passed on to requests
        cache_type, cache_options: defaults used in open
        """
        super().__init__(self, asynchronous=asynchronous, loop=loop, **storage_options)
        self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
        self.simple_links = simple_links
        self.same_schema = same_scheme
        self.cache_type = cache_type
        self.cache_options = cache_options
        self.client_kwargs = client_kwargs or {}
        self.get_client = get_client
        self.encoded = encoded
        self.kwargs = storage_options
        # the aiohttp session is created lazily in ``set_session``
        self._session = None

        # Clean caching-related parameters from `storage_options`
        # before propagating them as `request_options` through `self.kwargs`.
        # TODO: Maybe rename `self.kwargs` to `self.request_options` to make
        # it clearer.
        request_options = copy(storage_options)
        self.use_listings_cache = request_options.pop("use_listings_cache", False)
        request_options.pop("listings_expiry_time", None)
        request_options.pop("max_paths", None)
        request_options.pop("skip_instance_cache", None)
        self.kwargs = request_options

    @property
    def fsid(self):
        # all HTTP filesystems share one identity
        return "http"

    def encode_url(self, url):
        """Wrap *url* in a yarl.URL, honouring the ``encoded`` setting."""
        return yarl.URL(url, encoded=self.encoded)

    @staticmethod
    def close_session(loop, session):
        """Best-effort close of the aiohttp session at finalization time."""
        if loop is not None and loop.is_running():
            try:
                sync(loop, session.close, timeout=0.1)
                return
            except (TimeoutError, FSTimeoutError, NotImplementedError):
                pass
        connector = getattr(session, "_connector", None)
        if connector is not None:
            # close after loop is dead
            connector._close()

    async def set_session(self):
        """Create (once) and return the aiohttp ClientSession."""
        if self._session is None:
            self._session = await self.get_client(loop=self.loop, **self.client_kwargs)
            if not self.asynchronous:
                # sync usage: arrange cleanup when this filesystem is GC'd
                weakref.finalize(self, self.close_session, self.loop, self._session)
        return self._session

    @classmethod
    def _strip_protocol(cls, path):
        """For HTTP, we always want to keep the full URL"""
        return path

    @classmethod
    def _parent(cls, path):
        # override, since _strip_protocol is different for URLs
        par = super()._parent(path)
        if len(par) > 7:  # "http://..."
            return par
        return ""

    async def _ls_real(self, url, detail=True, **kwargs):
        # ignoring URL-encoded arguments
        kw = self.kwargs.copy()
        kw.update(kwargs)
        logger.debug(url)
        session = await self.set_session()
        # NOTE(review): ``kw`` merges per-call kwargs but ``self.kwargs`` is
        # what is actually passed here, so per-call kwargs appear to be
        # ignored for this request — confirm whether intentional
        async with session.get(self.encode_url(url), **self.kwargs) as r:
            self._raise_not_found_for_status(r, url)
            text = await r.text()
        if self.simple_links:
            links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
        else:
            links = [u[2] for u in ex.findall(text)]
        out = set()
        parts = urlparse(url)
        for l in links:
            if isinstance(l, tuple):
                l = l[1]
            if l.startswith("/") and len(l) > 1:
                # absolute URL on this server
                l = f"{parts.scheme}://{parts.netloc}{l}"
            if l.startswith("http"):
                if self.same_schema and l.startswith(url.rstrip("/") + "/"):
                    out.add(l)
                elif l.replace("https", "http").startswith(
                    url.replace("https", "http").rstrip("/") + "/"
                ):
                    # allowed to cross http <-> https
                    out.add(l)
            else:
                if l not in ["..", "../"]:
                    # Ignore FTP-like "parent"
                    out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
        if not out and url.endswith("/"):
            # retry without the trailing slash before giving up
            out = await self._ls_real(url.rstrip("/"), detail=False)
        if detail:
            return [
                {
                    "name": u,
                    "size": None,
                    "type": "directory" if u.endswith("/") else "file",
                }
                for u in out
            ]
        else:
            return sorted(out)

    async def _ls(self, url, detail=True, **kwargs):
        # serve from the dircache when listings caching is enabled
        if self.use_listings_cache and url in self.dircache:
            out = self.dircache[url]
        else:
            out = await self._ls_real(url, detail=detail, **kwargs)
            self.dircache[url] = out
        return out

    ls = sync_wrapper(_ls)

    def _raise_not_found_for_status(self, response, url):
        """
        Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
        """
        if response.status == 404:
            raise FileNotFoundError(url)
        response.raise_for_status()

    async def _cat_file(self, url, start=None, end=None, **kwargs):
        kw = self.kwargs.copy()
        kw.update(kwargs)
        logger.debug(url)

        if start is not None or end is not None:
            if start == end:
                return b""
            headers = kw.pop("headers", {}).copy()

            headers["Range"] = await self._process_limits(url, start, end)
            kw["headers"] = headers
        session = await self.set_session()
        async with session.get(self.encode_url(url), **kw) as r:
            # body is read before the status check so error pages are consumed
            out = await r.read()
            self._raise_not_found_for_status(r, url)
        return out

    async def _get_file(
        self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs
    ):
        """Stream *rpath* to the local file/file-like *lpath* in chunks."""
        kw = self.kwargs.copy()
        kw.update(kwargs)
        logger.debug(rpath)
        session = await self.set_session()
        async with session.get(self.encode_url(rpath), **kw) as r:
            try:
                size = int(r.headers["content-length"])
            except (ValueError, KeyError):
                size = None

            callback.set_size(size)
            self._raise_not_found_for_status(r, rpath)
            if isfilelike(lpath):
                outfile = lpath
            else:
                outfile = open(lpath, "wb")  # noqa: ASYNC101

            try:
                chunk = True
                while chunk:
                    chunk = await r.content.read(chunk_size)
                    outfile.write(chunk)
                    callback.relative_update(len(chunk))
            finally:
                # only close files we opened ourselves
                if not isfilelike(lpath):
                    outfile.close()

    async def _put_file(
        self,
        lpath,
        rpath,
        chunk_size=5 * 2**20,
        callback=DEFAULT_CALLBACK,
        method="post",
        **kwargs,
    ):
        async def gen_chunks():
            # Support passing arbitrary file-like objects
            # and use them instead of streams.
            if isinstance(lpath, io.IOBase):
                context = nullcontext(lpath)
                use_seek = False  # might not support seeking
            else:
                context = open(lpath, "rb")  # noqa: ASYNC101
                use_seek = True

            with context as f:
                if use_seek:
                    callback.set_size(f.seek(0, 2))
                    f.seek(0)
                else:
                    callback.set_size(getattr(f, "size", None))

                chunk = f.read(chunk_size)
                while chunk:
                    yield chunk
                    callback.relative_update(len(chunk))
                    chunk = f.read(chunk_size)

        kw = self.kwargs.copy()
        kw.update(kwargs)
        session = await self.set_session()

        method = method.lower()
        if method not in ("post", "put"):
            raise ValueError(
                f"method has to be either 'post' or 'put', not: {method!r}"
            )

        meth = getattr(session, method)
        async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
            self._raise_not_found_for_status(resp, rpath)

    async def _exists(self, path, **kwargs):
        kw = self.kwargs.copy()
        kw.update(kwargs)
        try:
            logger.debug(path)
            session = await self.set_session()
            r = await session.get(self.encode_url(path), **kw)
            async with r:
                # any non-error status counts as existence
                return r.status < 400
        except aiohttp.ClientError:
            return False

    async def _isfile(self, path, **kwargs):
        # every reachable URL is treated as a file
        return await self._exists(path, **kwargs)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=None,  # XXX: This differs from the base class.
        cache_type=None,
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """Make a file-like object

        Parameters
        ----------
        path: str
            Full URL with protocol
        mode: string
            must be "rb"
        block_size: int or None
            Bytes to download in one request; use instance value if None. If
            zero, will return a streaming Requests file-like instance.
        kwargs: key-value
            Any other parameters, passed to requests calls
        """
        if mode != "rb":
            raise NotImplementedError
        block_size = block_size if block_size is not None else self.block_size
        kw = self.kwargs.copy()
        kw["asynchronous"] = self.asynchronous
        kw.update(kwargs)
        size = size or self.info(path, **kwargs)["size"]
        session = sync(self.loop, self.set_session)
        if block_size and size:
            # random access possible: return a block-cached file
            return HTTPFile(
                self,
                path,
                session=session,
                block_size=block_size,
                mode=mode,
                size=size,
                cache_type=cache_type or self.cache_type,
                cache_options=cache_options or self.cache_options,
                loop=self.loop,
                **kw,
            )
        else:
            # unknown size or block_size=0: fall back to streaming
            return HTTPStreamFile(
                self,
                path,
                mode=mode,
                loop=self.loop,
                session=session,
                **kw,
            )

    async def open_async(self, path, mode="rb", size=None, **kwargs):
        """Async counterpart of ``open``; always returns a streamed file."""
        session = await self.set_session()
        if size is None:
            try:
                size = (await self._info(path, **kwargs))["size"]
            except FileNotFoundError:
                pass
        return AsyncStreamFile(
            self,
            path,
            loop=self.loop,
            session=session,
            size=size,
            **kwargs,
        )

    def ukey(self, url):
        """Unique identifier; assume HTTP files are static, unchanging"""
        return tokenize(url, self.kwargs, self.protocol)

    async def _info(self, url, **kwargs):
        """Get info of URL

        Tries to access location via HEAD, and then GET methods, but does
        not fetch the data.

        It is possible that the server does not supply any size information, in
        which case size will be given as None (and certain operations on the
        corresponding file will not work).
        """
        info = {}
        session = await self.set_session()

        for policy in ["head", "get"]:
            try:
                info.update(
                    await _file_info(
                        self.encode_url(url),
                        size_policy=policy,
                        session=session,
                        **self.kwargs,
                        **kwargs,
                    )
                )
                if info.get("size") is not None:
                    break
            except Exception as exc:
                if policy == "get":
                    # If get failed, then raise a FileNotFoundError
                    raise FileNotFoundError(url) from exc
                logger.debug(str(exc))

        return {"name": url, "size": None, **info, "type": "file"}

    async def _glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        This implementation is identical to the one in AbstractFileSystem,
        but "?" is not considered as a character for globbing, because it is
        so common in URLs, often identifying the "query" part.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")
        import re

        ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
        # position of the first glob special character, if any
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            # no glob characters: plain existence check
            if await self._exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: await self._info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = await self._find(
            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
        )

        pattern = glob_translate(path + ("/" if ends_with_slash else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)

    async def _isdir(self, path):
        # override, since all URLs are (also) files
        try:
            return bool(await self._ls(path))
        except (FileNotFoundError, ValueError):
            return False
517
+
518
class HTTPFile(AbstractBufferedFile):
    """
    A file-like object pointing to a remote HTTP(S) resource

    Supports only reading, with read-ahead of a predetermined block-size.

    In the case that the server does not supply the filesize, only reading of
    the complete file in one go is supported.

    Parameters
    ----------
    url: str
        Full URL of the remote resource, including the protocol
    session: aiohttp.ClientSession or None
        All calls will be made within this session, to avoid restarting
        connections where the server allows this
    block_size: int or None
        The amount of read-ahead to do, in bytes. Default is 5MB, or the value
        configured for the FileSystem creating this file
    size: None or int
        If given, this is the size of the file in bytes, and we don't attempt
        to call the server to find the value.
    kwargs: all other key-values are passed to requests calls.
    """

    def __init__(
        self,
        fs,
        url,
        session=None,
        block_size=None,
        mode="rb",
        cache_type="bytes",
        cache_options=None,
        size=None,
        loop=None,
        asynchronous=False,
        **kwargs,
    ):
        if mode != "rb":
            raise NotImplementedError("File mode not supported")
        self.asynchronous = asynchronous
        self.url = url
        self.session = session
        # pre-populate details so the base class does not call info() again
        self.details = {"name": url, "size": size, "type": "file"}
        super().__init__(
            fs=fs,
            path=url,
            mode=mode,
            block_size=block_size,
            cache_type=cache_type,
            cache_options=cache_options,
            **kwargs,
        )
        self.loop = loop

    def read(self, length=-1):
        """Read bytes from file

        Parameters
        ----------
        length: int
            Read up to this many bytes. If negative, read all content to end of
            file. If the server has not supplied the filesize, attempting to
            read only part of the data will raise a ValueError.
        """
        if (
            (length < 0 and self.loc == 0)  # explicit read all
            # but not when the size is known and fits into a block anyways
            and not (self.size is not None and self.size <= self.blocksize)
        ):
            self._fetch_all()
        if self.size is None:
            if length < 0:
                self._fetch_all()
        else:
            length = min(self.size - self.loc, length)
        return super().read(length)

    async def async_fetch_all(self):
        """Read whole file in one shot, without caching

        This is only called when position is still at zero,
        and read() is called without a byte-count.
        """
        logger.debug(f"Fetch all for {self}")
        if not isinstance(self.cache, AllBytes):
            r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs)
            async with r:
                r.raise_for_status()
                out = await r.read()
                self.cache = AllBytes(
                    size=len(out), fetcher=None, blocksize=None, data=out
                )
                self.size = len(out)

    _fetch_all = sync_wrapper(async_fetch_all)

    def _parse_content_range(self, headers):
        """Parse the Content-Range header"""
        s = headers.get("Content-Range", "")
        m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
        if not m:
            return None, None, None

        if m[1] == "*":
            start = end = None
        else:
            start, end = [int(x) for x in m[1].split("-")]
        total = None if m[2] == "*" else int(m[2])
        return start, end, total

    async def async_fetch_range(self, start, end):
        """Download a block of data

        The expectation is that the server returns only the requested bytes,
        with HTTP code 206. If this is not the case, we first check the headers,
        and then stream the output - if the data size is bigger than we
        requested, an exception is raised.
        """
        logger.debug(f"Fetch range for {self}: {start}-{end}")
        kwargs = self.kwargs.copy()
        headers = kwargs.pop("headers", {}).copy()
        # HTTP Range is inclusive at both ends, hence end - 1
        headers["Range"] = f"bytes={start}-{end - 1}"
        logger.debug(f"{self.url} : {headers['Range']}")
        r = await self.session.get(
            self.fs.encode_url(self.url), headers=headers, **kwargs
        )
        async with r:
            if r.status == 416:
                # range request outside file
                return b""
            r.raise_for_status()

            # If the server has handled the range request, it should reply
            # with status 206 (partial content). But we'll guess that a suitable
            # Content-Range header or a Content-Length no more than the
            # requested range also mean we have got the desired range.
            response_is_range = (
                r.status == 206
                or self._parse_content_range(r.headers)[0] == start
                or int(r.headers.get("Content-Length", end + 1)) <= end - start
            )

            if response_is_range:
                # partial content, as expected
                out = await r.read()
            elif start > 0:
                raise ValueError(
                    "The HTTP server doesn't appear to support range requests. "
                    "Only reading this file from the beginning is supported. "
                    "Open with block_size=0 for a streaming file interface."
                )
            else:
                # Response is not a range, but we want the start of the file,
                # so we can read the required amount anyway.
                cl = 0
                out = []
                while True:
                    chunk = await r.content.read(2**20)
                    # data size unknown, let's read until we have enough
                    if chunk:
                        out.append(chunk)
                        cl += len(chunk)
                        if cl > end - start:
                            break
                    else:
                        break
                out = b"".join(out)[: end - start]
            return out

    _fetch_range = sync_wrapper(async_fetch_range)

    def __reduce__(self):
        # pickle support: re-open the URL on the same filesystem when restored
        return (
            reopen,
            (
                self.fs,
                self.url,
                self.mode,
                self.blocksize,
                self.cache.name if self.cache else "none",
                self.size,
            ),
        )
703
+
704
+
705
+ def reopen(fs, url, mode, blocksize, cache_type, size=None):
706
+ return fs.open(
707
+ url, mode=mode, block_size=blocksize, cache_type=cache_type, size=size
708
+ )
709
+
710
+
711
+ magic_check = re.compile("([*[])")
712
+
713
+
714
+ def has_magic(s):
715
+ match = magic_check.search(s)
716
+ return match is not None
717
+
718
+
719
class HTTPStreamFile(AbstractBufferedFile):
    """Streaming (forward-only) file-like object over an HTTP response body.

    Used when the remote size is unknown or block_size=0; seeking is not
    supported except for no-op seeks.
    """

    def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs):
        self.asynchronous = kwargs.pop("asynchronous", False)
        self.url = url
        self.loop = loop
        self.session = session
        if mode != "rb":
            raise ValueError
        self.details = {"name": url, "size": None}
        super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs)

        async def cor():
            # open the response immediately; the body is consumed by _read
            r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__()
            self.fs._raise_not_found_for_status(r, url)
            return r

        self.r = sync(self.loop, cor)

    def seek(self, loc, whence=0):
        # only no-op seeks are permitted on a streaming response
        if loc == 0 and whence == 1:
            return
        if loc == self.loc and whence == 0:
            return
        raise ValueError("Cannot seek streaming HTTP file")

    async def _read(self, num=-1):
        out = await self.r.content.read(num)
        self.loc += len(out)
        return out

    read = sync_wrapper(_read)

    async def _close(self):
        self.r.close()

    def close(self):
        # schedule the response close on the event loop; don't block here
        asyncio.run_coroutine_threadsafe(self._close(), self.loop)
        super().close()

    def __reduce__(self):
        # pickle support: re-open the URL on the same filesystem when restored
        return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name)
760
+
761
+
762
class AsyncStreamFile(AbstractAsyncStreamedFile):
    """Async streaming file over an HTTP response; request is opened lazily
    on the first ``read``."""

    def __init__(
        self, fs, url, mode="rb", loop=None, session=None, size=None, **kwargs
    ):
        self.url = url
        self.session = session
        # the response object; None until the first read
        self.r = None
        if mode != "rb":
            raise ValueError
        self.details = {"name": url, "size": None}
        self.kwargs = kwargs
        super().__init__(fs=fs, path=url, mode=mode, cache_type="none")
        self.size = size

    async def read(self, num=-1):
        if self.r is None:
            # lazily open the GET request on first read
            r = await self.session.get(
                self.fs.encode_url(self.url), **self.kwargs
            ).__aenter__()
            self.fs._raise_not_found_for_status(r, self.url)
            self.r = r
        out = await self.r.content.read(num)
        self.loc += len(out)
        return out

    async def close(self):
        if self.r is not None:
            self.r.close()
            self.r = None
        await super().close()
792
+
793
+
794
+ async def get_range(session, url, start, end, file=None, **kwargs):
795
+ # explicit get a range when we know it must be safe
796
+ kwargs = kwargs.copy()
797
+ headers = kwargs.pop("headers", {}).copy()
798
+ headers["Range"] = f"bytes={start}-{end - 1}"
799
+ r = await session.get(url, headers=headers, **kwargs)
800
+ r.raise_for_status()
801
+ async with r:
802
+ out = await r.read()
803
+ if file:
804
+ with open(file, "r+b") as f: # noqa: ASYNC101
805
+ f.seek(start)
806
+ f.write(out)
807
+ else:
808
+ return out
809
+
810
+
811
+ async def _file_info(url, session, size_policy="head", **kwargs):
812
+ """Call HEAD on the server to get details about the file (size/checksum etc.)
813
+
814
+ Default operation is to explicitly allow redirects and use encoding
815
+ 'identity' (no compression) to get the true size of the target.
816
+ """
817
+ logger.debug("Retrieve file size for %s", url)
818
+ kwargs = kwargs.copy()
819
+ ar = kwargs.pop("allow_redirects", True)
820
+ head = kwargs.get("headers", {}).copy()
821
+ head["Accept-Encoding"] = "identity"
822
+ kwargs["headers"] = head
823
+
824
+ info = {}
825
+ if size_policy == "head":
826
+ r = await session.head(url, allow_redirects=ar, **kwargs)
827
+ elif size_policy == "get":
828
+ r = await session.get(url, allow_redirects=ar, **kwargs)
829
+ else:
830
+ raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
831
+ async with r:
832
+ r.raise_for_status()
833
+
834
+ # TODO:
835
+ # recognise lack of 'Accept-Ranges',
836
+ # or 'Accept-Ranges': 'none' (not 'bytes')
837
+ # to mean streaming only, no random access => return None
838
+ if "Content-Length" in r.headers:
839
+ # Some servers may choose to ignore Accept-Encoding and return
840
+ # compressed content, in which case the returned size is unreliable.
841
+ if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
842
+ "identity",
843
+ "",
844
+ ]:
845
+ info["size"] = int(r.headers["Content-Length"])
846
+ elif "Content-Range" in r.headers:
847
+ info["size"] = int(r.headers["Content-Range"].split("/")[1])
848
+
849
+ if "Content-Type" in r.headers:
850
+ info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
851
+
852
+ info["url"] = str(r.url)
853
+
854
+ for checksum_field in ["ETag", "Content-MD5", "Digest"]:
855
+ if r.headers.get(checksum_field):
856
+ info[checksum_field] = r.headers[checksum_field]
857
+
858
+ return info
859
+
860
+
861
+ async def _file_size(url, session=None, *args, **kwargs):
862
+ if session is None:
863
+ session = await get_client()
864
+ info = await _file_info(url, session=session, *args, **kwargs)
865
+ return info.get("size")
866
+
867
+
868
+ file_size = sync_wrapper(_file_size)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/jupyter.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import re
4
+
5
+ import requests
6
+
7
+ import fsspec
8
+
9
+
10
class JupyterFileSystem(fsspec.AbstractFileSystem):
    """View of the files as seen by a Jupyter server (notebook or lab)"""

    protocol = ("jupyter", "jlab")

    def __init__(self, url, tok=None, **kwargs):
        """
        Parameters
        ----------
        url : str
            Base URL of the server, like "http://127.0.0.1:8888". May include
            token in the string, which is given by the process when starting up
        tok : str
            If the token is obtained separately, can be given here
        kwargs
        """
        if "?" in url:
            if tok is None:
                try:
                    tok = re.findall("token=([a-z0-9]+)", url)[0]
                except IndexError as e:
                    raise ValueError("Could not determine token") from e
            url = url.split("?", 1)[0]
        self.url = url.rstrip("/") + "/api/contents"
        self.session = requests.Session()
        if tok:
            self.session.headers["Authorization"] = f"token {tok}"

        super().__init__(**kwargs)

    def ls(self, path, detail=True, **kwargs):
        """List a directory (or stat a single file) via the contents API."""
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # BUG FIX: the exception was previously *returned*, not raised,
            # so missing paths handed callers an exception instance instead
            # of failing.
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()

        if out["type"] == "directory":
            out = out["content"]
        else:
            out = [out]
        for o in out:
            o["name"] = o.pop("path")
            o.pop("content")
            if o["type"] == "notebook":
                # notebooks behave like regular files for fsspec purposes
                o["type"] = "file"
        if detail:
            return out
        return [o["name"] for o in out]

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Return file contents as bytes, optionally sliced to [start:end]."""
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # BUG FIX: raise instead of returning the exception instance
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()
        if out["format"] == "text":
            # data should be binary
            b = out["content"].encode()
        else:
            b = base64.b64decode(out["content"])
        return b[start:end]

    def pipe_file(self, path, value, **_):
        """Upload *value* (bytes) to *path* in a single PUT request."""
        path = self._strip_protocol(path)
        json = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": len(value),
            "content": base64.b64encode(value).decode(),
            "format": "base64",
            "type": "file",
        }
        self.session.put(f"{self.url}/{path}", json=json)

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory, optionally creating missing parents first."""
        path = self._strip_protocol(path)
        if create_parents and "/" in path:
            self.mkdir(path.rsplit("/", 1)[0], True)
        json = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": None,
            "content": None,
            "type": "directory",
        }
        self.session.put(f"{self.url}/{path}", json=json)

    def _rm(self, path):
        # single-path delete; recursive logic lives in the base class
        path = self._strip_protocol(path)
        self.session.delete(f"{self.url}/{path}")

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if mode == "rb":
            # whole-file read into memory; no ranged access on this backend
            data = self.cat_file(path)
            return io.BytesIO(data)
        else:
            return SimpleFileWriter(self, path, mode="wb")
112
+
113
+
114
class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
    def _upload_chunk(self, final=False):
        """Never uploads a chunk until file is done

        Not suitable for large files
        """
        # Intermediate flushes are no-ops; only the final flush uploads.
        if final is False:
            return False
        self.buffer.seek(0)
        payload = self.buffer.read()
        self.fs.pipe_file(self.path, payload)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/reference.py ADDED
@@ -0,0 +1,1160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import collections
3
+ import io
4
+ import itertools
5
+ import logging
6
+ import math
7
+ import os
8
+ from functools import lru_cache
9
+ from typing import TYPE_CHECKING
10
+
11
+ import fsspec.core
12
+
13
+ try:
14
+ import ujson as json
15
+ except ImportError:
16
+ if not TYPE_CHECKING:
17
+ import json
18
+
19
+ from ..asyn import AsyncFileSystem
20
+ from ..callbacks import DEFAULT_CALLBACK
21
+ from ..core import filesystem, open, split_protocol
22
+ from ..utils import isfilelike, merge_offset_ranges, other_paths
23
+
24
+ logger = logging.getLogger("fsspec.reference")
25
+
26
+
27
class ReferenceNotReachable(RuntimeError):
    """Raised when a reference's target URL could not be fetched."""

    def __init__(self, reference, target, *args):
        super().__init__(*args)
        # keep both sides of the failed mapping for diagnostics
        self.reference = reference
        self.target = target

    def __str__(self):
        return f'Reference "{self.reference}" failed to fetch target {self.target}'
35
+
36
+
37
+ def _first(d):
38
+ return list(d.values())[0]
39
+
40
+
41
def _prot_in_references(path, references):
    """Return the protocol of the reference URL for ``path``, if URL-like.

    Bytes references and unknown paths yield None (implicitly).
    """
    ref = references.get(path)
    if not isinstance(ref, (list, tuple)):
        return None
    url = ref[0]
    if not url:
        # empty/None URL means "use the default target"
        return url
    return split_protocol(url)[0]
45
+
46
+
47
def _protocol_groups(paths, references):
    """Group the given path(s) by the protocol of their reference targets."""
    if isinstance(paths, str):
        return {_prot_in_references(paths, references): [paths]}
    grouped = {}
    for p in paths:
        grouped.setdefault(_prot_in_references(p, references), []).append(p)
    return grouped
55
+
56
+
57
class RefsValuesView(collections.abc.ValuesView):
    """Lazy values view over a LazyReferenceMapper."""

    def __iter__(self):
        mapping = self._mapping
        # metadata documents first, serialised as JSON bytes
        for meta in mapping.zmetadata.values():
            yield json.dumps(meta).encode()
        # then any in-memory (pending) items
        yield from mapping._items.values()
        # finally the chunk references of every field
        for field in mapping.listdir():
            if len(mapping._get_chunk_sizes(field)) == 0:
                # scalar array: single implicit chunk "0"
                yield mapping[field + "/0"]
                continue
            yield from mapping._generate_all_records(field)
68
+
69
+
70
class RefsItemsView(collections.abc.ItemsView):
    """Lazy (key, value) view pairing the mapper's keys with its values."""

    def __iter__(self):
        # keys() and values() iterate in the same order, so pairing is safe
        yield from zip(self._mapping.keys(), self._mapping.values())
73
+
74
+
75
def ravel_multi_index(idx, sizes):
    """Flatten a multi-dimensional chunk index into one linear index.

    Equivalent to ``numpy.ravel_multi_index`` for C-ordered ``sizes``,
    without the numpy dependency.
    """
    flat = 0
    stride = 1
    # walk from the fastest-varying (last) axis outwards
    for axis_index, axis_size in zip(reversed(idx), reversed(sizes)):
        flat += axis_index * stride
        stride *= axis_size
    return flat
82
+
83
+
84
+ class LazyReferenceMapper(collections.abc.MutableMapping):
85
+ """This interface can be used to read/write references from Parquet stores.
86
+ It is not intended for other types of references.
87
+ It can be used with Kerchunk's MultiZarrToZarr method to combine
88
+ references into a parquet store.
89
+ Examples of this use-case can be found here:
90
+ https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
91
+
92
+ # import is class level to prevent numpy dep requirement for fsspec
93
+ @property
94
+ def np(self):
95
+ import numpy as np
96
+
97
+ return np
98
+
99
+ @property
100
+ def pd(self):
101
+ import pandas as pd
102
+
103
+ return pd
104
+
105
+ def __init__(
106
+ self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
107
+ ):
108
+ """
109
+
110
+ This instance will be writable, storing changes in memory until full partitions
111
+ are accumulated or .flush() is called.
112
+
113
+ To create an empty lazy store, use .create()
114
+
115
+ Parameters
116
+ ----------
117
+ root : str
118
+ Root of parquet store
119
+ fs : fsspec.AbstractFileSystem
120
+ fsspec filesystem object, default is local filesystem.
121
+ cache_size : int, default=128
122
+ Maximum size of LRU cache, where cache_size*record_size denotes
123
+ the total number of references that can be loaded in memory at once.
124
+ categorical_threshold : int
125
+ Encode urls as pandas.Categorical to reduce memory footprint if the ratio
126
+ of the number of unique urls to total number of refs for each variable
127
+ is greater than or equal to this number. (default 10)
128
+ """
129
+ self.root = root
130
+ self.chunk_sizes = {}
131
+ self.out_root = out_root or self.root
132
+ self.cat_thresh = categorical_threshold
133
+ self.cache_size = cache_size
134
+ self.dirs = None
135
+ self.url = self.root + "/{field}/refs.{record}.parq"
136
+ # TODO: derive fs from `root`
137
+ self.fs = fsspec.filesystem("file") if fs is None else fs
138
+
139
+ def __getattr__(self, item):
140
+ if item in ("_items", "record_size", "zmetadata"):
141
+ self.setup()
142
+ # avoid possible recursion if setup fails somehow
143
+ return self.__dict__[item]
144
+ raise AttributeError(item)
145
+
146
+ def setup(self):
147
+ self._items = {}
148
+ self._items[".zmetadata"] = self.fs.cat_file(
149
+ "/".join([self.root, ".zmetadata"])
150
+ )
151
+ met = json.loads(self._items[".zmetadata"])
152
+ self.record_size = met["record_size"]
153
+ self.zmetadata = met["metadata"]
154
+
155
+ # Define function to open and decompress refs
156
+ @lru_cache(maxsize=self.cache_size)
157
+ def open_refs(field, record):
158
+ """cached parquet file loader"""
159
+ path = self.url.format(field=field, record=record)
160
+ data = io.BytesIO(self.fs.cat_file(path))
161
+ df = self.pd.read_parquet(data, engine="fastparquet")
162
+ refs = {c: df[c].values for c in df.columns}
163
+ return refs
164
+
165
+ self.open_refs = open_refs
166
+
167
+ @staticmethod
168
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
169
+ """Make empty parquet reference set
170
+
171
+ First deletes the contents of the given directory, if it exists.
172
+
173
+ Parameters
174
+ ----------
175
+ root: str
176
+ Directory to contain the output; will be created
177
+ storage_options: dict | None
178
+ For making the filesystem to use for writing is fs is None
179
+ fs: FileSystem | None
180
+ Filesystem for writing
181
+ record_size: int
182
+ Number of references per parquet file
183
+ kwargs: passed to __init__
184
+
185
+ Returns
186
+ -------
187
+ LazyReferenceMapper instance
188
+ """
189
+ met = {"metadata": {}, "record_size": record_size}
190
+ if fs is None:
191
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
192
+ if fs.exists(root):
193
+ fs.rm(root, recursive=True)
194
+ fs.makedirs(root, exist_ok=True)
195
+ fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
196
+ return LazyReferenceMapper(root, fs, **kwargs)
197
+
198
+ def listdir(self, basename=True):
199
+ """List top-level directories"""
200
+ # cache me?
201
+ if self.dirs is None:
202
+ dirs = [p.split("/", 1)[0] for p in self.zmetadata]
203
+ self.dirs = {p for p in dirs if p and not p.startswith(".")}
204
+ listing = self.dirs
205
+ if basename:
206
+ listing = [os.path.basename(path) for path in listing]
207
+ return listing
208
+
209
+ def ls(self, path="", detail=True):
210
+ """Shortcut file listings"""
211
+ if not path:
212
+ dirnames = self.listdir()
213
+ others = set(
214
+ [".zmetadata"]
215
+ + [name for name in self.zmetadata if "/" not in name]
216
+ + [name for name in self._items if "/" not in name]
217
+ )
218
+ if detail is False:
219
+ others.update(dirnames)
220
+ return sorted(others)
221
+ dirinfo = [
222
+ {"name": name, "type": "directory", "size": 0} for name in dirnames
223
+ ]
224
+ fileinfo = [
225
+ {
226
+ "name": name,
227
+ "type": "file",
228
+ "size": len(
229
+ json.dumps(self.zmetadata[name])
230
+ if name in self.zmetadata
231
+ else self._items[name]
232
+ ),
233
+ }
234
+ for name in others
235
+ ]
236
+ return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
237
+ parts = path.split("/", 1)
238
+ if len(parts) > 1:
239
+ raise FileNotFoundError("Cannot list within directories right now")
240
+ field = parts[0]
241
+ others = set(
242
+ [name for name in self.zmetadata if name.startswith(f"{path}/")]
243
+ + [name for name in self._items if name.startswith(f"{path}/")]
244
+ )
245
+ fileinfo = [
246
+ {
247
+ "name": name,
248
+ "type": "file",
249
+ "size": len(
250
+ json.dumps(self.zmetadata[name])
251
+ if name in self.zmetadata
252
+ else self._items[name]
253
+ ),
254
+ }
255
+ for name in others
256
+ ]
257
+ keys = self._keys_in_field(field)
258
+
259
+ if detail is False:
260
+ return list(others) + list(keys)
261
+ recs = self._generate_all_records(field)
262
+ recinfo = [
263
+ {"name": name, "type": "file", "size": rec[-1]}
264
+ for name, rec in zip(keys, recs)
265
+ if rec[0] # filters out path==None, deleted/missing
266
+ ]
267
+ return fileinfo + recinfo
268
+
269
+ def _load_one_key(self, key):
270
+ """Get the reference for one key
271
+
272
+ Returns bytes, one-element list or three-element list.
273
+ """
274
+ if key in self._items:
275
+ return self._items[key]
276
+ elif key in self.zmetadata:
277
+ return json.dumps(self.zmetadata[key]).encode()
278
+ elif "/" not in key or self._is_meta(key):
279
+ raise KeyError(key)
280
+ field, sub_key = key.split("/")
281
+ record, ri, chunk_size = self._key_to_record(key)
282
+ maybe = self._items.get((field, record), {}).get(ri, False)
283
+ if maybe is None:
284
+ # explicitly deleted
285
+ raise KeyError
286
+ elif maybe:
287
+ return maybe
288
+ elif chunk_size == 0:
289
+ return b""
290
+
291
+ # Chunk keys can be loaded from row group and cached in LRU cache
292
+ try:
293
+ refs = self.open_refs(field, record)
294
+ except (ValueError, TypeError, FileNotFoundError):
295
+ raise KeyError(key)
296
+ columns = ["path", "offset", "size", "raw"]
297
+ selection = [refs[c][ri] if c in refs else None for c in columns]
298
+ raw = selection[-1]
299
+ if raw is not None:
300
+ return raw
301
+ if selection[0] is None:
302
+ raise KeyError("This reference does not exist or has been deleted")
303
+ if selection[1:3] == [0, 0]:
304
+ # URL only
305
+ return selection[:1]
306
+ # URL, offset, size
307
+ return selection[:3]
308
+
309
+ @lru_cache(4096)
310
+ def _key_to_record(self, key):
311
+ """Details needed to construct a reference for one key"""
312
+ field, chunk = key.split("/")
313
+ chunk_sizes = self._get_chunk_sizes(field)
314
+ if len(chunk_sizes) == 0:
315
+ return 0, 0, 0
316
+ chunk_idx = [int(c) for c in chunk.split(".")]
317
+ chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
318
+ record = chunk_number // self.record_size
319
+ ri = chunk_number % self.record_size
320
+ return record, ri, len(chunk_sizes)
321
+
322
+ def _get_chunk_sizes(self, field):
323
+ """The number of chunks along each axis for a given field"""
324
+ if field not in self.chunk_sizes:
325
+ zarray = self.zmetadata[f"{field}/.zarray"]
326
+ size_ratio = [
327
+ math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
328
+ ]
329
+ self.chunk_sizes[field] = size_ratio or [1]
330
+ return self.chunk_sizes[field]
331
+
332
+ def _generate_record(self, field, record):
333
+ """The references for a given parquet file of a given field"""
334
+ refs = self.open_refs(field, record)
335
+ it = iter(zip(*refs.values()))
336
+ if len(refs) == 3:
337
+ # All urls
338
+ return (list(t) for t in it)
339
+ elif len(refs) == 1:
340
+ # All raws
341
+ return refs["raw"]
342
+ else:
343
+ # Mix of urls and raws
344
+ return (list(t[:3]) if not t[3] else t[3] for t in it)
345
+
346
+ def _generate_all_records(self, field):
347
+ """Load all the references within a field by iterating over the parquet files"""
348
+ nrec = 1
349
+ for ch in self._get_chunk_sizes(field):
350
+ nrec *= ch
351
+ nrec = math.ceil(nrec / self.record_size)
352
+ for record in range(nrec):
353
+ yield from self._generate_record(field, record)
354
+
355
+ def values(self):
356
+ return RefsValuesView(self)
357
+
358
+ def items(self):
359
+ return RefsItemsView(self)
360
+
361
+ def __hash__(self):
362
+ return id(self)
363
+
364
+ def __getitem__(self, key):
365
+ return self._load_one_key(key)
366
+
367
+ def __setitem__(self, key, value):
368
+ if "/" in key and not self._is_meta(key):
369
+ field, chunk = key.split("/")
370
+ record, i, _ = self._key_to_record(key)
371
+ subdict = self._items.setdefault((field, record), {})
372
+ subdict[i] = value
373
+ if len(subdict) == self.record_size:
374
+ self.write(field, record)
375
+ else:
376
+ # metadata or top-level
377
+ self._items[key] = value
378
+ new_value = json.loads(
379
+ value.decode() if isinstance(value, bytes) else value
380
+ )
381
+ self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
382
+
383
+ @staticmethod
384
+ def _is_meta(key):
385
+ return key.startswith(".z") or "/.z" in key
386
+
387
+ def __delitem__(self, key):
388
+ if key in self._items:
389
+ del self._items[key]
390
+ elif key in self.zmetadata:
391
+ del self.zmetadata[key]
392
+ else:
393
+ if "/" in key and not self._is_meta(key):
394
+ field, chunk = key.split("/")
395
+ record, i, _ = self._key_to_record(key)
396
+ subdict = self._items.setdefault((field, record), {})
397
+ subdict[i] = None
398
+ if len(subdict) == self.record_size:
399
+ self.write(field, record)
400
+ else:
401
+ # metadata or top-level
402
+ self._items[key] = None
403
+
404
+ def write(self, field, record, base_url=None, storage_options=None):
405
+ # extra requirements if writing
406
+ import kerchunk.df
407
+ import numpy as np
408
+ import pandas as pd
409
+
410
+ partition = self._items[(field, record)]
411
+ original = False
412
+ if len(partition) < self.record_size:
413
+ try:
414
+ original = self.open_refs(field, record)
415
+ except IOError:
416
+ pass
417
+
418
+ if original:
419
+ paths = original["path"]
420
+ offsets = original["offset"]
421
+ sizes = original["size"]
422
+ raws = original["raw"]
423
+ else:
424
+ paths = np.full(self.record_size, np.nan, dtype="O")
425
+ offsets = np.zeros(self.record_size, dtype="int64")
426
+ sizes = np.zeros(self.record_size, dtype="int64")
427
+ raws = np.full(self.record_size, np.nan, dtype="O")
428
+ for j, data in partition.items():
429
+ if isinstance(data, list):
430
+ if (
431
+ str(paths.dtype) == "category"
432
+ and data[0] not in paths.dtype.categories
433
+ ):
434
+ paths = paths.add_categories(data[0])
435
+ paths[j] = data[0]
436
+ if len(data) > 1:
437
+ offsets[j] = data[1]
438
+ sizes[j] = data[2]
439
+ elif data is None:
440
+ # delete
441
+ paths[j] = None
442
+ offsets[j] = 0
443
+ sizes[j] = 0
444
+ raws[j] = None
445
+ else:
446
+ # this is the only call into kerchunk, could remove
447
+ raws[j] = kerchunk.df._proc_raw(data)
448
+ # TODO: only save needed columns
449
+ df = pd.DataFrame(
450
+ {
451
+ "path": paths,
452
+ "offset": offsets,
453
+ "size": sizes,
454
+ "raw": raws,
455
+ },
456
+ copy=False,
457
+ )
458
+ if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
459
+ df["path"] = df["path"].astype("category")
460
+ object_encoding = {"raw": "bytes", "path": "utf8"}
461
+ has_nulls = ["path", "raw"]
462
+
463
+ fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
464
+ self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
465
+ df.to_parquet(
466
+ fn,
467
+ engine="fastparquet",
468
+ storage_options=storage_options
469
+ or getattr(self.fs, "storage_options", None),
470
+ compression="zstd",
471
+ index=False,
472
+ stats=False,
473
+ object_encoding=object_encoding,
474
+ has_nulls=has_nulls,
475
+ # **kwargs,
476
+ )
477
+ partition.clear()
478
+ self._items.pop((field, record))
479
+
480
+ def flush(self, base_url=None, storage_options=None):
481
+ """Output any modified or deleted keys
482
+
483
+ Parameters
484
+ ----------
485
+ base_url: str
486
+ Location of the output
487
+ """
488
+ # write what we have so far and clear sub chunks
489
+ for thing in list(self._items):
490
+ if isinstance(thing, tuple):
491
+ field, record = thing
492
+ self.write(
493
+ field,
494
+ record,
495
+ base_url=base_url,
496
+ storage_options=storage_options,
497
+ )
498
+
499
+ # gather .zmetadata from self._items and write that too
500
+ for k in list(self._items):
501
+ if k != ".zmetadata" and ".z" in k:
502
+ self.zmetadata[k] = json.loads(self._items.pop(k))
503
+ met = {"metadata": self.zmetadata, "record_size": self.record_size}
504
+ self._items[".zmetadata"] = json.dumps(met).encode()
505
+ self.fs.pipe(
506
+ "/".join([base_url or self.out_root, ".zmetadata"]),
507
+ self._items[".zmetadata"],
508
+ )
509
+
510
+ # TODO: only clear those that we wrote to?
511
+ self.open_refs.cache_clear()
512
+
513
+ def __len__(self):
514
+ # Caveat: This counts expected references, not actual - but is fast
515
+ count = 0
516
+ for field in self.listdir():
517
+ if field.startswith("."):
518
+ count += 1
519
+ else:
520
+ count += math.prod(self._get_chunk_sizes(field))
521
+ count += len(self.zmetadata) # all metadata keys
522
+ # any other files not in reference partitions
523
+ count += sum(1 for _ in self._items if not isinstance(_, tuple))
524
+ return count
525
+
526
+ def __iter__(self):
527
+ # Caveat: returns only existing keys, so the number of these does not
528
+ # match len(self)
529
+ metas = set(self.zmetadata)
530
+ metas.update(self._items)
531
+ for bit in metas:
532
+ if isinstance(bit, str):
533
+ yield bit
534
+ for field in self.listdir():
535
+ for k in self._keys_in_field(field):
536
+ if k in self:
537
+ yield k
538
+
539
+ def __contains__(self, item):
540
+ try:
541
+ self._load_one_key(item)
542
+ return True
543
+ except KeyError:
544
+ return False
545
+
546
+ def _keys_in_field(self, field):
547
+ """List key names in given field
548
+
549
+ Produces strings like "field/x.y" appropriate from the chunking of the array
550
+ """
551
+ chunk_sizes = self._get_chunk_sizes(field)
552
+ if len(chunk_sizes) == 0:
553
+ yield field + "/0"
554
+ return
555
+ inds = itertools.product(*(range(i) for i in chunk_sizes))
556
+ for ind in inds:
557
+ yield field + "/" + ".".join([str(c) for c in ind])
558
+
559
+
560
+ class ReferenceFileSystem(AsyncFileSystem):
561
+ """View byte ranges of some other file as a file system
562
+ Initial version: single file system target, which must support
563
+ async, and must allow start and end args in _cat_file. Later versions
564
+ may allow multiple arbitrary URLs for the targets.
565
+ This FileSystem is read-only. It is designed to be used with async
566
+ targets (for now). This FileSystem only allows whole-file access, no
567
+ ``open``. We do not get original file details from the target FS.
568
+ Configuration is by passing a dict of references at init, or a URL to
569
+ a JSON file containing the same; this dict
570
+ can also contain concrete data for some set of paths.
571
+ Reference dict format:
572
+ {path0: bytes_data, path1: (target_url, offset, size)}
573
+ https://github.com/fsspec/kerchunk/blob/main/README.md
574
+ """
575
+
576
+ protocol = "reference"
577
+
578
+ def __init__(
579
+ self,
580
+ fo,
581
+ target=None,
582
+ ref_storage_args=None,
583
+ target_protocol=None,
584
+ target_options=None,
585
+ remote_protocol=None,
586
+ remote_options=None,
587
+ fs=None,
588
+ template_overrides=None,
589
+ simple_templates=True,
590
+ max_gap=64_000,
591
+ max_block=256_000_000,
592
+ cache_size=128,
593
+ **kwargs,
594
+ ):
595
+ """
596
+ Parameters
597
+ ----------
598
+ fo : dict or str
599
+ The set of references to use for this instance, with a structure as above.
600
+ If str referencing a JSON file, will use fsspec.open, in conjunction
601
+ with target_options and target_protocol to open and parse JSON at this
602
+ location. If a directory, then assume references are a set of parquet
603
+ files to be loaded lazily.
604
+ target : str
605
+ For any references having target_url as None, this is the default file
606
+ target to use
607
+ ref_storage_args : dict
608
+ If references is a str, use these kwargs for loading the JSON file.
609
+ Deprecated: use target_options instead.
610
+ target_protocol : str
611
+ Used for loading the reference file, if it is a path. If None, protocol
612
+ will be derived from the given path
613
+ target_options : dict
614
+ Extra FS options for loading the reference file ``fo``, if given as a path
615
+ remote_protocol : str
616
+ The protocol of the filesystem on which the references will be evaluated
617
+ (unless fs is provided). If not given, will be derived from the first
618
+ URL that has a protocol in the templates or in the references, in that
619
+ order.
620
+ remote_options : dict
621
+ kwargs to go with remote_protocol
622
+ fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
623
+ Directly provide a file system(s):
624
+ - a single filesystem instance
625
+ - a dict of protocol:filesystem, where each value is either a filesystem
626
+ instance, or a dict of kwargs that can be used to create in
627
+ instance for the given protocol
628
+
629
+ If this is given, remote_options and remote_protocol are ignored.
630
+ template_overrides : dict
631
+ Swap out any templates in the references file with these - useful for
632
+ testing.
633
+ simple_templates: bool
634
+ Whether templates can be processed with simple replace (True) or if
635
+ jinja is needed (False, much slower). All reference sets produced by
636
+ ``kerchunk`` are simple in this sense, but the spec allows for complex.
637
+ max_gap, max_block: int
638
+ For merging multiple concurrent requests to the same remote file.
639
+ Neighboring byte ranges will only be merged when their
640
+ inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
641
+ to only merge when it requires no extra bytes. Pass a negative
642
+ number to disable merging, appropriate for local target files.
643
+ Neighboring byte ranges will only be merged when the size of
644
+ the aggregated range is <= ``max_block``. Default is 256MB.
645
+ cache_size : int
646
+ Maximum size of LRU cache, where cache_size*record_size denotes
647
+ the total number of references that can be loaded in memory at once.
648
+ Only used for lazily loaded references.
649
+ kwargs : passed to parent class
650
+ """
651
+ super().__init__(**kwargs)
652
+ self.target = target
653
+ self.template_overrides = template_overrides
654
+ self.simple_templates = simple_templates
655
+ self.templates = {}
656
+ self.fss = {}
657
+ self._dircache = {}
658
+ self.max_gap = max_gap
659
+ self.max_block = max_block
660
+ if isinstance(fo, str):
661
+ dic = dict(
662
+ **(ref_storage_args or target_options or {}), protocol=target_protocol
663
+ )
664
+ ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
665
+ if ref_fs.isfile(fo2):
666
+ # text JSON
667
+ with fsspec.open(fo, "rb", **dic) as f:
668
+ logger.info("Read reference from URL %s", fo)
669
+ text = json.load(f)
670
+ self._process_references(text, template_overrides)
671
+ else:
672
+ # Lazy parquet refs
673
+ logger.info("Open lazy reference dict from URL %s", fo)
674
+ self.references = LazyReferenceMapper(
675
+ fo2,
676
+ fs=ref_fs,
677
+ cache_size=cache_size,
678
+ )
679
+ else:
680
+ # dictionaries
681
+ self._process_references(fo, template_overrides)
682
+ if isinstance(fs, dict):
683
+ self.fss = {
684
+ k: (
685
+ fsspec.filesystem(k.split(":", 1)[0], **opts)
686
+ if isinstance(opts, dict)
687
+ else opts
688
+ )
689
+ for k, opts in fs.items()
690
+ }
691
+ if None not in self.fss:
692
+ self.fss[None] = filesystem("file")
693
+ return
694
+ if fs is not None:
695
+ # single remote FS
696
+ remote_protocol = (
697
+ fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
698
+ )
699
+ self.fss[remote_protocol] = fs
700
+
701
+ if remote_protocol is None:
702
+ # get single protocol from any templates
703
+ for ref in self.templates.values():
704
+ if callable(ref):
705
+ ref = ref()
706
+ protocol, _ = fsspec.core.split_protocol(ref)
707
+ if protocol and protocol not in self.fss:
708
+ fs = filesystem(protocol, **(remote_options or {}))
709
+ self.fss[protocol] = fs
710
+ if remote_protocol is None:
711
+ # get single protocol from references
712
+ # TODO: warning here, since this can be very expensive?
713
+ for ref in self.references.values():
714
+ if callable(ref):
715
+ ref = ref()
716
+ if isinstance(ref, list) and ref[0]:
717
+ protocol, _ = fsspec.core.split_protocol(ref[0])
718
+ if protocol not in self.fss:
719
+ fs = filesystem(protocol, **(remote_options or {}))
720
+ self.fss[protocol] = fs
721
+ # only use first remote URL
722
+ break
723
+
724
+ if remote_protocol and remote_protocol not in self.fss:
725
+ fs = filesystem(remote_protocol, **(remote_options or {}))
726
+ self.fss[remote_protocol] = fs
727
+
728
+ self.fss[None] = fs or filesystem("file") # default one
729
+
730
    def _cat_common(self, path, start=None, end=None):
        """Resolve a reference key to concrete data or a byte-range request.

        Returns ``(bytes, None, None)`` for inline/base64 data, or
        ``(url, start1, end1)`` where start1/end1 are absolute offsets within
        the target file.  Raises FileNotFoundError if *path* is not a known
        reference.
        """
        path = self._strip_protocol(path)
        logger.debug(f"cat: {path}")
        try:
            part = self.references[path]
        except KeyError:
            raise FileNotFoundError(path)
        if isinstance(part, str):
            part = part.encode()
        if isinstance(part, bytes):
            # inline data reference; may be base64-wrapped per the spec
            logger.debug(f"Reference: {path}, type bytes")
            if part.startswith(b"base64:"):
                part = base64.b64decode(part[7:])
            return part, None, None

        if len(part) == 1:
            # one-element reference: whole file, caller's offsets pass through
            logger.debug(f"Reference: {path}, whole file => {part}")
            url = part[0]
            start1, end1 = start, end
        else:
            # three-element reference: (url, offset, size) chunk
            url, start0, size = part
            logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
            end0 = start0 + size

            # translate caller-relative offsets into absolute file offsets;
            # negative values count back from the end of the chunk
            if start is not None:
                if start >= 0:
                    start1 = start0 + start
                else:
                    start1 = end0 + start
            else:
                start1 = start0
            if end is not None:
                if end >= 0:
                    end1 = start0 + end
                else:
                    end1 = end0 + end
            else:
                end1 = end0
        if url is None:
            # reference with no URL falls back to the default target file
            url = self.target
        return url, start1, end1
771
+
772
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
773
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
774
+ if isinstance(part_or_url, bytes):
775
+ return part_or_url[start:end]
776
+ protocol, _ = split_protocol(part_or_url)
777
+ try:
778
+ await self.fss[protocol]._cat_file(part_or_url, start=start, end=end)
779
+ except Exception as e:
780
+ raise ReferenceNotReachable(path, part_or_url) from e
781
+
782
+ def cat_file(self, path, start=None, end=None, **kwargs):
783
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
784
+ if isinstance(part_or_url, bytes):
785
+ return part_or_url[start:end]
786
+ protocol, _ = split_protocol(part_or_url)
787
+ try:
788
+ return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
789
+ except Exception as e:
790
+ raise ReferenceNotReachable(path, part_or_url) from e
791
+
792
    def pipe_file(self, path, value, **_):
        """Temporarily add binary data or reference as a file"""
        # NOTE(review): unlike the async ``_pipe_file`` sibling, this does not
        # clear ``self.dircache``, so cached listings may not show the new
        # key — confirm whether that asymmetry is intentional.
        self.references[path] = value
795
+
796
+ async def _get_file(self, rpath, lpath, **kwargs):
797
+ if self.isdir(rpath):
798
+ return os.makedirs(lpath, exist_ok=True)
799
+ data = await self._cat_file(rpath)
800
+ with open(lpath, "wb") as f:
801
+ f.write(data)
802
+
803
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
804
+ if self.isdir(rpath):
805
+ return os.makedirs(lpath, exist_ok=True)
806
+ data = self.cat_file(rpath, **kwargs)
807
+ callback.set_size(len(data))
808
+ if isfilelike(lpath):
809
+ lpath.write(data)
810
+ else:
811
+ with open(lpath, "wb") as f:
812
+ f.write(data)
813
+ callback.absolute_update(len(data))
814
+
815
    def get(self, rpath, lpath, recursive=False, **kwargs):
        """Copy reference key(s) to local file(s).

        Expands ``rpath`` (optionally recursively), fetches all file payloads
        in one batched ``cat`` call, and writes each to the corresponding
        local target computed by ``other_paths``.
        """
        if recursive:
            # trigger directory build
            self.ls("")
        rpath = self.expand_path(rpath, recursive=recursive)
        fs = fsspec.filesystem("file", auto_mkdir=True)
        targets = other_paths(rpath, lpath)
        if recursive:
            # directories have no content to fetch
            data = self.cat([r for r in rpath if not self.isdir(r)])
        else:
            data = self.cat(rpath)
        for remote, local in zip(rpath, targets):
            # keys missing from `data` (e.g. directories) are skipped
            if remote in data:
                fs.pipe_file(local, data[remote])
829
+
830
    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch the contents of one or many reference keys.

        Groups keys by backing protocol, merges adjacent/overlapping byte
        ranges per target file (bounded by ``self.max_gap``/``self.max_block``)
        and fetches each merged range once, then slices the per-key payloads
        back out.  ``on_error`` is "raise", "omit" or "return" (keep the
        exception as the value).  A single str input returns bytes; otherwise
        a dict of key -> bytes (or exception) is returned.
        """
        if isinstance(path, str) and recursive:
            raise NotImplementedError
        if isinstance(path, list) and (recursive or any("*" in p for p in path)):
            raise NotImplementedError
        # TODO: if references is lazy, pre-fetch all paths in batch before access
        proto_dict = _protocol_groups(path, self.references)
        out = {}
        for proto, paths in proto_dict.items():
            fs = self.fss[proto]
            urls, starts, ends, valid_paths = [], [], [], []
            for p in paths:
                # find references or label not-found. Early exit if any not
                # found and on_error is "raise"
                try:
                    u, s, e = self._cat_common(p)
                except FileNotFoundError as err:
                    if on_error == "raise":
                        raise
                    if on_error != "omit":
                        out[p] = err
                else:
                    urls.append(u)
                    starts.append(s)
                    ends.append(e)
                    valid_paths.append(p)

            # process references into form for merging
            urls2 = []
            starts2 = []
            ends2 = []
            paths2 = []
            whole_files = set()
            for u, s, e, p in zip(urls, starts, ends, valid_paths):
                if isinstance(u, bytes):
                    # data
                    out[p] = u
                elif s is None:
                    # whole file - limits are None, None, but no further
                    # entries take for this file
                    whole_files.add(u)
                    urls2.append(u)
                    starts2.append(s)
                    ends2.append(e)
                    paths2.append(p)
            for u, s, e, p in zip(urls, starts, ends, valid_paths):
                # second run to account for files that are to be loaded whole
                if s is not None and u not in whole_files:
                    urls2.append(u)
                    starts2.append(s)
                    ends2.append(e)
                    paths2.append(p)

            # merge and fetch consolidated ranges
            new_paths, new_starts, new_ends = merge_offset_ranges(
                list(urls2),
                list(starts2),
                list(ends2),
                sort=True,
                max_gap=self.max_gap,
                max_block=self.max_block,
            )
            bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)

            # unbundle from merged bytes - simple approach
            for u, s, e, p in zip(urls, starts, ends, valid_paths):
                if p in out:
                    continue  # was bytes, already handled
                for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
                    # whole-file fetch: slice with the caller-relative offsets
                    if np == u and (ns is None or ne is None):
                        if isinstance(b, Exception):
                            out[p] = b
                        else:
                            out[p] = b[s:e]
                    # otherwise find the merged range containing [s, e)
                    elif np == u and s >= ns and e <= ne:
                        if isinstance(b, Exception):
                            out[p] = b
                        else:
                            out[p] = b[s - ns : (e - ne) or None]

        for k, v in out.copy().items():
            # these were valid references, but fetch failed, so transform exc
            if isinstance(v, Exception) and k in self.references:
                ex = out[k]
                new_ex = ReferenceNotReachable(k, self.references[k])
                new_ex.__cause__ = ex
                if on_error == "raise":
                    raise new_ex
                elif on_error != "omit":
                    out[k] = new_ex

        if len(out) == 1 and isinstance(path, str) and "*" not in path:
            # single, concrete key requested: return the bytes directly
            return _first(out)
        return out
924
+
925
+ def _process_references(self, references, template_overrides=None):
926
+ vers = references.get("version", None)
927
+ if vers is None:
928
+ self._process_references0(references)
929
+ elif vers == 1:
930
+ self._process_references1(references, template_overrides=template_overrides)
931
+ else:
932
+ raise ValueError(f"Unknown reference spec version: {vers}")
933
+ # TODO: we make dircache by iterating over all entries, but for Spec >= 1,
934
+ # can replace with programmatic. Is it even needed for mapper interface?
935
+
936
    def _process_references0(self, references):
        """Make reference dict for Spec Version 0"""
        # spec 0 is already in final form: {key: bytes | str | [url(, offset, size)]}
        self.references = references
939
+
940
+ def _process_references1(self, references, template_overrides=None):
941
+ if not self.simple_templates or self.templates:
942
+ import jinja2
943
+ self.references = {}
944
+ self._process_templates(references.get("templates", {}))
945
+
946
+ @lru_cache(1000)
947
+ def _render_jinja(u):
948
+ return jinja2.Template(u).render(**self.templates)
949
+
950
+ for k, v in references.get("refs", {}).items():
951
+ if isinstance(v, str):
952
+ if v.startswith("base64:"):
953
+ self.references[k] = base64.b64decode(v[7:])
954
+ self.references[k] = v
955
+ elif self.templates:
956
+ u = v[0]
957
+ if "{{" in u:
958
+ if self.simple_templates:
959
+ u = (
960
+ u.replace("{{", "{")
961
+ .replace("}}", "}")
962
+ .format(**self.templates)
963
+ )
964
+ else:
965
+ u = _render_jinja(u)
966
+ self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
967
+ else:
968
+ self.references[k] = v
969
+ self.references.update(self._process_gen(references.get("gen", [])))
970
+
971
+ def _process_templates(self, tmp):
972
+ self.templates = {}
973
+ if self.template_overrides is not None:
974
+ tmp.update(self.template_overrides)
975
+ for k, v in tmp.items():
976
+ if "{{" in v:
977
+ import jinja2
978
+
979
+ self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
980
+ temp
981
+ ).render(**kwargs)
982
+ else:
983
+ self.templates[k] = v
984
+
985
+ def _process_gen(self, gens):
986
+ out = {}
987
+ for gen in gens:
988
+ dimension = {
989
+ k: v
990
+ if isinstance(v, list)
991
+ else range(v.get("start", 0), v["stop"], v.get("step", 1))
992
+ for k, v in gen["dimensions"].items()
993
+ }
994
+ products = (
995
+ dict(zip(dimension.keys(), values))
996
+ for values in itertools.product(*dimension.values())
997
+ )
998
+ for pr in products:
999
+ import jinja2
1000
+
1001
+ key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
1002
+ url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
1003
+ if ("offset" in gen) and ("length" in gen):
1004
+ offset = int(
1005
+ jinja2.Template(gen["offset"]).render(**pr, **self.templates)
1006
+ )
1007
+ length = int(
1008
+ jinja2.Template(gen["length"]).render(**pr, **self.templates)
1009
+ )
1010
+ out[key] = [url, offset, length]
1011
+ elif ("offset" in gen) ^ ("length" in gen):
1012
+ raise ValueError(
1013
+ "Both 'offset' and 'length' are required for a "
1014
+ "reference generator entry if either is provided."
1015
+ )
1016
+ else:
1017
+ out[key] = [url]
1018
+ return out
1019
+
1020
    def _dircache_from_items(self):
        """Build ``self.dircache`` (dir -> listing) from the flat reference keys.

        Every key implies its chain of parent directories; each parent gets a
        directory entry and every key a file entry with the best-known size
        (None for whole-file references of unknown length).
        """
        self.dircache = {"": []}
        it = self.references.items()
        for path, part in it:
            if isinstance(part, (bytes, str)):
                # inline data: size is the stored length
                size = len(part)
            elif len(part) == 1:
                # whole-file reference: size unknown until queried remotely
                size = None
            else:
                _, _, size = part
            par = path.rsplit("/", 1)[0] if "/" in path else ""
            par0 = par
            subdirs = [par0]
            while par0 and par0 not in self.dircache:
                # collect parent directories
                par0 = self._parent(par0)
                subdirs.append(par0)

            subdirs = subdirs[::-1]
            for parent, child in zip(subdirs, subdirs[1:]):
                # register newly discovered directories
                assert child not in self.dircache
                assert parent in self.dircache
                self.dircache[parent].append(
                    {"name": child, "type": "directory", "size": 0}
                )
                self.dircache[child] = []

            self.dircache[par].append({"name": path, "type": "file", "size": size})
1049
+
1050
+ def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1051
+ data = self.cat_file(path) # load whole chunk into memory
1052
+ return io.BytesIO(data)
1053
+
1054
    def ls(self, path, detail=True, **kwargs):
        """List a (virtual) directory or single key.

        Lazy reference stores answer directly; otherwise the dircache is built
        on first use.  Raises FileNotFoundError for unknown paths.
        """
        path = self._strip_protocol(path)
        if isinstance(self.references, LazyReferenceMapper):
            try:
                return self.references.ls(path, detail)
            except KeyError:
                pass
            raise FileNotFoundError(f"'{path}' is not a known key")
        if not self.dircache:
            # populate the listing cache from the flat reference keys
            self._dircache_from_items()
        out = self._ls_from_cache(path)
        if out is None:
            raise FileNotFoundError(path)
        if detail:
            return out
        return [o["name"] for o in out]
1070
+
1071
    def exists(self, path, **kwargs):  # overwrite auto-sync version
        # a path exists if it is a known key (file) or an implied directory
        return self.isdir(path) or self.isfile(path)
1073
+
1074
    def isdir(self, path):  # overwrite auto-sync version
        """True if ``path`` is an implied directory (some key lives under it)."""
        if self.dircache:
            return path in self.dircache
        elif isinstance(self.references, LazyReferenceMapper):
            return path in self.references.listdir("")
        else:
            # this may be faster than building dircache for single calls, but
            # by looping will be slow for many calls; could cache it?
            return any(_.startswith(f"{path}/") for _ in self.references)
1083
+
1084
    def isfile(self, path):  # overwrite auto-sync version
        # every reference key is a "file"
        return path in self.references
1086
+
1087
    async def _ls(self, path, detail=True, **kwargs):  # calls fast sync code
        # listing is pure in-memory work, so the sync implementation is reused
        return self.ls(path, detail, **kwargs)
1089
+
1090
    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all keys (files) at or below ``path``.

        Without ``withdirs`` this is a fast scan over the flat reference keys;
        with it, the generic (dircache-based) implementation is used.
        """
        if withdirs:
            return super().find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
            )
        if path:
            path = self._strip_protocol(path)
            # NOTE(review): raw prefix match — "a/b" would also match key
            # "a/bc"; confirm whether keys should be matched on "path/" instead
            r = sorted(k for k in self.references if k.startswith(path))
        else:
            r = sorted(self.references)
        if detail:
            if not self.dircache:
                self._dircache_from_items()
            return {k: self._ls_from_cache(k)[0] for k in r}
        else:
            return r
1106
+
1107
    def info(self, path, **kwargs):
        """Return the info dict for one key or implied directory.

        For whole-file references of unknown length, the size is looked up on
        the remote backing filesystem.
        """
        out = self.references.get(path)
        if out is not None:
            if isinstance(out, (str, bytes)):
                # inline data: size is the stored length
                # decode base64 here
                return {"name": path, "type": "file", "size": len(out)}
            elif len(out) > 1:
                # [url, offset, size] reference
                return {"name": path, "type": "file", "size": out[2]}
            else:
                # whole-file reference; size resolved below
                out0 = [{"name": path, "type": "file", "size": None}]
        else:
            out = self.ls(path, True)
            out0 = [o for o in out if o["name"] == path]
            if not out0:
                return {"name": path, "type": "directory", "size": 0}
        if out0[0]["size"] is None:
            # if this is a whole remote file, update size using remote FS
            prot, _ = split_protocol(self.references[path][0])
            out0[0]["size"] = self.fss[prot].size(self.references[path][0])
        return out0[0]
1127
+
1128
    async def _info(self, path, **kwargs):  # calls fast sync code
        # info is (mostly) in-memory work, so the sync implementation is reused
        return self.info(path)
1130
+
1131
    async def _rm_file(self, path, **kwargs):
        """Remove one reference key (no-op if absent) and invalidate listings."""
        self.references.pop(
            path, None
        )  # ignores FileNotFound, just as well for directories
        self.dircache.clear()  # this is a bit heavy handed
1136
+
1137
    async def _pipe_file(self, path, data):
        """Store ``data`` under key ``path`` and invalidate listings."""
        # can be str or bytes
        self.references[path] = data
        self.dircache.clear()  # this is a bit heavy handed
1141
+
1142
+ async def _put_file(self, lpath, rpath, **kwargs):
1143
+ # puts binary
1144
+ with open(lpath, "rb") as f:
1145
+ self.references[rpath] = f.read()
1146
+ self.dircache.clear() # this is a bit heavy handed
1147
+
1148
+ def save_json(self, url, **storage_options):
1149
+ """Write modified references into new location"""
1150
+ out = {}
1151
+ for k, v in self.references.items():
1152
+ if isinstance(v, bytes):
1153
+ try:
1154
+ out[k] = v.decode("ascii")
1155
+ except UnicodeDecodeError:
1156
+ out[k] = (b"base64:" + base64.b64encode(v)).decode()
1157
+ else:
1158
+ out[k] = v
1159
+ with fsspec.open(url, "wb", **storage_options) as f:
1160
+ f.write(json.dumps({"version": 1, "refs": out}).encode())
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/smb.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module contains SMBFileSystem class responsible for handling access to
3
+ Windows Samba network shares by using package smbprotocol
4
+ """
5
+
6
+ import datetime
7
+ import uuid
8
+ from stat import S_ISDIR, S_ISLNK
9
+
10
+ import smbclient
11
+
12
+ from .. import AbstractFileSystem
13
+ from ..utils import infer_storage_options
14
+
15
+ # ! pylint: disable=bad-continuation
16
+
17
+
18
class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
    optional.

    .. note::

        For working this source require `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        # directory (below the share root) where uncommitted writes are staged
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        self._connect()

    @property
    def _port(self):
        # default SMB port when none was given
        return 445 if self.port is None else self.port

    def _connect(self):
        # registers a reusable session with smbclient for this host
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        # only meaningful below a share root; the share itself cannot be created
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        # the share root itself cannot be removed
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        """Stat one remote path, returning an fsspec-style info dict."""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            # directories carry a trailing slash in their reported name
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            # "time" is the access time here — NOTE(review): confirm consumers
            # expect atime rather than ctime under this key
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        """List entries under ``path``; with ``detail`` each entry is stat'ed."""
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            # one round-trip per entry
            dirs = [self.info(d) for d in dirs]
        return dirs

    # pylint: disable=too-many-arguments
    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            # deferred-commit write: stage into a temp file, move on commit
            temp = _as_temp_path(self.host, path, self.temppath)
            return SMBFileOpener(
                wpath, temp, mode, port=self._port, block_size=bls, **kwargs
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        # refuses to act on a bare share root
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
254
+
255
+
256
+ def _as_unc_path(host, path):
257
+ rpath = path.replace("/", "\\")
258
+ unc = f"\\\\{host}{rpath}"
259
+ return unc
260
+
261
+
262
def _as_temp_path(host, path, temppath):
    """Build a unique UNC temp-file path on the same share as *path*."""
    share = path.split("/")[1]
    # a fresh uuid4 guarantees no collision with concurrent writers
    staged = f"/{share}{temppath}/{uuid.uuid4()}"
    return _as_unc_path(host, staged)
267
+
268
+
269
+ def _share_has_path(path):
270
+ parts = path.count("/")
271
+ if path.endswith("/"):
272
+ return parts > 2
273
+ return parts > 1
274
+
275
+
276
class SMBFileOpener:
    """writes to remote temporary file, move on commit"""

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        # final destination (UNC path)
        self.path = path
        # staging location written to until commit()
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        # (re)open the staging file if it is missing or was closed
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        # TODO: use transaction support in SMB protocol
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        # os.fspath() reports the *final* path, not the staging file
        return self.path

    def __iter__(self):
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        # delegate everything else (read/write/seek/...) to the open handle
        return getattr(self.smbfile, item)

    def __enter__(self):
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
+ self.smbfile.__exit__(exc_type, exc_value, traceback)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/zip.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zipfile
2
+
3
+ import fsspec
4
+ from fsspec.archive import AbstractArchiveFileSystem
5
+
6
+
7
class ZipFileSystem(AbstractArchiveFileSystem):
    """Read/Write contents of ZIP archive as a file-system

    Keeps file object open while instance lives.

    This class is pickleable, but not necessarily thread-safe
    """

    root_marker = ""
    protocol = "zip"
    cachable = False

    def __init__(
        self,
        fo="",
        mode="r",
        target_protocol=None,
        target_options=None,
        compression=zipfile.ZIP_STORED,
        allowZip64=True,
        compresslevel=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        fo: str or file-like
            Contains ZIP, and must exist. If a str, will fetch file using
            :meth:`~fsspec.open_files`, which must return one file exactly.
        mode: str
            Accept: "r", "w", "a"
        target_protocol: str (optional)
            If ``fo`` is a string, this value can be used to override the
            FS protocol inferred from a URL
        target_options: dict (optional)
            Kwargs passed when instantiating the target FS, if ``fo`` is
            a string.
        compression, allowZip64, compresslevel: passed to ZipFile
            Only relevant when creating a ZIP
        """
        # FIX: drop spurious positional ``self`` previously passed through to
        # the base class (it was silently swallowed by *args)
        super().__init__(**kwargs)
        if mode not in set("rwa"):
            # FIX: message typo ("no understood" -> "not understood")
            raise ValueError(f"mode '{mode}' not understood")
        self.mode = mode
        if isinstance(fo, str):
            # appending needs a read+write handle on the existing archive
            if mode == "a":
                m = "r+b"
            else:
                m = mode + "b"
            fo = fsspec.open(
                fo, mode=m, protocol=target_protocol, **(target_options or {})
            )
        self.of = fo
        self.fo = fo.__enter__()  # the whole instance is a context
        self.zip = zipfile.ZipFile(
            self.fo,
            mode=mode,
            compression=compression,
            allowZip64=allowZip64,
            compresslevel=compresslevel,
        )
        self.dir_cache = None

    @classmethod
    def _strip_protocol(cls, path):
        # zip file paths are always relative to the archive root
        return super()._strip_protocol(path).lstrip("/")

    def __del__(self):
        # flush/commit the archive if it is still open when we are collected
        if hasattr(self, "zip"):
            self.close()
            del self.zip

    def close(self):
        """Commits any write changes to the file. Done on ``del`` too."""
        self.zip.close()

    def _get_dirs(self):
        """(Re)build the entry cache; always rebuilt while writing."""
        if self.dir_cache is None or self.mode in set("wa"):
            # when writing, dir_cache is always in the ZipFile's attributes,
            # not read from the file.
            files = self.zip.infolist()
            self.dir_cache = {
                dirname.rstrip("/"): {
                    "name": dirname.rstrip("/"),
                    "size": 0,
                    "type": "directory",
                }
                for dirname in self._all_dirnames(self.zip.namelist())
            }
            for z in files:
                f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
                f.update(
                    {
                        "name": z.filename.rstrip("/"),
                        "size": z.file_size,
                        "type": ("directory" if z.is_dir() else "file"),
                    }
                )
                self.dir_cache[f["name"]] = f

    def pipe_file(self, path, value, **kwargs):
        # override upstream, because we know the exact file size in this case
        self.zip.writestr(path, value, **kwargs)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Open one archive member; a ZipFS is read-only OR write-only."""
        path = self._strip_protocol(path)
        if "r" in mode and self.mode in set("wa"):
            if self.exists(path):
                raise OSError("ZipFS can only be open for reading or writing, not both")
            raise FileNotFoundError(path)
        if "r" in self.mode and "w" in mode:
            raise OSError("ZipFS can only be open for reading or writing, not both")
        # zipfile wants text-free mode strings ("r"/"w")
        out = self.zip.open(path, mode.strip("b"))
        if "r" in mode:
            info = self.info(path)
            out.size = info["size"]
            out.name = info["name"]
        return out
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import array
2
+ import posixpath
3
+ import warnings
4
+ from collections.abc import MutableMapping
5
+ from functools import cached_property
6
+
7
+ from .core import url_to_fs
8
+
9
+
10
+ class FSMap(MutableMapping):
11
+ """Wrap a FileSystem instance as a mutable wrapping.
12
+
13
+ The keys of the mapping become files under the given root, and the
14
+ values (which must be bytes) the contents of those files.
15
+
16
+ Parameters
17
+ ----------
18
+ root: string
19
+ prefix for all the files
20
+ fs: FileSystem instance
21
+ check: bool (=True)
22
+ performs a touch at the location, to check for write access.
23
+
24
+ Examples
25
+ --------
26
+ >>> fs = FileSystem(**parameters) # doctest: +SKIP
27
+ >>> d = FSMap('my-data/path/', fs) # doctest: +SKIP
28
+ or, more likely
29
+ >>> d = fs.get_mapper('my-data/path/')
30
+
31
+ >>> d['loc1'] = b'Hello World' # doctest: +SKIP
32
+ >>> list(d.keys()) # doctest: +SKIP
33
+ ['loc1']
34
+ >>> d['loc1'] # doctest: +SKIP
35
+ b'Hello World'
36
+ """
37
+
38
    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        """Set up the mapping over ``fs`` rooted at ``root``.

        ``create`` makes the root directory; ``check`` verifies it exists and
        is writable (by touching and removing a probe file).
        ``missing_exceptions`` are the fs exception types translated to
        KeyError by the mapping interface.
        """
        self.fs = fs
        self.root = fs._strip_protocol(root).rstrip("/")
        # root plus trailing separator, computed via a dummy join so the
        # filesystem's own path normalization is applied
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            # probe write access by touching and deleting a file
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")
62
+
63
    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        # imported lazily to avoid a circular import at module load time
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
69
+
70
    def clear(self):
        """Remove all keys below root - empties out mapping"""
        # deliberately best-effort: a missing/uncreatable root is not an error
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except:  # noqa: E722
            pass
77
+
78
    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            They keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        # for "omit" we also fetch with "return" and filter afterwards
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # a single key collapses cat() to bare bytes; re-wrap as dict
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            # translate fs-level "missing" exceptions to KeyError instances
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2]
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }
115
+
116
+ def setitems(self, values_dict):
117
+ """Set the values of multiple items in the store
118
+
119
+ Parameters
120
+ ----------
121
+ values_dict: dict(str, bytes)
122
+ """
123
+ values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
124
+ self.fs.pipe(values)
125
+
126
+ def delitems(self, keys):
127
+ """Remove multiple keys from the store"""
128
+ self.fs.rm([self._key_to_str(k) for k in keys])
129
+
130
+ def _key_to_str(self, key):
131
+ """Generate full path for the key"""
132
+ if not isinstance(key, str):
133
+ # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
134
+ warnings.warn(
135
+ "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
136
+ DeprecationWarning,
137
+ )
138
+ if isinstance(key, list):
139
+ key = tuple(key)
140
+ key = str(key)
141
+ return f"{self._root_key_to_str}{key}"
142
+
143
+ def _str_to_key(self, s):
144
+ """Strip path of to leave key name"""
145
+ return s[len(self.root) :].lstrip("/")
146
+
147
+ def __getitem__(self, key, default=None):
148
+ """Retrieve data"""
149
+ k = self._key_to_str(key)
150
+ try:
151
+ result = self.fs.cat(k)
152
+ except self.missing_exceptions:
153
+ if default is not None:
154
+ return default
155
+ raise KeyError(key)
156
+ return result
157
+
158
+ def pop(self, key, default=None):
159
+ """Pop data"""
160
+ result = self.__getitem__(key, default)
161
+ try:
162
+ del self[key]
163
+ except KeyError:
164
+ pass
165
+ return result
166
+
167
+ def __setitem__(self, key, value):
168
+ """Store value in key"""
169
+ key = self._key_to_str(key)
170
+ self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
171
+ self.fs.pipe_file(key, maybe_convert(value))
172
+
173
+ def __iter__(self):
174
+ return (self._str_to_key(x) for x in self.fs.find(self.root))
175
+
176
+ def __len__(self):
177
+ return len(self.fs.find(self.root))
178
+
179
+ def __delitem__(self, key):
180
+ """Remove key"""
181
+ try:
182
+ self.fs.rm(self._key_to_str(key))
183
+ except: # noqa: E722
184
+ raise KeyError
185
+
186
+ def __contains__(self, key):
187
+ """Does key exist in mapping?"""
188
+ path = self._key_to_str(key)
189
+ return self.fs.exists(path) and self.fs.isfile(path)
190
+
191
+ def __reduce__(self):
192
+ return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
193
+
194
+
195
def maybe_convert(value):
    """Coerce array-like values to ``bytes``; pass anything else through.

    ``array.array`` instances and objects exposing ``__array__`` (e.g. numpy
    arrays) are converted via the buffer protocol. datetime64/timedelta64
    arrays are first viewed as int64, since the buffer interface does not
    support those dtypes.
    """
    is_arraylike = isinstance(value, array.array) or hasattr(value, "__array__")
    if not is_arraylike:
        return value
    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # Buffer protocol can't handle datetime64/timedelta64 directly
        value = value.view("int64")
    return bytes(memoryview(value))
204
+
205
+
206
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Whether to attempt to read from the location before instantiation, to
        check that the mapping does exist
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # The protocol is stripped here rather than deferred to each backend open()
    fs, urlpath = url_to_fs(url, **kwargs)
    if alternate_root is not None:
        root = alternate_root
    else:
        root = urlpath
    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/utils.py ADDED
@@ -0,0 +1,742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import math
6
+ import os
7
+ import pathlib
8
+ import re
9
+ import sys
10
+ import tempfile
11
+ from functools import partial
12
+ from hashlib import md5
13
+ from importlib.metadata import version
14
+ from typing import (
15
+ IO,
16
+ TYPE_CHECKING,
17
+ Any,
18
+ Callable,
19
+ Iterable,
20
+ Iterator,
21
+ Sequence,
22
+ TypeVar,
23
+ )
24
+ from urllib.parse import urlsplit
25
+
26
+ if TYPE_CHECKING:
27
+ from typing_extensions import TypeGuard
28
+
29
+ from fsspec.spec import AbstractFileSystem
30
+
31
+
32
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
33
+
34
+ T = TypeVar("T")
35
+
36
+
37
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case; anything
    # without a protocol prefix is also a plain local file.
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            path = "%s:%s" % windows_path.groups()

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            # Bucket-style protocols fold the netloc back into the path.
            # (Removed the original no-op ``options["host"] = options["host"]``
            # else-branch.)
            options["path"] = options["host"] + options["path"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options
119
+
120
+
121
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge *inherited* into *options* in place.

    Raises KeyError if the two dicts disagree on the value of a shared key.
    """
    inherited = inherited or {}
    for key in set(options) & set(inherited):
        if options.get(key) != inherited.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage "
                f"option:\n{key}"
            )
    options.update(inherited)
135
+
136
+
137
+ # Compression extensions registered via fsspec.compression.register_compression
138
+ compressions: dict[str, str] = {}
139
+
140
+
141
def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    Infer a named compression type, if registered and available, from filename
    extension. This includes builtin (gz, bz2, zip) compressions, as well as
    optional compressions. See fsspec.compression.register_compression.
    """
    extension = os.path.splitext(filename)[-1].strip(".").lower()
    # dict.get returns None for unregistered extensions, matching the
    # original explicit membership test
    return compressions.get(extension)
152
+
153
+
154
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # handle corner cases max_int is 0 or exact power of 10
    max_int += 1e-8

    width = int(math.ceil(math.log10(max_int)))

    def name_function(i: int) -> str:
        return str(i).zfill(width)

    return name_function
181
+
182
+
183
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        # Prepend the tail of the previous block so a delimiter that spans
        # the block boundary is still found.
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                # Rewind to just past the first delimiter occurrence
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            # Text-mode files may reject ``in``/``index`` with bytes, or the
            # relative seek may fail; fall through and keep scanning.
            pass
        # Keep only enough tail bytes to match a delimiter across the boundary
        last = full[-len(delimiter) :]
230
+
231
+
232
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        # Advance the start of the block to just past the next delimiter
        # (seek_delimiter is a no-op at offset 0: file start is a valid split)
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # Shrink the remaining length by however far the start moved forward
        length -= start - offset

        # Likewise advance the end of the block to the next delimiter
        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter iff seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b
304
+
305
+
306
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        # Fold keyword arguments into the positional tuple for hashing
        args = args + (kwargs,)
    payload = str(args).encode()
    try:
        hasher = md5(payload)
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        hasher = md5(payload, usedforsecurity=False)
    return hasher.hexdigest()
325
+
326
+
327
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced according to its
    __fspath__ method.

    For backwards compatibility with older Python version, pathlib.Path
    objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    if hasattr(filepath, "__fspath__"):
        # os.fspath protocol (PEP 519)
        return filepath.__fspath__()
    if isinstance(filepath, pathlib.Path):
        return str(filepath)
    if hasattr(filepath, "path"):
        # e.g. objects exposing a ``.path`` attribute
        return filepath.path
    return filepath  # type: ignore[return-value]
359
+
360
+
361
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Instantiate *cls* and run its post-unpickle worker setup."""
    instance = cls(*args, **kwargs)
    instance._determine_worker()  # type: ignore[attr-defined]
    return instance
367
+
368
+
369
def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the shortest prefix common to all"""
    split_paths = [p.split("/") for p in paths]
    shortest = min(len(sp) for sp in split_paths)
    idx = 0
    matched = 0
    for idx in range(shortest):
        matched = all(sp[idx] == split_paths[0][idx] for sp in split_paths)
        if not matched:
            break
    # If the loop ran to completion, the last compared component also matched
    idx += matched
    return "/".join(split_paths[0][:idx])
380
+
381
+
382
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, it is already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            # Everything lands directly under path2, keyed by basename only
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                # Destination dir already exists: keep the last common
                # component so the files end up inside it
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                # No common prefix among relative paths: nest whole paths
                # under the destination
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                # Swap the common prefix for the destination root
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        # Explicit destination list: must pair 1:1 with the inputs
        assert len(paths) == len(path2)
    return path2
425
+
426
+
427
def is_exception(obj: Any) -> bool:
    """Return True if *obj* is an exception instance (any BaseException)."""
    return isinstance(obj, BaseException)
429
+
430
+
431
+ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
432
+ for attr in ["read", "close", "tell"]:
433
+ if not hasattr(f, attr):
434
+ return False
435
+ return True
436
+
437
+
438
def get_protocol(url: str) -> str:
    """Return the protocol prefix of *url*, or "file" if none is present.

    Splits on the first "::" (chained URL) or "://" separator.
    """
    url = stringify_path(url)
    # ``maxsplit`` passed as a keyword: passing it positionally to re.split
    # is deprecated since Python 3.13.
    parts = re.split(r"(\:\:|\://)", url, maxsplit=1)
    if len(parts) > 1:
        return parts[0]
    return "file"
444
+
445
+
446
def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        fs_cls = get_filesystem_class(get_protocol(path))
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
    return getattr(fs_cls, "local_file", False)
455
+
456
+
457
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    # Already imported: read the attribute directly, no metadata lookup
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # propagate; any metadata failure falls through to a real import.
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None
481
+
482
+
483
def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    """Attach a stream handler with the standard fsspec format to a logger.

    Exactly one of ``logger`` or ``logger_name`` must identify the target;
    when ``clear`` is True any pre-existing handlers are removed first.
    Returns the configured logger.
    """
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    target = logger or logging.getLogger(logger_name)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
        )
    )
    if clear:
        target.handlers.clear()
    target.addHandler(handler)
    target.setLevel(level)
    return target
502
+
503
+
504
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    """Re-attach the filesystem's protocol prefix to a bare path."""
    return fs.unstrip_protocol(name)
506
+
507
+
508
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Mirror attributes and methods from the given
    origin_name attribute of the instance to the
    decorated class"""

    def _delegate(attr: str, self: Any) -> Any:
        # Resolve on the wrapped origin object at access time, so the
        # mirror stays live even if the origin is replaced
        origin = getattr(self, origin_name)
        return getattr(origin, attr)

    def wrapper(cls: type[T]) -> type[T]:
        for attr in methods:
            setattr(cls, attr, property(partial(_delegate, attr)))
        return cls

    return wrapper
526
+
527
+
528
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    """A no-op context manager that simply yields its argument unchanged."""
    yield obj
531
+
532
+
533
def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    # Scalar starts/ends are broadcast to every path
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    # None starts are treated as offset 0 for sorting/merging purposes
    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                # An open-ended (end=None) range already covers the rest
                # of this file; later ranges for the same path are subsumed
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends
601
+
602
+
603
def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like"""
    original_pos = filelike.tell()
    try:
        # Seeking to the end returns the absolute offset, i.e. the size
        return filelike.seek(0, 2)
    finally:
        # Always restore the caller's position
        filelike.seek(original_pos)
610
+
611
+
612
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    A context manager that opens a temporary file next to `path` and, on exit,
    replaces `path` with the temporary file, thereby updating `path`
    atomically.
    """
    # Create the scratch file in the same directory so os.replace stays
    # on one filesystem (required for atomicity).
    fd, tmp_name = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        with open(fd, mode) as stream:
            yield stream
    except BaseException:
        # The caller's block failed: discard the scratch file and re-raise
        with contextlib.suppress(FileNotFoundError):
            os.unlink(tmp_name)
        raise
    else:
        # Success: atomically move the scratch file into place
        os.replace(tmp_name, path)
631
+
632
+
633
def _translate(pat, STAR, QUESTION_MARK):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # Translate one glob path segment into a list of regex fragments.
    # STAR and QUESTION_MARK are the regex snippets to substitute for the
    # shell wildcards "*" and "?" respectively; STAR is also used as a
    # sentinel (compared by identity) to collapse consecutive "*".
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            # Character class: scan ahead for the closing "]".
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                # A "]" right after the (possibly negated) opening bracket
                # is a literal member of the class, not the terminator.
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # Unterminated class: treat "[" as a literal character
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    # Split the class into chunks around range hyphens so
                    # literal hyphens can be escaped separately below.
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Trailing "-" is a literal hyphen
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            # Ordinary character: escape any regex metacharacters
            add(re.escape(c))
    assert i == n
    return res
705
+
706
+
707
def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    # Build regex snippets for "any path separator" and "not a separator",
    # covering both separators on platforms that have an altsep (Windows).
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == "*":
            # "*" matches exactly one path segment
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            # "**" matches zero or more whole segments
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            # Translate wildcards within a single segment; "*" must not
            # cross a separator here, hence the not_sep-based STAR.
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    # (?s:...) makes "." match newlines; \Z anchors at end of string
    return rf"(?s:{res})\Z"
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.aot_autograd import (
5
+ aot_autograd_decompositions,
6
+ KNOWN_TYPES,
7
+ PytreeThunk,
8
+ )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (403 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.make_functional import _swap_state
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (315 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.vmap import (
5
+ _add_batch_dim,
6
+ _broadcast_to_and_flatten,
7
+ _create_batched_inputs,
8
+ _get_name,
9
+ _process_batched_inputs,
10
+ _remove_batch_dim,
11
+ _unwrap_batched,
12
+ _validate_and_get_batch_size,
13
+ Tensor,
14
+ tree_flatten,
15
+ tree_unflatten,
16
+ )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (691 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/compile/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch._functorch import config
2
+ from torch._functorch.aot_autograd import (
3
+ aot_function,
4
+ aot_module,
5
+ aot_module_simplified,
6
+ compiled_function,
7
+ compiled_module,
8
+ get_aot_compilation_context,
9
+ get_aot_graph_name,
10
+ get_graph_being_compiled,
11
+ make_boxed_compiler,
12
+ make_boxed_func,
13
+ )
14
+ from torch._functorch.compilers import (
15
+ debug_compile,
16
+ default_decompositions,
17
+ draw_graph_compile,
18
+ memory_efficient_fusion,
19
+ nnc_jit,
20
+ nop,
21
+ print_compile,
22
+ ts_compile,
23
+ )
24
+ from torch._functorch.fx_minifier import minifier
25
+ from torch._functorch.partitioners import (
26
+ default_partition,
27
+ draw_graph,
28
+ draw_joint_graph,
29
+ min_cut_rematerialization_partition,
30
+ )
31
+ from torch._functorch.python_key import pythonkey_decompose
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (7.88 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc ADDED
Binary file (32.4 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/_parsing.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Adapted from https://github.com/arogozhnikov/einops/blob/36c7bb16e57d6e57f8f3050f9e07abdf3f00469f/einops/parsing.py.
2
+
3
+ MIT License
4
+
5
+ Copyright (c) 2018 Alex Rogozhnikov
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import keyword
28
+ import warnings
29
+ from typing import Collection, List, Mapping, Optional, Set, Tuple, Union
30
+
31
+ _ellipsis: str = "…" # NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated
32
+
33
+
34
+ class AnonymousAxis:
35
+ """Used by `ParsedExpression` to represent an axis with a size (> 1), but no associated identifier.
36
+
37
+ Note: Different instances of this class are not equal to each other, even if they have the same value.
38
+ """
39
+
40
+ def __init__(self, value: str) -> None:
41
+ self.value = int(value)
42
+ if self.value < 1:
43
+ raise ValueError(
44
+ f"Anonymous axis should have positive length, not {self.value}"
45
+ )
46
+
47
+ def __repr__(self) -> str:
48
+ return f"{self.value}-axis"
49
+
50
+
51
+ class ParsedExpression:
52
+ """Structure containing information about one side of an `einops`-style pattern (e.g. 'b c (h w)')."""
53
+
54
+ def __init__(
55
+ self,
56
+ expression: str,
57
+ *,
58
+ allow_underscore: bool = False,
59
+ allow_duplicates: bool = False,
60
+ ) -> None:
61
+ """Parse the expression and store relevant metadata.
62
+
63
+ Args:
64
+ expression (str): the `einops`-pattern to parse
65
+ allow_underscore (bool): whether to allow axis identifier names to begin with an underscore
66
+ allow_duplicates (bool): whether to allow an identifier to appear more than once in the expression
67
+ """
68
+ self.has_ellipsis: bool = False
69
+ self.has_ellipsis_parenthesized: Optional[bool] = None
70
+ self.identifiers: Set[Union[str, AnonymousAxis]] = set()
71
+ # that's axes like 2, 3, 4 or 5. Axes with size 1 are exceptional and replaced with empty composition
72
+ self.has_non_unitary_anonymous_axes: bool = False
73
+ # composition keeps structure of composite axes, see how different corner cases are handled in tests
74
+ self.composition: List[Union[List[Union[str, AnonymousAxis]], str]] = []
75
+ if "." in expression:
76
+ if "..." not in expression:
77
+ raise ValueError(
78
+ "Expression may contain dots only inside ellipsis (...)"
79
+ )
80
+ if str.count(expression, "...") != 1 or str.count(expression, ".") != 3:
81
+ raise ValueError(
82
+ "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor "
83
+ )
84
+ expression = expression.replace("...", _ellipsis)
85
+ self.has_ellipsis = True
86
+
87
+ bracket_group: Optional[List[Union[str, AnonymousAxis]]] = None
88
+
89
+ def add_axis_name(x: str) -> None:
90
+ if x in self.identifiers:
91
+ if not (allow_underscore and x == "_") and not allow_duplicates:
92
+ raise ValueError(
93
+ f"Indexing expression contains duplicate dimension '{x}'"
94
+ )
95
+ if x == _ellipsis:
96
+ self.identifiers.add(_ellipsis)
97
+ if bracket_group is None:
98
+ self.composition.append(_ellipsis)
99
+ self.has_ellipsis_parenthesized = False
100
+ else:
101
+ bracket_group.append(_ellipsis)
102
+ self.has_ellipsis_parenthesized = True
103
+ else:
104
+ is_number = str.isdecimal(x)
105
+ if is_number and int(x) == 1:
106
+ # handling the case of anonymous axis of length 1
107
+ if bracket_group is None:
108
+ self.composition.append([])
109
+ else:
110
+ pass # no need to think about 1s inside parenthesis
111
+ return
112
+ is_axis_name, reason = self.check_axis_name_return_reason(
113
+ x, allow_underscore=allow_underscore
114
+ )
115
+ if not (is_number or is_axis_name):
116
+ raise ValueError(f"Invalid axis identifier: {x}\n{reason}")
117
+ axis_name: Union[str, AnonymousAxis] = (
118
+ AnonymousAxis(x) if is_number else x
119
+ )
120
+ self.identifiers.add(axis_name)
121
+ if is_number:
122
+ self.has_non_unitary_anonymous_axes = True
123
+ if bracket_group is None:
124
+ self.composition.append([axis_name])
125
+ else:
126
+ bracket_group.append(axis_name)
127
+
128
+ current_identifier = None
129
+ for char in expression:
130
+ if char in "() ":
131
+ if current_identifier is not None:
132
+ add_axis_name(current_identifier)
133
+ current_identifier = None
134
+ if char == "(":
135
+ if bracket_group is not None:
136
+ raise ValueError(
137
+ "Axis composition is one-level (brackets inside brackets not allowed)"
138
+ )
139
+ bracket_group = []
140
+ elif char == ")":
141
+ if bracket_group is None:
142
+ raise ValueError("Brackets are not balanced")
143
+ self.composition.append(bracket_group)
144
+ bracket_group = None
145
+ elif str.isalnum(char) or char in ["_", _ellipsis]:
146
+ if current_identifier is None:
147
+ current_identifier = char
148
+ else:
149
+ current_identifier += char
150
+ else:
151
+ raise ValueError(f"Unknown character '{char}'")
152
+
153
+ if bracket_group is not None:
154
+ raise ValueError(f"Imbalanced parentheses in expression: '{expression}'")
155
+ if current_identifier is not None:
156
+ add_axis_name(current_identifier)
157
+
158
+ @staticmethod
159
+ def check_axis_name_return_reason(
160
+ name: str, allow_underscore: bool = False
161
+ ) -> Tuple[bool, str]:
162
+ """Check if the given axis name is valid, and a message explaining why if not.
163
+
164
+ Valid axes names are python identifiers except keywords, and should not start or end with an underscore.
165
+
166
+ Args:
167
+ name (str): the axis name to check
168
+ allow_underscore (bool): whether axis names are allowed to start with an underscore
169
+
170
+ Returns:
171
+ Tuple[bool, str]: whether the axis name is valid, a message explaining why if not
172
+ """
173
+ if not str.isidentifier(name):
174
+ return False, "not a valid python identifier"
175
+ elif name[0] == "_" or name[-1] == "_":
176
+ if name == "_" and allow_underscore:
177
+ return True, ""
178
+ return False, "axis name should should not start or end with underscore"
179
+ else:
180
+ if keyword.iskeyword(name):
181
+ warnings.warn(
182
+ f"It is discouraged to use axes names that are keywords: {name}",
183
+ RuntimeWarning,
184
+ )
185
+ if name in ["axis"]:
186
+ warnings.warn(
187
+ "It is discouraged to use 'axis' as an axis name and will raise an error in future",
188
+ FutureWarning,
189
+ )
190
+ return True, ""
191
+
192
+ @staticmethod
193
+ def check_axis_name(name: str) -> bool:
194
+ """Check if the name is a valid axis name.
195
+
196
+ Args:
197
+ name (str): the axis name to check
198
+
199
+ Returns:
200
+ bool: whether the axis name is valid
201
+ """
202
+ is_valid, _ = ParsedExpression.check_axis_name_return_reason(name)
203
+ return is_valid
204
+
205
+
206
+ def parse_pattern(
207
+ pattern: str, axes_lengths: Mapping[str, int]
208
+ ) -> Tuple[ParsedExpression, ParsedExpression]:
209
+ """Parse an `einops`-style pattern into a left-hand side and right-hand side `ParsedExpression` object.
210
+
211
+ Args:
212
+ pattern (str): the `einops`-style rearrangement pattern
213
+ axes_lengths (Mapping[str, int]): any additional length specifications for dimensions
214
+
215
+ Returns:
216
+ Tuple[ParsedExpression, ParsedExpression]: a tuple containing the left-hand side and right-hand side expressions
217
+ """
218
+ # adapted from einops.einops._prepare_transformation_recipe
219
+ # https://github.com/arogozhnikov/einops/blob/230ac1526c1f42c9e1f7373912c7f8047496df11/einops/einops.py
220
+ try:
221
+ left_str, right_str = pattern.split("->")
222
+ except ValueError:
223
+ raise ValueError("Pattern must contain a single '->' separator") from None
224
+
225
+ if _ellipsis in axes_lengths:
226
+ raise ValueError(f"'{_ellipsis}' is not an allowed axis identifier")
227
+
228
+ left = ParsedExpression(left_str)
229
+ right = ParsedExpression(right_str)
230
+
231
+ if not left.has_ellipsis and right.has_ellipsis:
232
+ raise ValueError(
233
+ f"Ellipsis found in right side, but not left side of a pattern {pattern}"
234
+ )
235
+ if left.has_ellipsis and left.has_ellipsis_parenthesized:
236
+ raise ValueError(
237
+ f"Ellipsis is parenthesis in the left side is not allowed: {pattern}"
238
+ )
239
+
240
+ return left, right
241
+
242
+
243
+ def validate_rearrange_expressions(
244
+ left: ParsedExpression, right: ParsedExpression, axes_lengths: Mapping[str, int]
245
+ ) -> None:
246
+ """Perform expression validations that are specific to the `rearrange` operation.
247
+
248
+ Args:
249
+ left (ParsedExpression): left-hand side expression
250
+ right (ParsedExpression): right-hand side expression
251
+ axes_lengths (Mapping[str, int]): any additional length specifications for dimensions
252
+ """
253
+ for length in axes_lengths.values():
254
+ if (length_type := type(length)) is not int:
255
+ raise TypeError(
256
+ f"rearrange axis lengths must be integers, got: {length_type}"
257
+ )
258
+
259
+ if left.has_non_unitary_anonymous_axes or right.has_non_unitary_anonymous_axes:
260
+ raise ValueError("rearrange only supports unnamed axes of size 1")
261
+
262
+ difference = set.symmetric_difference(left.identifiers, right.identifiers)
263
+ if len(difference) > 0:
264
+ raise ValueError(
265
+ f"Identifiers only on one side of rearrange expression (should be on both): {difference}"
266
+ )
267
+
268
+ unmatched_axes = axes_lengths.keys() - left.identifiers
269
+ if len(unmatched_axes) > 0:
270
+ raise ValueError(
271
+ f"Identifiers not found in rearrange expression: {unmatched_axes}"
272
+ )
273
+
274
+
275
+ def comma_separate(collection: Collection[Union[str, Collection[str]]]) -> str:
276
+ """Convert a collection of strings representing first class dims into a comma-separated string.
277
+
278
+ Args:
279
+ collection (Collection[Union[str, Collection[str]]]): the collection of strings to convert
280
+
281
+ Returns:
282
+ str: the comma-separated string
283
+
284
+ Examples:
285
+ >>> comma_separate(('d0',))
286
+ 'd0'
287
+
288
+ >>> comma_separate(('d0', 'd1', 'd2', 'd3'))
289
+ 'd0, d1, d2, d3'
290
+
291
+ >>> comma_separate([('d1', 'd4')])
292
+ '(d1, d4)'
293
+
294
+ >>> comma_separate([('d0',), (), ('d1',), ('d2',), ('d3', 'd4')])
295
+ '(d0,), (), (d1,), (d2,), (d3, d4)'
296
+ """
297
+ return ", ".join(
298
+ item
299
+ if isinstance(item, str)
300
+ else f"({comma_separate(item)}{',' if len(item) == 1 else ''})"
301
+ for item in collection
302
+ )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/experimental/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # PyTorch forward-mode is not mature yet
2
+ from torch._functorch.apis import chunk_vmap
3
+ from torch._functorch.batch_norm_replacement import replace_all_batch_norm_modules_
4
+ from torch._functorch.eager_transforms import hessian, jacfwd, jvp
5
+
6
+ from functorch import functionalize
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ nvidia/__pycache__/__init__.cpython-311.pyc,,
3
+ nvidia/cufft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ nvidia/cufft/__pycache__/__init__.cpython-311.pyc,,
5
+ nvidia/cufft/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc,,
7
+ nvidia/cufft/include/cudalibxt.h,sha256=9GDuRiOzJuO61zRDhIpWpF7XHp8FXSOIlHJNoIMwOZQ,4105
8
+ nvidia/cufft/include/cufft.h,sha256=Ui7ajKuYZcP-2bm9mpH96YN1igLKeDLgrttyc4jMQJE,12570
9
+ nvidia/cufft/include/cufftXt.h,sha256=LfRdibvAlaNQ35vYqI4n8YcMpPYROrIjpZu2L0tISi4,11463
10
+ nvidia/cufft/include/cufftw.h,sha256=DBrJQf-dnCWD-OYgdhnEzn8OiAX0U3xdteEaNdhs7mU,19412
11
+ nvidia/cufft/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc,,
13
+ nvidia/cufft/lib/libcufft.so.10,sha256=ylxvgdWEkGtNMrmErYcE3WW_db2rQzTtIs5-73UBqVo,279161544
14
+ nvidia/cufft/lib/libcufftw.so.10,sha256=GlkqWy81mpB3VQ7h_a3VjrLPnMC_q4_jl6N0-5SdoUM,1618440
15
+ nvidia_cufft_cu11-10.9.0.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
16
+ nvidia_cufft_cu11-10.9.0.58.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
17
+ nvidia_cufft_cu11-10.9.0.58.dist-info/METADATA,sha256=XITT6bPOjdOxPQa-kAVw4XjFf4_iU-JoLUXrOwPJ4JA,1503
18
+ nvidia_cufft_cu11-10.9.0.58.dist-info/RECORD,,
19
+ nvidia_cufft_cu11-10.9.0.58.dist-info/WHEEL,sha256=-kQi_VMfvRQozZJT7HUPMfY-5vLo0LVTmAylNJ3Ft98,106
20
+ nvidia_cufft_cu11-10.9.0.58.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cusolver_cu11-11.4.1.48.dist-info/License.txt ADDED
@@ -0,0 +1,1568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ End User License Agreement
2
+ --------------------------
3
+
4
+
5
+ Preface
6
+ -------
7
+
8
+ The Software License Agreement in Chapter 1 and the Supplement
9
+ in Chapter 2 contain license terms and conditions that govern
10
+ the use of NVIDIA software. By accepting this agreement, you
11
+ agree to comply with all the terms and conditions applicable
12
+ to the product(s) included herein.
13
+
14
+
15
+ NVIDIA Driver
16
+
17
+
18
+ Description
19
+
20
+ This package contains the operating system driver and
21
+ fundamental system software components for NVIDIA GPUs.
22
+
23
+
24
+ NVIDIA CUDA Toolkit
25
+
26
+
27
+ Description
28
+
29
+ The NVIDIA CUDA Toolkit provides command-line and graphical
30
+ tools for building, debugging and optimizing the performance
31
+ of applications accelerated by NVIDIA GPUs, runtime and math
32
+ libraries, and documentation including programming guides,
33
+ user manuals, and API references.
34
+
35
+
36
+ Default Install Location of CUDA Toolkit
37
+
38
+ Windows platform:
39
+
40
+ %ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
41
+
42
+ Linux platform:
43
+
44
+ /usr/local/cuda-#.#
45
+
46
+ Mac platform:
47
+
48
+ /Developer/NVIDIA/CUDA-#.#
49
+
50
+
51
+ NVIDIA CUDA Samples
52
+
53
+
54
+ Description
55
+
56
+ This package includes over 100+ CUDA examples that demonstrate
57
+ various CUDA programming principles, and efficient CUDA
58
+ implementation of algorithms in specific application domains.
59
+
60
+
61
+ Default Install Location of CUDA Samples
62
+
63
+ Windows platform:
64
+
65
+ %ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
66
+
67
+ Linux platform:
68
+
69
+ /usr/local/cuda-#.#/samples
70
+
71
+ and
72
+
73
+ $HOME/NVIDIA_CUDA-#.#_Samples
74
+
75
+ Mac platform:
76
+
77
+ /Developer/NVIDIA/CUDA-#.#/samples
78
+
79
+
80
+ NVIDIA Nsight Visual Studio Edition (Windows only)
81
+
82
+
83
+ Description
84
+
85
+ NVIDIA Nsight Development Platform, Visual Studio Edition is a
86
+ development environment integrated into Microsoft Visual
87
+ Studio that provides tools for debugging, profiling, analyzing
88
+ and optimizing your GPU computing and graphics applications.
89
+
90
+
91
+ Default Install Location of Nsight Visual Studio Edition
92
+
93
+ Windows platform:
94
+
95
+ %ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
96
+
97
+
98
+ 1. License Agreement for NVIDIA Software Development Kits
99
+ ---------------------------------------------------------
100
+
101
+
102
+ Release Date: July 26, 2018
103
+ ---------------------------
104
+
105
+
106
+ Important Notice—Read before downloading, installing,
107
+ copying or using the licensed software:
108
+ -------------------------------------------------------
109
+
110
+ This license agreement, including exhibits attached
111
+ ("Agreement”) is a legal agreement between you and NVIDIA
112
+ Corporation ("NVIDIA") and governs your use of a NVIDIA
113
+ software development kit (“SDK”).
114
+
115
+ Each SDK has its own set of software and materials, but here
116
+ is a description of the types of items that may be included in
117
+ a SDK: source code, header files, APIs, data sets and assets
118
+ (examples include images, textures, models, scenes, videos,
119
+ native API input/output files), binary software, sample code,
120
+ libraries, utility programs, programming code and
121
+ documentation.
122
+
123
+ This Agreement can be accepted only by an adult of legal age
124
+ of majority in the country in which the SDK is used.
125
+
126
+ If you are entering into this Agreement on behalf of a company
127
+ or other legal entity, you represent that you have the legal
128
+ authority to bind the entity to this Agreement, in which case
129
+ “you” will mean the entity you represent.
130
+
131
+ If you don’t have the required age or authority to accept
132
+ this Agreement, or if you don’t accept all the terms and
133
+ conditions of this Agreement, do not download, install or use
134
+ the SDK.
135
+
136
+ You agree to use the SDK only for purposes that are permitted
137
+ by (a) this Agreement, and (b) any applicable law, regulation
138
+ or generally accepted practices or guidelines in the relevant
139
+ jurisdictions.
140
+
141
+
142
+ 1.1. License
143
+
144
+
145
+ 1.1.1. License Grant
146
+
147
+ Subject to the terms of this Agreement, NVIDIA hereby grants
148
+ you a non-exclusive, non-transferable license, without the
149
+ right to sublicense (except as expressly provided in this
150
+ Agreement) to:
151
+
152
+ 1. Install and use the SDK,
153
+
154
+ 2. Modify and create derivative works of sample source code
155
+ delivered in the SDK, and
156
+
157
+ 3. Distribute those portions of the SDK that are identified
158
+ in this Agreement as distributable, as incorporated in
159
+ object code format into a software application that meets
160
+ the distribution requirements indicated in this Agreement.
161
+
162
+
163
+ 1.1.2. Distribution Requirements
164
+
165
+ These are the distribution requirements for you to exercise
166
+ the distribution grant:
167
+
168
+ 1. Your application must have material additional
169
+ functionality, beyond the included portions of the SDK.
170
+
171
+ 2. The distributable portions of the SDK shall only be
172
+ accessed by your application.
173
+
174
+ 3. The following notice shall be included in modifications
175
+ and derivative works of sample source code distributed:
176
+ “This software contains source code provided by NVIDIA
177
+ Corporation.”
178
+
179
+ 4. Unless a developer tool is identified in this Agreement
180
+ as distributable, it is delivered for your internal use
181
+ only.
182
+
183
+ 5. The terms under which you distribute your application
184
+ must be consistent with the terms of this Agreement,
185
+ including (without limitation) terms relating to the
186
+ license grant and license restrictions and protection of
187
+ NVIDIA’s intellectual property rights. Additionally, you
188
+ agree that you will protect the privacy, security and
189
+ legal rights of your application users.
190
+
191
+ 6. You agree to notify NVIDIA in writing of any known or
192
+ suspected distribution or use of the SDK not in compliance
193
+ with the requirements of this Agreement, and to enforce
194
+ the terms of your agreements with respect to distributed
195
+ SDK.
196
+
197
+
198
+ 1.1.3. Authorized Users
199
+
200
+ You may allow employees and contractors of your entity or of
201
+ your subsidiary(ies) to access and use the SDK from your
202
+ secure network to perform work on your behalf.
203
+
204
+ If you are an academic institution you may allow users
205
+ enrolled or employed by the academic institution to access and
206
+ use the SDK from your secure network.
207
+
208
+ You are responsible for the compliance with the terms of this
209
+ Agreement by your authorized users. If you become aware that
210
+ your authorized users didn’t follow the terms of this
211
+ Agreement, you agree to take reasonable steps to resolve the
212
+ non-compliance and prevent new occurrences.
213
+
214
+
215
+ 1.1.4. Pre-Release SDK
216
+
217
+ The SDK versions identified as alpha, beta, preview or
218
+ otherwise as pre-release, may not be fully functional, may
219
+ contain errors or design flaws, and may have reduced or
220
+ different security, privacy, accessibility, availability, and
221
+ reliability standards relative to commercial versions of
222
+ NVIDIA software and materials. Use of a pre-release SDK may
223
+ result in unexpected results, loss of data, project delays or
224
+ other unpredictable damage or loss.
225
+
226
+ You may use a pre-release SDK at your own risk, understanding
227
+ that pre-release SDKs are not intended for use in production
228
+ or business-critical systems.
229
+
230
+ NVIDIA may choose not to make available a commercial version
231
+ of any pre-release SDK. NVIDIA may also choose to abandon
232
+ development and terminate the availability of a pre-release
233
+ SDK at any time without liability.
234
+
235
+
236
+ 1.1.5. Updates
237
+
238
+ NVIDIA may, at its option, make available patches, workarounds
239
+ or other updates to this SDK. Unless the updates are provided
240
+ with their separate governing terms, they are deemed part of
241
+ the SDK licensed to you as provided in this Agreement. You
242
+ agree that the form and content of the SDK that NVIDIA
243
+ provides may change without prior notice to you. While NVIDIA
244
+ generally maintains compatibility between versions, NVIDIA may
245
+ in some cases make changes that introduce incompatibilities in
246
+ future versions of the SDK.
247
+
248
+
249
+ 1.1.6. Third Party Licenses
250
+
251
+ The SDK may come bundled with, or otherwise include or be
252
+ distributed with, third party software licensed by a NVIDIA
253
+ supplier and/or open source software provided under an open
254
+ source license. Use of third party software is subject to the
255
+ third-party license terms, or in the absence of third party
256
+ terms, the terms of this Agreement. Copyright to third party
257
+ software is held by the copyright holders indicated in the
258
+ third-party software or license.
259
+
260
+
261
+ 1.1.7. Reservation of Rights
262
+
263
+ NVIDIA reserves all rights, title, and interest in and to the
264
+ SDK, not expressly granted to you under this Agreement.
265
+
266
+
267
+ 1.2. Limitations
268
+
269
+ The following license limitations apply to your use of the
270
+ SDK:
271
+
272
+ 1. You may not reverse engineer, decompile or disassemble,
273
+ or remove copyright or other proprietary notices from any
274
+ portion of the SDK or copies of the SDK.
275
+
276
+ 2. Except as expressly provided in this Agreement, you may
277
+ not copy, sell, rent, sublicense, transfer, distribute,
278
+ modify, or create derivative works of any portion of the
279
+ SDK. For clarity, you may not distribute or sublicense the
280
+ SDK as a stand-alone product.
281
+
282
+ 3. Unless you have an agreement with NVIDIA for this
283
+ purpose, you may not indicate that an application created
284
+ with the SDK is sponsored or endorsed by NVIDIA.
285
+
286
+ 4. You may not bypass, disable, or circumvent any
287
+ encryption, security, digital rights management or
288
+ authentication mechanism in the SDK.
289
+
290
+ 5. You may not use the SDK in any manner that would cause it
291
+ to become subject to an open source software license. As
292
+ examples, licenses that require as a condition of use,
293
+ modification, and/or distribution that the SDK be:
294
+
295
+ a. Disclosed or distributed in source code form;
296
+
297
+ b. Licensed for the purpose of making derivative works;
298
+ or
299
+
300
+ c. Redistributable at no charge.
301
+
302
+ 6. Unless you have an agreement with NVIDIA for this
303
+ purpose, you may not use the SDK with any system or
304
+ application where the use or failure of the system or
305
+ application can reasonably be expected to threaten or
306
+ result in personal injury, death, or catastrophic loss.
307
+ Examples include use in avionics, navigation, military,
308
+ medical, life support or other life critical applications.
309
+ NVIDIA does not design, test or manufacture the SDK for
310
+ these critical uses and NVIDIA shall not be liable to you
311
+ or any third party, in whole or in part, for any claims or
312
+ damages arising from such uses.
313
+
314
+ 7. You agree to defend, indemnify and hold harmless NVIDIA
315
+ and its affiliates, and their respective employees,
316
+ contractors, agents, officers and directors, from and
317
+ against any and all claims, damages, obligations, losses,
318
+ liabilities, costs or debt, fines, restitutions and
319
+ expenses (including but not limited to attorney’s fees
320
+ and costs incident to establishing the right of
321
+ indemnification) arising out of or related to your use of
322
+ the SDK outside of the scope of this Agreement, or not in
323
+ compliance with its terms.
324
+
325
+
326
+ 1.3. Ownership
327
+
328
+ 1. NVIDIA or its licensors hold all rights, title and
329
+ interest in and to the SDK and its modifications and
330
+ derivative works, including their respective intellectual
331
+ property rights, subject to your rights described in this
332
+ section. This SDK may include software and materials from
333
+ NVIDIA’s licensors, and these licensors are intended
334
+ third party beneficiaries that may enforce this Agreement
335
+ with respect to their intellectual property rights.
336
+
337
+ 2. You hold all rights, title and interest in and to your
338
+ applications and your derivative works of the sample
339
+ source code delivered in the SDK, including their
340
+ respective intellectual property rights, subject to
341
+ NVIDIA’s rights described in this section.
342
+
343
+ 3. You may, but don’t have to, provide to NVIDIA
344
+ suggestions, feature requests or other feedback regarding
345
+ the SDK, including possible enhancements or modifications
346
+ to the SDK. For any feedback that you voluntarily provide,
347
+ you hereby grant NVIDIA and its affiliates a perpetual,
348
+ non-exclusive, worldwide, irrevocable license to use,
349
+ reproduce, modify, license, sublicense (through multiple
350
+ tiers of sublicensees), and distribute (through multiple
351
+ tiers of distributors) it without the payment of any
352
+ royalties or fees to you. NVIDIA will use feedback at its
353
+ choice. NVIDIA is constantly looking for ways to improve
354
+ its products, so you may send feedback to NVIDIA through
355
+ the developer portal at https://developer.nvidia.com.
356
+
357
+
358
+ 1.4. No Warranties
359
+
360
+ THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
361
+ FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
362
+ ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
363
+ OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
364
+ BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
365
+ FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
366
+ ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
367
+ WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
368
+ DEALING OR COURSE OF TRADE.
369
+
370
+
371
+ 1.5. Limitation of Liability
372
+
373
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
374
+ AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
375
+ PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
376
+ OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
377
+ PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
378
+ WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
379
+ WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
380
+ OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
381
+ PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
382
+ LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
383
+ TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
384
+ AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
385
+ NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
386
+ LIMIT.
387
+
388
+ These exclusions and limitations of liability shall apply
389
+ regardless if NVIDIA or its affiliates have been advised of
390
+ the possibility of such damages, and regardless of whether a
391
+ remedy fails its essential purpose. These exclusions and
392
+ limitations of liability form an essential basis of the
393
+ bargain between the parties, and, absent any of these
394
+ exclusions or limitations of liability, the provisions of this
395
+ Agreement, including, without limitation, the economic terms,
396
+ would be substantially different.
397
+
398
+
399
+ 1.6. Termination
400
+
401
+ 1. This Agreement will continue to apply until terminated by
402
+ either you or NVIDIA as described below.
403
+
404
+ 2. If you want to terminate this Agreement, you may do so by
405
+ stopping to use the SDK.
406
+
407
+ 3. NVIDIA may, at any time, terminate this Agreement if:
408
+
409
+ a. (i) you fail to comply with any term of this
410
+ Agreement and the non-compliance is not fixed within
411
+ thirty (30) days following notice from NVIDIA (or
412
+ immediately if you violate NVIDIA’s intellectual
413
+ property rights);
414
+
415
+ b. (ii) you commence or participate in any legal
416
+ proceeding against NVIDIA with respect to the SDK; or
417
+
418
+ c. (iii) NVIDIA decides to no longer provide the SDK in
419
+ a country or, in NVIDIA’s sole discretion, the
420
+ continued use of it is no longer commercially viable.
421
+
422
+ 4. Upon any termination of this Agreement, you agree to
423
+ promptly discontinue use of the SDK and destroy all copies
424
+ in your possession or control. Your prior distributions in
425
+ accordance with this Agreement are not affected by the
426
+ termination of this Agreement. Upon written request, you
427
+ will certify in writing that you have complied with your
428
+ commitments under this section. Upon any termination of
429
+ this Agreement all provisions survive except for the
430
+ license grant provisions.
431
+
432
+
433
+ 1.7. General
434
+
435
+ If you wish to assign this Agreement or your rights and
436
+ obligations, including by merger, consolidation, dissolution
437
+ or operation of law, contact NVIDIA to ask for permission. Any
438
+ attempted assignment not approved by NVIDIA in writing shall
439
+ be void and of no effect. NVIDIA may assign, delegate or
440
+ transfer this Agreement and its rights and obligations, and if
441
+ to a non-affiliate you will be notified.
442
+
443
+ You agree to cooperate with NVIDIA and provide reasonably
444
+ requested information to verify your compliance with this
445
+ Agreement.
446
+
447
+ This Agreement will be governed in all respects by the laws of
448
+ the United States and of the State of Delaware as those laws
449
+ are applied to contracts entered into and performed entirely
450
+ within Delaware by Delaware residents, without regard to the
451
+ conflicts of laws principles. The United Nations Convention on
452
+ Contracts for the International Sale of Goods is specifically
453
+ disclaimed. You agree to all terms of this Agreement in the
454
+ English language.
455
+
456
+ The state or federal courts residing in Santa Clara County,
457
+ California shall have exclusive jurisdiction over any dispute
458
+ or claim arising out of this Agreement. Notwithstanding this,
459
+ you agree that NVIDIA shall still be allowed to apply for
460
+ injunctive remedies or an equivalent type of urgent legal
461
+ relief in any jurisdiction.
462
+
463
+ If any court of competent jurisdiction determines that any
464
+ provision of this Agreement is illegal, invalid or
465
+ unenforceable, such provision will be construed as limited to
466
+ the extent necessary to be consistent with and fully
467
+ enforceable under the law and the remaining provisions will
468
+ remain in full force and effect. Unless otherwise specified,
469
+ remedies are cumulative.
470
+
471
+ Each party acknowledges and agrees that the other is an
472
+ independent contractor in the performance of this Agreement.
473
+
474
+ The SDK has been developed entirely at private expense and is
475
+ “commercial items” consisting of “commercial computer
476
+ software” and “commercial computer software
477
+ documentation” provided with RESTRICTED RIGHTS. Use,
478
+ duplication or disclosure by the U.S. Government or a U.S.
479
+ Government subcontractor is subject to the restrictions in
480
+ this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
481
+ in subparagraphs (c)(1) and (2) of the Commercial Computer
482
+ Software - Restricted Rights clause at FAR 52.227-19, as
483
+ applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
484
+ Expressway, Santa Clara, CA 95051.
485
+
486
+ The SDK is subject to United States export laws and
487
+ regulations. You agree that you will not ship, transfer or
488
+ export the SDK into any country, or use the SDK in any manner,
489
+ prohibited by the United States Bureau of Industry and
490
+ Security or economic sanctions regulations administered by the
491
+ U.S. Department of Treasury’s Office of Foreign Assets
492
+ Control (OFAC), or any applicable export laws, restrictions or
493
+ regulations. These laws include restrictions on destinations,
494
+ end users and end use. By accepting this Agreement, you
495
+ confirm that you are not a resident or citizen of any country
496
+ currently embargoed by the U.S. and that you are not otherwise
497
+ prohibited from receiving the SDK.
498
+
499
+ Any notice delivered by NVIDIA to you under this Agreement
500
+ will be delivered via mail, email or fax. You agree that any
501
+ notices that NVIDIA sends you electronically will satisfy any
502
+ legal communication requirements. Please direct your legal
503
+ notices or other correspondence to NVIDIA Corporation, 2788
504
+ San Tomas Expressway, Santa Clara, California 95051, United
505
+ States of America, Attention: Legal Department.
506
+
507
+ This Agreement and any exhibits incorporated into this
508
+ Agreement constitute the entire agreement of the parties with
509
+ respect to the subject matter of this Agreement and supersede
510
+ all prior negotiations or documentation exchanged between the
511
+ parties relating to this SDK license. Any additional and/or
512
+ conflicting terms on documents issued by you are null, void,
513
+ and invalid. Any amendment or waiver under this Agreement
514
+ shall be in writing and signed by representatives of both
515
+ parties.
516
+
517
+
518
+ 2. CUDA Toolkit Supplement to Software License Agreement for
519
+ NVIDIA Software Development Kits
520
+ ------------------------------------------------------------
521
+
522
+
523
+ Release date: August 16, 2018
524
+ -----------------------------
525
+
526
+ The terms in this supplement govern your use of the NVIDIA
527
+ CUDA Toolkit SDK under the terms of your license agreement
528
+ (“Agreement”) as modified by this supplement. Capitalized
529
+ terms used but not defined below have the meaning assigned to
530
+ them in the Agreement.
531
+
532
+ This supplement is an exhibit to the Agreement and is
533
+ incorporated as an integral part of the Agreement. In the
534
+ event of conflict between the terms in this supplement and the
535
+ terms in the Agreement, the terms in this supplement govern.
536
+
537
+
538
+ 2.1. License Scope
539
+
540
+ The SDK is licensed for you to develop applications only for
541
+ use in systems with NVIDIA GPUs.
542
+
543
+
544
+ 2.2. Distribution
545
+
546
+ The portions of the SDK that are distributable under the
547
+ Agreement are listed in Attachment A.
548
+
549
+
550
+ 2.3. Operating Systems
551
+
552
+ Those portions of the SDK designed exclusively for use on the
553
+ Linux or FreeBSD operating systems, or other operating systems
554
+ derived from the source code to these operating systems, may
555
+ be copied and redistributed for use in accordance with this
556
+ Agreement, provided that the object code files are not
557
+ modified in any way (except for unzipping of compressed
558
+ files).
559
+
560
+
561
+ 2.4. Audio and Video Encoders and Decoders
562
+
563
+ You acknowledge and agree that it is your sole responsibility
564
+ to obtain any additional third-party licenses required to
565
+ make, have made, use, have used, sell, import, and offer for
566
+ sale your products or services that include or incorporate any
567
+ third-party software and content relating to audio and/or
568
+ video encoders and decoders from, including but not limited
569
+ to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
570
+ MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
571
+ under this Agreement any necessary patent or other rights with
572
+ respect to any audio and/or video encoders and decoders.
573
+
574
+
575
+ 2.5. Licensing
576
+
577
+ If the distribution terms in this Agreement are not suitable
578
+ for your organization, or for any questions regarding this
579
+ Agreement, please contact NVIDIA at
580
+ nvidia-compute-license-questions@nvidia.com.
581
+
582
+
583
+ 2.6. Attachment A
584
+
585
+ The following portions of the SDK are distributable under the
586
+ Agreement:
587
+
588
+ Component
589
+
590
+ CUDA Runtime
591
+
592
+ Windows
593
+
594
+ cudart.dll, cudart_static.lib, cudadevrt.lib
595
+
596
+ Mac OSX
597
+
598
+ libcudart.dylib, libcudart_static.a, libcudadevrt.a
599
+
600
+ Linux
601
+
602
+ libcudart.so, libcudart_static.a, libcudadevrt.a
603
+
604
+ Android
605
+
606
+ libcudart.so, libcudart_static.a, libcudadevrt.a
607
+
608
+ Component
609
+
610
+ CUDA FFT Library
611
+
612
+ Windows
613
+
614
+ cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
615
+
616
+ Mac OSX
617
+
618
+ libcufft.dylib, libcufft_static.a, libcufftw.dylib,
619
+ libcufftw_static.a
620
+
621
+ Linux
622
+
623
+ libcufft.so, libcufft_static.a, libcufftw.so,
624
+ libcufftw_static.a
625
+
626
+ Android
627
+
628
+ libcufft.so, libcufft_static.a, libcufftw.so,
629
+ libcufftw_static.a
630
+
631
+ Component
632
+
633
+ CUDA BLAS Library
634
+
635
+ Windows
636
+
637
+ cublas.dll, cublasLt.dll
638
+
639
+ Mac OSX
640
+
641
+ libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
642
+ libcublasLt_static.a
643
+
644
+ Linux
645
+
646
+ libcublas.so, libcublasLt.so, libcublas_static.a,
647
+ libcublasLt_static.a
648
+
649
+ Android
650
+
651
+ libcublas.so, libcublasLt.so, libcublas_static.a,
652
+ libcublasLt_static.a
653
+
654
+ Component
655
+
656
+ NVIDIA "Drop-in" BLAS Library
657
+
658
+ Windows
659
+
660
+ nvblas.dll
661
+
662
+ Mac OSX
663
+
664
+ libnvblas.dylib
665
+
666
+ Linux
667
+
668
+ libnvblas.so
669
+
670
+ Component
671
+
672
+ CUDA Sparse Matrix Library
673
+
674
+ Windows
675
+
676
+ cusparse.dll, cusparse.lib
677
+
678
+ Mac OSX
679
+
680
+ libcusparse.dylib, libcusparse_static.a
681
+
682
+ Linux
683
+
684
+ libcusparse.so, libcusparse_static.a
685
+
686
+ Android
687
+
688
+ libcusparse.so, libcusparse_static.a
689
+
690
+ Component
691
+
692
+ CUDA Linear Solver Library
693
+
694
+ Windows
695
+
696
+ cusolver.dll, cusolver.lib
697
+
698
+ Mac OSX
699
+
700
+ libcusolver.dylib, libcusolver_static.a
701
+
702
+ Linux
703
+
704
+ libcusolver.so, libcusolver_static.a
705
+
706
+ Android
707
+
708
+ libcusolver.so, libcusolver_static.a
709
+
710
+ Component
711
+
712
+ CUDA Random Number Generation Library
713
+
714
+ Windows
715
+
716
+ curand.dll, curand.lib
717
+
718
+ Mac OSX
719
+
720
+ libcurand.dylib, libcurand_static.a
721
+
722
+ Linux
723
+
724
+ libcurand.so, libcurand_static.a
725
+
726
+ Android
727
+
728
+ libcurand.so, libcurand_static.a
729
+
730
+ Component
731
+
732
+ CUDA Accelerated Graph Library
733
+
734
+ Component
735
+
736
+ NVIDIA Performance Primitives Library
737
+
738
+ Windows
739
+
740
+ nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
741
+ nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
742
+ nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
743
+ nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
744
+ nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
745
+
746
+ Mac OSX
747
+
748
+ libnppc.dylib, libnppc_static.a, libnppial.dylib,
749
+ libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
750
+ libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
751
+ libnppidei_static.a, libnppif.dylib, libnppif_static.a,
752
+ libnppig.dylib, libnppig_static.a, libnppim.dylib,
753
+ libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
754
+ libnpps.dylib, libnpps_static.a
755
+
756
+ Linux
757
+
758
+ libnppc.so, libnppc_static.a, libnppial.so,
759
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
760
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
761
+ libnppidei_static.a, libnppif.so, libnppif_static.a
762
+ libnppig.so, libnppig_static.a, libnppim.so,
763
+ libnppim_static.a, libnppist.so, libnppist_static.a,
764
+ libnppisu.so, libnppisu_static.a, libnppitc.so
765
+ libnppitc_static.a, libnpps.so, libnpps_static.a
766
+
767
+ Android
768
+
769
+ libnppc.so, libnppc_static.a, libnppial.so,
770
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
771
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
772
+ libnppidei_static.a, libnppif.so, libnppif_static.a
773
+ libnppig.so, libnppig_static.a, libnppim.so,
774
+ libnppim_static.a, libnppist.so, libnppist_static.a,
775
+ libnppisu.so, libnppisu_static.a, libnppitc.so
776
+ libnppitc_static.a, libnpps.so, libnpps_static.a
777
+
778
+ Component
779
+
780
+ NVIDIA JPEG Library
781
+
782
+ Linux
783
+
784
+ libnvjpeg.so, libnvjpeg_static.a
785
+
786
+ Component
787
+
788
+ Internal common library required for statically linking to
789
+ cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
790
+
791
+ Mac OSX
792
+
793
+ libculibos.a
794
+
795
+ Linux
796
+
797
+ libculibos.a
798
+
799
+ Component
800
+
801
+ NVIDIA Runtime Compilation Library and Header
802
+
803
+ All
804
+
805
+ nvrtc.h
806
+
807
+ Windows
808
+
809
+ nvrtc.dll, nvrtc-builtins.dll
810
+
811
+ Mac OSX
812
+
813
+ libnvrtc.dylib, libnvrtc-builtins.dylib
814
+
815
+ Linux
816
+
817
+ libnvrtc.so, libnvrtc-builtins.so
818
+
819
+ Component
820
+
821
+ NVIDIA Optimizing Compiler Library
822
+
823
+ Windows
824
+
825
+ nvvm.dll
826
+
827
+ Mac OSX
828
+
829
+ libnvvm.dylib
830
+
831
+ Linux
832
+
833
+ libnvvm.so
834
+
835
+ Component
836
+
837
+ NVIDIA Common Device Math Functions Library
838
+
839
+ Windows
840
+
841
+ libdevice.10.bc
842
+
843
+ Mac OSX
844
+
845
+ libdevice.10.bc
846
+
847
+ Linux
848
+
849
+ libdevice.10.bc
850
+
851
+ Component
852
+
853
+ CUDA Occupancy Calculation Header Library
854
+
855
+ All
856
+
857
+ cuda_occupancy.h
858
+
859
+ Component
860
+
861
+ CUDA Half Precision Headers
862
+
863
+ All
864
+
865
+ cuda_fp16.h, cuda_fp16.hpp
866
+
867
+ Component
868
+
869
+ CUDA Profiling Tools Interface (CUPTI) Library
870
+
871
+ Windows
872
+
873
+ cupti.dll
874
+
875
+ Mac OSX
876
+
877
+ libcupti.dylib
878
+
879
+ Linux
880
+
881
+ libcupti.so
882
+
883
+ Component
884
+
885
+ NVIDIA Tools Extension Library
886
+
887
+ Windows
888
+
889
+ nvToolsExt.dll, nvToolsExt.lib
890
+
891
+ Mac OSX
892
+
893
+ libnvToolsExt.dylib
894
+
895
+ Linux
896
+
897
+ libnvToolsExt.so
898
+
899
+ Component
900
+
901
+ NVIDIA CUDA Driver Libraries
902
+
903
+ Linux
904
+
905
+ libcuda.so, libnvidia-fatbinaryloader.so,
906
+ libnvidia-ptxjitcompiler.so
907
+
908
+ The NVIDIA CUDA Driver Libraries are only distributable in
909
+ applications that meet this criteria:
910
+
911
+ 1. The application was developed starting from a NVIDIA CUDA
912
+ container obtained from Docker Hub or the NVIDIA GPU
913
+ Cloud, and
914
+
915
+ 2. The resulting application is packaged as a Docker
916
+ container and distributed to users on Docker Hub or the
917
+ NVIDIA GPU Cloud only.
918
+
919
+
920
+ 2.7. Attachment B
921
+
922
+
923
+ Additional Licensing Obligations
924
+
925
+ The following third party components included in the SOFTWARE
926
+ are licensed to Licensee pursuant to the following terms and
927
+ conditions:
928
+
929
+ 1. Licensee's use of the GDB third party component is
930
+ subject to the terms and conditions of GNU GPL v3:
931
+
932
+ This product includes copyrighted third-party software licensed
933
+ under the terms of the GNU General Public License v3 ("GPL v3").
934
+ All third-party software packages are copyright by their respective
935
+ authors. GPL v3 terms and conditions are hereby incorporated into
936
+ the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
937
+
938
+ Consistent with these licensing requirements, the software
939
+ listed below is provided under the terms of the specified
940
+ open source software licenses. To obtain source code for
941
+ software provided under licenses that require
942
+ redistribution of source code, including the GNU General
943
+ Public License (GPL) and GNU Lesser General Public License
944
+ (LGPL), contact oss-requests@nvidia.com. This offer is
945
+ valid for a period of three (3) years from the date of the
946
+ distribution of this product by NVIDIA CORPORATION.
947
+
948
+ Component License
949
+ CUDA-GDB GPL v3
950
+
951
+ 2. Licensee represents and warrants that any and all third
952
+ party licensing and/or royalty payment obligations in
953
+ connection with Licensee's use of the H.264 video codecs
954
+ are solely the responsibility of Licensee.
955
+
956
+ 3. Licensee's use of the Thrust library is subject to the
957
+ terms and conditions of the Apache License Version 2.0.
958
+ All third-party software packages are copyright by their
959
+ respective authors. Apache License Version 2.0 terms and
960
+ conditions are hereby incorporated into the Agreement by
961
+ this reference.
962
+ http://www.apache.org/licenses/LICENSE-2.0.html
963
+
964
+ In addition, Licensee acknowledges the following notice:
965
+ Thrust includes source code from the Boost Iterator,
966
+ Tuple, System, and Random Number libraries.
967
+
968
+ Boost Software License - Version 1.0 - August 17th, 2003
969
+ . . . .
970
+
971
+ Permission is hereby granted, free of charge, to any person or
972
+ organization obtaining a copy of the software and accompanying
973
+ documentation covered by this license (the "Software") to use,
974
+ reproduce, display, distribute, execute, and transmit the Software,
975
+ and to prepare derivative works of the Software, and to permit
976
+ third-parties to whom the Software is furnished to do so, all
977
+ subject to the following:
978
+
979
+ The copyright notices in the Software and this entire statement,
980
+ including the above license grant, this restriction and the following
981
+ disclaimer, must be included in all copies of the Software, in whole
982
+ or in part, and all derivative works of the Software, unless such
983
+ copies or derivative works are solely in the form of machine-executable
984
+ object code generated by a source language processor.
985
+
986
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
987
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
988
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
989
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
990
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
991
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
992
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
993
+ OTHER DEALINGS IN THE SOFTWARE.
994
+
995
+ 4. Licensee's use of the LLVM third party component is
996
+ subject to the following terms and conditions:
997
+
998
+ ======================================================
999
+ LLVM Release License
1000
+ ======================================================
1001
+ University of Illinois/NCSA
1002
+ Open Source License
1003
+
1004
+ Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
1005
+ All rights reserved.
1006
+
1007
+ Developed by:
1008
+
1009
+ LLVM Team
1010
+
1011
+ University of Illinois at Urbana-Champaign
1012
+
1013
+ http://llvm.org
1014
+
1015
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1016
+ of this software and associated documentation files (the "Software"), to
1017
+ deal with the Software without restriction, including without limitation the
1018
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
1019
+ sell copies of the Software, and to permit persons to whom the Software is
1020
+ furnished to do so, subject to the following conditions:
1021
+
1022
+ * Redistributions of source code must retain the above copyright notice,
1023
+ this list of conditions and the following disclaimers.
1024
+
1025
+ * Redistributions in binary form must reproduce the above copyright
1026
+ notice, this list of conditions and the following disclaimers in the
1027
+ documentation and/or other materials provided with the distribution.
1028
+
1029
+ * Neither the names of the LLVM Team, University of Illinois at Urbana-
1030
+ Champaign, nor the names of its contributors may be used to endorse or
1031
+ promote products derived from this Software without specific prior
1032
+ written permission.
1033
+
1034
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1035
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1036
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1037
+ THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
1038
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1039
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1040
+ DEALINGS WITH THE SOFTWARE.
1041
+
1042
+ 5. Licensee's use (e.g. nvprof) of the PCRE third party
1043
+ component is subject to the following terms and
1044
+ conditions:
1045
+
1046
+ ------------
1047
+ PCRE LICENCE
1048
+ ------------
1049
+ PCRE is a library of functions to support regular expressions whose syntax
1050
+ and semantics are as close as possible to those of the Perl 5 language.
1051
+ Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
1052
+ specified below. The documentation for PCRE, supplied in the "doc"
1053
+ directory, is distributed under the same terms as the software itself. The
1054
+ basic library functions are written in C and are freestanding. Also
1055
+ included in the distribution is a set of C++ wrapper functions, and a just-
1056
+ in-time compiler that can be used to optimize pattern matching. These are
1057
+ both optional features that can be omitted when the library is built.
1058
+
1059
+ THE BASIC LIBRARY FUNCTIONS
1060
+ ---------------------------
1061
+ Written by: Philip Hazel
1062
+ Email local part: ph10
1063
+ Email domain: cam.ac.uk
1064
+ University of Cambridge Computing Service,
1065
+ Cambridge, England.
1066
+ Copyright (c) 1997-2012 University of Cambridge
1067
+ All rights reserved.
1068
+
1069
+ PCRE JUST-IN-TIME COMPILATION SUPPORT
1070
+ -------------------------------------
1071
+ Written by: Zoltan Herczeg
1072
+ Email local part: hzmester
1073
+ Emain domain: freemail.hu
1074
+ Copyright(c) 2010-2012 Zoltan Herczeg
1075
+ All rights reserved.
1076
+
1077
+ STACK-LESS JUST-IN-TIME COMPILER
1078
+ --------------------------------
1079
+ Written by: Zoltan Herczeg
1080
+ Email local part: hzmester
1081
+ Emain domain: freemail.hu
1082
+ Copyright(c) 2009-2012 Zoltan Herczeg
1083
+ All rights reserved.
1084
+
1085
+ THE C++ WRAPPER FUNCTIONS
1086
+ -------------------------
1087
+ Contributed by: Google Inc.
1088
+ Copyright (c) 2007-2012, Google Inc.
1089
+ All rights reserved.
1090
+
1091
+ THE "BSD" LICENCE
1092
+ -----------------
1093
+ Redistribution and use in source and binary forms, with or without
1094
+ modification, are permitted provided that the following conditions are met:
1095
+
1096
+ * Redistributions of source code must retain the above copyright notice,
1097
+ this list of conditions and the following disclaimer.
1098
+
1099
+ * Redistributions in binary form must reproduce the above copyright
1100
+ notice, this list of conditions and the following disclaimer in the
1101
+ documentation and/or other materials provided with the distribution.
1102
+
1103
+ * Neither the name of the University of Cambridge nor the name of Google
1104
+ Inc. nor the names of their contributors may be used to endorse or
1105
+ promote products derived from this software without specific prior
1106
+ written permission.
1107
+
1108
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1109
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1110
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1111
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1112
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1113
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1114
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1115
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1116
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1117
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1118
+ POSSIBILITY OF SUCH DAMAGE.
1119
+
1120
+ 6. Some of the cuBLAS library routines were written by or
1121
+ derived from code written by Vasily Volkov and are subject
1122
+ to the Modified Berkeley Software Distribution License as
1123
+ follows:
1124
+
1125
+ Copyright (c) 2007-2009, Regents of the University of California
1126
+
1127
+ All rights reserved.
1128
+
1129
+ Redistribution and use in source and binary forms, with or without
1130
+ modification, are permitted provided that the following conditions are
1131
+ met:
1132
+ * Redistributions of source code must retain the above copyright
1133
+ notice, this list of conditions and the following disclaimer.
1134
+ * Redistributions in binary form must reproduce the above
1135
+ copyright notice, this list of conditions and the following
1136
+ disclaimer in the documentation and/or other materials provided
1137
+ with the distribution.
1138
+ * Neither the name of the University of California, Berkeley nor
1139
+ the names of its contributors may be used to endorse or promote
1140
+ products derived from this software without specific prior
1141
+ written permission.
1142
+
1143
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1144
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1145
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1146
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1147
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1148
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1149
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1150
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1151
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1152
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1153
+ POSSIBILITY OF SUCH DAMAGE.
1154
+
1155
+ 7. Some of the cuBLAS library routines were written by or
1156
+ derived from code written by Davide Barbieri and are
1157
+ subject to the Modified Berkeley Software Distribution
1158
+ License as follows:
1159
+
1160
+ Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
1161
+
1162
+ All rights reserved.
1163
+
1164
+ Redistribution and use in source and binary forms, with or without
1165
+ modification, are permitted provided that the following conditions are
1166
+ met:
1167
+ * Redistributions of source code must retain the above copyright
1168
+ notice, this list of conditions and the following disclaimer.
1169
+ * Redistributions in binary form must reproduce the above
1170
+ copyright notice, this list of conditions and the following
1171
+ disclaimer in the documentation and/or other materials provided
1172
+ with the distribution.
1173
+ * The name of the author may not be used to endorse or promote
1174
+ products derived from this software without specific prior
1175
+ written permission.
1176
+
1177
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1178
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1179
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1180
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1181
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1182
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1183
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1184
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1185
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1186
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1187
+ POSSIBILITY OF SUCH DAMAGE.
1188
+
1189
+ 8. Some of the cuBLAS library routines were derived from
1190
+ code developed by the University of Tennessee and are
1191
+ subject to the Modified Berkeley Software Distribution
1192
+ License as follows:
1193
+
1194
+ Copyright (c) 2010 The University of Tennessee.
1195
+
1196
+ All rights reserved.
1197
+
1198
+ Redistribution and use in source and binary forms, with or without
1199
+ modification, are permitted provided that the following conditions are
1200
+ met:
1201
+ * Redistributions of source code must retain the above copyright
1202
+ notice, this list of conditions and the following disclaimer.
1203
+ * Redistributions in binary form must reproduce the above
1204
+ copyright notice, this list of conditions and the following
1205
+ disclaimer listed in this license in the documentation and/or
1206
+ other materials provided with the distribution.
1207
+ * Neither the name of the copyright holders nor the names of its
1208
+ contributors may be used to endorse or promote products derived
1209
+ from this software without specific prior written permission.
1210
+
1211
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1212
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1213
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1214
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1215
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1216
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1217
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1218
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1219
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1220
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1221
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1222
+
1223
+ 9. Some of the cuBLAS library routines were written by or
1224
+ derived from code written by Jonathan Hogg and are subject
1225
+ to the Modified Berkeley Software Distribution License as
1226
+ follows:
1227
+
1228
+ Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
1229
+
1230
+ All rights reserved.
1231
+
1232
+ Redistribution and use in source and binary forms, with or without
1233
+ modification, are permitted provided that the following conditions are
1234
+ met:
1235
+ * Redistributions of source code must retain the above copyright
1236
+ notice, this list of conditions and the following disclaimer.
1237
+ * Redistributions in binary form must reproduce the above
1238
+ copyright notice, this list of conditions and the following
1239
+ disclaimer in the documentation and/or other materials provided
1240
+ with the distribution.
1241
+ * Neither the name of the STFC nor the names of its contributors
1242
+ may be used to endorse or promote products derived from this
1243
+ software without specific prior written permission.
1244
+
1245
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1246
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1247
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1248
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
1249
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1250
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1251
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
1252
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1253
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
1254
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
1255
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1256
+
1257
+ 10. Some of the cuBLAS library routines were written by or
1258
+ derived from code written by Ahmad M. Abdelfattah, David
1259
+ Keyes, and Hatem Ltaief, and are subject to the Apache
1260
+ License, Version 2.0, as follows:
1261
+
1262
+ -- (C) Copyright 2013 King Abdullah University of Science and Technology
1263
+ Authors:
1264
+ Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
1265
+ David Keyes (david.keyes@kaust.edu.sa)
1266
+ Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
1267
+
1268
+ Redistribution and use in source and binary forms, with or without
1269
+ modification, are permitted provided that the following conditions
1270
+ are met:
1271
+
1272
+ * Redistributions of source code must retain the above copyright
1273
+ notice, this list of conditions and the following disclaimer.
1274
+ * Redistributions in binary form must reproduce the above copyright
1275
+ notice, this list of conditions and the following disclaimer in the
1276
+ documentation and/or other materials provided with the distribution.
1277
+ * Neither the name of the King Abdullah University of Science and
1278
+ Technology nor the names of its contributors may be used to endorse
1279
+ or promote products derived from this software without specific prior
1280
+ written permission.
1281
+
1282
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1283
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1284
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1285
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1286
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1287
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1288
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1289
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1290
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1291
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1292
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
1293
+
1294
+ 11. Some of the cuSPARSE library routines were written by or
1295
+ derived from code written by Li-Wen Chang and are subject
1296
+ to the NCSA Open Source License as follows:
1297
+
1298
+ Copyright (c) 2012, University of Illinois.
1299
+
1300
+ All rights reserved.
1301
+
1302
+ Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
1303
+
1304
+ Permission is hereby granted, free of charge, to any person obtaining
1305
+ a copy of this software and associated documentation files (the
1306
+ "Software"), to deal with the Software without restriction, including
1307
+ without limitation the rights to use, copy, modify, merge, publish,
1308
+ distribute, sublicense, and/or sell copies of the Software, and to
1309
+ permit persons to whom the Software is furnished to do so, subject to
1310
+ the following conditions:
1311
+ * Redistributions of source code must retain the above copyright
1312
+ notice, this list of conditions and the following disclaimer.
1313
+ * Redistributions in binary form must reproduce the above
1314
+ copyright notice, this list of conditions and the following
1315
+ disclaimers in the documentation and/or other materials provided
1316
+ with the distribution.
1317
+ * Neither the names of IMPACT Group, University of Illinois, nor
1318
+ the names of its contributors may be used to endorse or promote
1319
+ products derived from this Software without specific prior
1320
+ written permission.
1321
+
1322
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1323
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1324
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1325
+ NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
1326
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1327
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1328
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
1329
+ SOFTWARE.
1330
+
1331
+ 12. Some of the cuRAND library routines were written by or
1332
+ derived from code written by Mutsuo Saito and Makoto
1333
+ Matsumoto and are subject to the following license:
1334
+
1335
+ Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
1336
+ University. All rights reserved.
1337
+
1338
+ Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
1339
+ University and University of Tokyo. All rights reserved.
1340
+
1341
+ Redistribution and use in source and binary forms, with or without
1342
+ modification, are permitted provided that the following conditions are
1343
+ met:
1344
+ * Redistributions of source code must retain the above copyright
1345
+ notice, this list of conditions and the following disclaimer.
1346
+ * Redistributions in binary form must reproduce the above
1347
+ copyright notice, this list of conditions and the following
1348
+ disclaimer in the documentation and/or other materials provided
1349
+ with the distribution.
1350
+ * Neither the name of the Hiroshima University nor the names of
1351
+ its contributors may be used to endorse or promote products
1352
+ derived from this software without specific prior written
1353
+ permission.
1354
+
1355
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1356
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1357
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1358
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1359
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1360
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1361
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1362
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1363
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1364
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1365
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1366
+
1367
+ 13. Some of the cuRAND library routines were derived from
1368
+ code developed by D. E. Shaw Research and are subject to
1369
+ the following license:
1370
+
1371
+ Copyright 2010-2011, D. E. Shaw Research.
1372
+
1373
+ All rights reserved.
1374
+
1375
+ Redistribution and use in source and binary forms, with or without
1376
+ modification, are permitted provided that the following conditions are
1377
+ met:
1378
+ * Redistributions of source code must retain the above copyright
1379
+ notice, this list of conditions, and the following disclaimer.
1380
+ * Redistributions in binary form must reproduce the above
1381
+ copyright notice, this list of conditions, and the following
1382
+ disclaimer in the documentation and/or other materials provided
1383
+ with the distribution.
1384
+ * Neither the name of D. E. Shaw Research nor the names of its
1385
+ contributors may be used to endorse or promote products derived
1386
+ from this software without specific prior written permission.
1387
+
1388
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1389
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1390
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1391
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1392
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1393
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1394
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1395
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1396
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1397
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1398
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1399
+
1400
+ 14. Some of the Math library routines were written by or
1401
+ derived from code developed by Norbert Juffa and are
1402
+ subject to the following license:
1403
+
1404
+ Copyright (c) 2015-2017, Norbert Juffa
1405
+ All rights reserved.
1406
+
1407
+ Redistribution and use in source and binary forms, with or without
1408
+ modification, are permitted provided that the following conditions
1409
+ are met:
1410
+
1411
+ 1. Redistributions of source code must retain the above copyright
1412
+ notice, this list of conditions and the following disclaimer.
1413
+
1414
+ 2. Redistributions in binary form must reproduce the above copyright
1415
+ notice, this list of conditions and the following disclaimer in the
1416
+ documentation and/or other materials provided with the distribution.
1417
+
1418
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1419
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1420
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1421
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1422
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1423
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1424
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1425
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1426
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1427
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1428
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1429
+
1430
+ 15. Licensee's use of the lz4 third party component is
1431
+ subject to the following terms and conditions:
1432
+
1433
+ Copyright (C) 2011-2013, Yann Collet.
1434
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1435
+
1436
+ Redistribution and use in source and binary forms, with or without
1437
+ modification, are permitted provided that the following conditions are
1438
+ met:
1439
+
1440
+ * Redistributions of source code must retain the above copyright
1441
+ notice, this list of conditions and the following disclaimer.
1442
+ * Redistributions in binary form must reproduce the above
1443
+ copyright notice, this list of conditions and the following disclaimer
1444
+ in the documentation and/or other materials provided with the
1445
+ distribution.
1446
+
1447
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1448
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1449
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1450
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1451
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1452
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1453
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1454
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1455
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1456
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1457
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1458
+
1459
+ 16. The NPP library uses code from the Boost Math Toolkit,
1460
+ and is subject to the following license:
1461
+
1462
+ Boost Software License - Version 1.0 - August 17th, 2003
1463
+ . . . .
1464
+
1465
+ Permission is hereby granted, free of charge, to any person or
1466
+ organization obtaining a copy of the software and accompanying
1467
+ documentation covered by this license (the "Software") to use,
1468
+ reproduce, display, distribute, execute, and transmit the Software,
1469
+ and to prepare derivative works of the Software, and to permit
1470
+ third-parties to whom the Software is furnished to do so, all
1471
+ subject to the following:
1472
+
1473
+ The copyright notices in the Software and this entire statement,
1474
+ including the above license grant, this restriction and the following
1475
+ disclaimer, must be included in all copies of the Software, in whole
1476
+ or in part, and all derivative works of the Software, unless such
1477
+ copies or derivative works are solely in the form of machine-executable
1478
+ object code generated by a source language processor.
1479
+
1480
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1481
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1482
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
1483
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
1484
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
1485
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
1486
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
1487
+ OTHER DEALINGS IN THE SOFTWARE.
1488
+
1489
+ 17. Portions of the Nsight Eclipse Edition is subject to the
1490
+ following license:
1491
+
1492
+ The Eclipse Foundation makes available all content in this plug-in
1493
+ ("Content"). Unless otherwise indicated below, the Content is provided
1494
+ to you under the terms and conditions of the Eclipse Public License
1495
+ Version 1.0 ("EPL"). A copy of the EPL is available at http://
1496
+ www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
1497
+ will mean the Content.
1498
+
1499
+ If you did not receive this Content directly from the Eclipse
1500
+ Foundation, the Content is being redistributed by another party
1501
+ ("Redistributor") and different terms and conditions may apply to your
1502
+ use of any object code in the Content. Check the Redistributor's
1503
+ license that was provided with the Content. If no such license exists,
1504
+ contact the Redistributor. Unless otherwise indicated below, the terms
1505
+ and conditions of the EPL still apply to any source code in the
1506
+ Content and such source code may be obtained at http://www.eclipse.org.
1507
+
1508
+ 18. Some of the cuBLAS library routines uses code from
1509
+ OpenAI, which is subject to the following license:
1510
+
1511
+ License URL
1512
+ https://github.com/openai/openai-gemm/blob/master/LICENSE
1513
+
1514
+ License Text
1515
+ The MIT License
1516
+
1517
+ Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
1518
+
1519
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1520
+ of this software and associated documentation files (the "Software"), to deal
1521
+ in the Software without restriction, including without limitation the rights
1522
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1523
+ copies of the Software, and to permit persons to whom the Software is
1524
+ furnished to do so, subject to the following conditions:
1525
+
1526
+ The above copyright notice and this permission notice shall be included in
1527
+ all copies or substantial portions of the Software.
1528
+
1529
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1530
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1531
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1532
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1533
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1534
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1535
+ THE SOFTWARE.
1536
+
1537
+ 19. Licensee's use of the Visual Studio Setup Configuration
1538
+ Samples is subject to the following license:
1539
+
1540
+ The MIT License (MIT)
1541
+ Copyright (C) Microsoft Corporation. All rights reserved.
1542
+
1543
+ Permission is hereby granted, free of charge, to any person
1544
+ obtaining a copy of this software and associated documentation
1545
+ files (the "Software"), to deal in the Software without restriction,
1546
+ including without limitation the rights to use, copy, modify, merge,
1547
+ publish, distribute, sublicense, and/or sell copies of the Software,
1548
+ and to permit persons to whom the Software is furnished to do so,
1549
+ subject to the following conditions:
1550
+
1551
+ The above copyright notice and this permission notice shall be included
1552
+ in all copies or substantial portions of the Software.
1553
+
1554
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
1555
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1556
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1557
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1558
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1559
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1560
+
1561
+ 20. Licensee's use of linmath.h header for CPU functions for
1562
+ GL vector/matrix operations from lunarG is subject to the
1563
+ Apache License Version 2.0.
1564
+
1565
+ 21. The DX12-CUDA sample uses the d3dx12.h header, which is
1566
+ subject to the MIT license .
1567
+
1568
+ -----------------
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/commands/__pycache__/index.cpython-311.pyc ADDED
Binary file (7.68 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/index/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (265 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (17.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/locations/base.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import os
3
+ import site
4
+ import sys
5
+ import sysconfig
6
+ import typing
7
+
8
+ from pip._internal.exceptions import InstallationError
9
+ from pip._internal.utils import appdirs
10
+ from pip._internal.utils.virtualenv import running_under_virtualenv
11
+
12
+ # Application Directories
13
+ USER_CACHE_DIR = appdirs.user_cache_dir("pip")
14
+
15
+ # FIXME doesn't account for venv linked to global site-packages
16
+ site_packages: str = sysconfig.get_path("purelib")
17
+
18
+
19
+ def get_major_minor_version() -> str:
20
+ """
21
+ Return the major-minor version of the current Python as a string, e.g.
22
+ "3.7" or "3.10".
23
+ """
24
+ return "{}.{}".format(*sys.version_info)
25
+
26
+
27
+ def change_root(new_root: str, pathname: str) -> str:
28
+ """Return 'pathname' with 'new_root' prepended.
29
+
30
+ If 'pathname' is relative, this is equivalent to os.path.join(new_root, pathname).
31
+ Otherwise, it requires making 'pathname' relative and then joining the
32
+ two, which is tricky on DOS/Windows and Mac OS.
33
+
34
+ This is borrowed from Python's standard library's distutils module.
35
+ """
36
+ if os.name == "posix":
37
+ if not os.path.isabs(pathname):
38
+ return os.path.join(new_root, pathname)
39
+ else:
40
+ return os.path.join(new_root, pathname[1:])
41
+
42
+ elif os.name == "nt":
43
+ (drive, path) = os.path.splitdrive(pathname)
44
+ if path[0] == "\\":
45
+ path = path[1:]
46
+ return os.path.join(new_root, path)
47
+
48
+ else:
49
+ raise InstallationError(
50
+ f"Unknown platform: {os.name}\n"
51
+ "Can not change root path prefix on unknown platform."
52
+ )
53
+
54
+
55
+ def get_src_prefix() -> str:
56
+ if running_under_virtualenv():
57
+ src_prefix = os.path.join(sys.prefix, "src")
58
+ else:
59
+ # FIXME: keep src in cwd for now (it is not a temporary folder)
60
+ try:
61
+ src_prefix = os.path.join(os.getcwd(), "src")
62
+ except OSError:
63
+ # In case the current working directory has been renamed or deleted
64
+ sys.exit("The folder you are executing pip from can no longer be found.")
65
+
66
+ # under macOS + virtualenv sys.prefix is not properly resolved
67
+ # it is something like /path/to/python/bin/..
68
+ return os.path.abspath(src_prefix)
69
+
70
+
71
+ try:
72
+ # Use getusersitepackages if this is present, as it ensures that the
73
+ # value is initialised properly.
74
+ user_site: typing.Optional[str] = site.getusersitepackages()
75
+ except AttributeError:
76
+ user_site = site.USER_SITE
77
+
78
+
79
+ @functools.lru_cache(maxsize=None)
80
+ def is_osx_framework() -> bool:
81
+ return bool(sysconfig.get_config_var("PYTHONFRAMEWORK"))
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/build/metadata_legacy.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Metadata generation logic for legacy source distributions.
2
+ """
3
+
4
+ import logging
5
+ import os
6
+
7
+ from pip._internal.build_env import BuildEnvironment
8
+ from pip._internal.cli.spinners import open_spinner
9
+ from pip._internal.exceptions import (
10
+ InstallationError,
11
+ InstallationSubprocessError,
12
+ MetadataGenerationFailed,
13
+ )
14
+ from pip._internal.utils.setuptools_build import make_setuptools_egg_info_args
15
+ from pip._internal.utils.subprocess import call_subprocess
16
+ from pip._internal.utils.temp_dir import TempDirectory
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ def _find_egg_info(directory: str) -> str:
22
+ """Find an .egg-info subdirectory in `directory`."""
23
+ filenames = [f for f in os.listdir(directory) if f.endswith(".egg-info")]
24
+
25
+ if not filenames:
26
+ raise InstallationError(f"No .egg-info directory found in {directory}")
27
+
28
+ if len(filenames) > 1:
29
+ raise InstallationError(
30
+ f"More than one .egg-info directory found in {directory}"
31
+ )
32
+
33
+ return os.path.join(directory, filenames[0])
34
+
35
+
36
+ def generate_metadata(
37
+ build_env: BuildEnvironment,
38
+ setup_py_path: str,
39
+ source_dir: str,
40
+ isolated: bool,
41
+ details: str,
42
+ ) -> str:
43
+ """Generate metadata using setup.py-based defacto mechanisms.
44
+
45
+ Returns the generated metadata directory.
46
+ """
47
+ logger.debug(
48
+ "Running setup.py (path:%s) egg_info for package %s",
49
+ setup_py_path,
50
+ details,
51
+ )
52
+
53
+ egg_info_dir = TempDirectory(kind="pip-egg-info", globally_managed=True).path
54
+
55
+ args = make_setuptools_egg_info_args(
56
+ setup_py_path,
57
+ egg_info_dir=egg_info_dir,
58
+ no_user_config=isolated,
59
+ )
60
+
61
+ with build_env:
62
+ with open_spinner("Preparing metadata (setup.py)") as spinner:
63
+ try:
64
+ call_subprocess(
65
+ args,
66
+ cwd=source_dir,
67
+ command_desc="python setup.py egg_info",
68
+ spinner=spinner,
69
+ )
70
+ except InstallationSubprocessError as error:
71
+ raise MetadataGenerationFailed(package_details=details) from error
72
+
73
+ # Return the .egg-info directory.
74
+ return _find_egg_info(egg_info_dir)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/check.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Validation of dependencies of packages
2
+ """
3
+
4
+ import logging
5
+ from contextlib import suppress
6
+ from email.parser import Parser
7
+ from functools import reduce
8
+ from typing import (
9
+ Callable,
10
+ Dict,
11
+ FrozenSet,
12
+ Generator,
13
+ Iterable,
14
+ List,
15
+ NamedTuple,
16
+ Optional,
17
+ Set,
18
+ Tuple,
19
+ )
20
+
21
+ from pip._vendor.packaging.requirements import Requirement
22
+ from pip._vendor.packaging.tags import Tag, parse_tag
23
+ from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
24
+ from pip._vendor.packaging.version import Version
25
+
26
+ from pip._internal.distributions import make_distribution_for_install_requirement
27
+ from pip._internal.metadata import get_default_environment
28
+ from pip._internal.metadata.base import BaseDistribution
29
+ from pip._internal.req.req_install import InstallRequirement
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ class PackageDetails(NamedTuple):
35
+ version: Version
36
+ dependencies: List[Requirement]
37
+
38
+
39
+ # Shorthands
40
+ PackageSet = Dict[NormalizedName, PackageDetails]
41
+ Missing = Tuple[NormalizedName, Requirement]
42
+ Conflicting = Tuple[NormalizedName, Version, Requirement]
43
+
44
+ MissingDict = Dict[NormalizedName, List[Missing]]
45
+ ConflictingDict = Dict[NormalizedName, List[Conflicting]]
46
+ CheckResult = Tuple[MissingDict, ConflictingDict]
47
+ ConflictDetails = Tuple[PackageSet, CheckResult]
48
+
49
+
50
+ def create_package_set_from_installed() -> Tuple[PackageSet, bool]:
51
+ """Converts a list of distributions into a PackageSet."""
52
+ package_set = {}
53
+ problems = False
54
+ env = get_default_environment()
55
+ for dist in env.iter_installed_distributions(local_only=False, skip=()):
56
+ name = dist.canonical_name
57
+ try:
58
+ dependencies = list(dist.iter_dependencies())
59
+ package_set[name] = PackageDetails(dist.version, dependencies)
60
+ except (OSError, ValueError) as e:
61
+ # Don't crash on unreadable or broken metadata.
62
+ logger.warning("Error parsing dependencies of %s: %s", name, e)
63
+ problems = True
64
+ return package_set, problems
65
+
66
+
67
+ def check_package_set(
68
+ package_set: PackageSet, should_ignore: Optional[Callable[[str], bool]] = None
69
+ ) -> CheckResult:
70
+ """Check if a package set is consistent
71
+
72
+ If should_ignore is passed, it should be a callable that takes a
73
+ package name and returns a boolean.
74
+ """
75
+
76
+ missing = {}
77
+ conflicting = {}
78
+
79
+ for package_name, package_detail in package_set.items():
80
+ # Info about dependencies of package_name
81
+ missing_deps: Set[Missing] = set()
82
+ conflicting_deps: Set[Conflicting] = set()
83
+
84
+ if should_ignore and should_ignore(package_name):
85
+ continue
86
+
87
+ for req in package_detail.dependencies:
88
+ name = canonicalize_name(req.name)
89
+
90
+ # Check if it's missing
91
+ if name not in package_set:
92
+ missed = True
93
+ if req.marker is not None:
94
+ missed = req.marker.evaluate({"extra": ""})
95
+ if missed:
96
+ missing_deps.add((name, req))
97
+ continue
98
+
99
+ # Check if there's a conflict
100
+ version = package_set[name].version
101
+ if not req.specifier.contains(version, prereleases=True):
102
+ conflicting_deps.add((name, version, req))
103
+
104
+ if missing_deps:
105
+ missing[package_name] = sorted(missing_deps, key=str)
106
+ if conflicting_deps:
107
+ conflicting[package_name] = sorted(conflicting_deps, key=str)
108
+
109
+ return missing, conflicting
110
+
111
+
112
+ def check_install_conflicts(to_install: List[InstallRequirement]) -> ConflictDetails:
113
+ """For checking if the dependency graph would be consistent after \
114
+ installing given requirements
115
+ """
116
+ # Start from the current state
117
+ package_set, _ = create_package_set_from_installed()
118
+ # Install packages
119
+ would_be_installed = _simulate_installation_of(to_install, package_set)
120
+
121
+ # Only warn about directly-dependent packages; create a whitelist of them
122
+ whitelist = _create_whitelist(would_be_installed, package_set)
123
+
124
+ return (
125
+ package_set,
126
+ check_package_set(
127
+ package_set, should_ignore=lambda name: name not in whitelist
128
+ ),
129
+ )
130
+
131
+
132
+ def check_unsupported(
133
+ packages: Iterable[BaseDistribution],
134
+ supported_tags: Iterable[Tag],
135
+ ) -> Generator[BaseDistribution, None, None]:
136
+ for p in packages:
137
+ with suppress(FileNotFoundError):
138
+ wheel_file = p.read_text("WHEEL")
139
+ wheel_tags: FrozenSet[Tag] = reduce(
140
+ frozenset.union,
141
+ map(parse_tag, Parser().parsestr(wheel_file).get_all("Tag", [])),
142
+ frozenset(),
143
+ )
144
+ if wheel_tags.isdisjoint(supported_tags):
145
+ yield p
146
+
147
+
148
+ def _simulate_installation_of(
149
+ to_install: List[InstallRequirement], package_set: PackageSet
150
+ ) -> Set[NormalizedName]:
151
+ """Computes the version of packages after installing to_install."""
152
+ # Keep track of packages that were installed
153
+ installed = set()
154
+
155
+ # Modify it as installing requirement_set would (assuming no errors)
156
+ for inst_req in to_install:
157
+ abstract_dist = make_distribution_for_install_requirement(inst_req)
158
+ dist = abstract_dist.get_metadata_distribution()
159
+ name = dist.canonical_name
160
+ package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies()))
161
+
162
+ installed.add(name)
163
+
164
+ return installed
165
+
166
+
167
+ def _create_whitelist(
168
+ would_be_installed: Set[NormalizedName], package_set: PackageSet
169
+ ) -> Set[NormalizedName]:
170
+ packages_affected = set(would_be_installed)
171
+
172
+ for package_name in package_set:
173
+ if package_name in packages_affected:
174
+ continue
175
+
176
+ for req in package_set[package_name].dependencies:
177
+ if canonicalize_name(req.name) in packages_affected:
178
+ packages_affected.add(package_name)
179
+ break
180
+
181
+ return packages_affected
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/freeze.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import logging
3
+ import os
4
+ from typing import Container, Dict, Generator, Iterable, List, NamedTuple, Optional, Set
5
+
6
+ from pip._vendor.packaging.utils import canonicalize_name
7
+ from pip._vendor.packaging.version import InvalidVersion
8
+
9
+ from pip._internal.exceptions import BadCommand, InstallationError
10
+ from pip._internal.metadata import BaseDistribution, get_environment
11
+ from pip._internal.req.constructors import (
12
+ install_req_from_editable,
13
+ install_req_from_line,
14
+ )
15
+ from pip._internal.req.req_file import COMMENT_RE
16
+ from pip._internal.utils.direct_url_helpers import direct_url_as_pep440_direct_reference
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class _EditableInfo(NamedTuple):
22
+ requirement: str
23
+ comments: List[str]
24
+
25
+
26
+ def freeze(
27
+ requirement: Optional[List[str]] = None,
28
+ local_only: bool = False,
29
+ user_only: bool = False,
30
+ paths: Optional[List[str]] = None,
31
+ isolated: bool = False,
32
+ exclude_editable: bool = False,
33
+ skip: Container[str] = (),
34
+ ) -> Generator[str, None, None]:
35
+ installations: Dict[str, FrozenRequirement] = {}
36
+
37
+ dists = get_environment(paths).iter_installed_distributions(
38
+ local_only=local_only,
39
+ skip=(),
40
+ user_only=user_only,
41
+ )
42
+ for dist in dists:
43
+ req = FrozenRequirement.from_dist(dist)
44
+ if exclude_editable and req.editable:
45
+ continue
46
+ installations[req.canonical_name] = req
47
+
48
+ if requirement:
49
+ # the options that don't get turned into an InstallRequirement
50
+ # should only be emitted once, even if the same option is in multiple
51
+ # requirements files, so we need to keep track of what has been emitted
52
+ # so that we don't emit it again if it's seen again
53
+ emitted_options: Set[str] = set()
54
+ # keep track of which files a requirement is in so that we can
55
+ # give an accurate warning if a requirement appears multiple times.
56
+ req_files: Dict[str, List[str]] = collections.defaultdict(list)
57
+ for req_file_path in requirement:
58
+ with open(req_file_path) as req_file:
59
+ for line in req_file:
60
+ if (
61
+ not line.strip()
62
+ or line.strip().startswith("#")
63
+ or line.startswith(
64
+ (
65
+ "-r",
66
+ "--requirement",
67
+ "-f",
68
+ "--find-links",
69
+ "-i",
70
+ "--index-url",
71
+ "--pre",
72
+ "--trusted-host",
73
+ "--process-dependency-links",
74
+ "--extra-index-url",
75
+ "--use-feature",
76
+ )
77
+ )
78
+ ):
79
+ line = line.rstrip()
80
+ if line not in emitted_options:
81
+ emitted_options.add(line)
82
+ yield line
83
+ continue
84
+
85
+ if line.startswith("-e") or line.startswith("--editable"):
86
+ if line.startswith("-e"):
87
+ line = line[2:].strip()
88
+ else:
89
+ line = line[len("--editable") :].strip().lstrip("=")
90
+ line_req = install_req_from_editable(
91
+ line,
92
+ isolated=isolated,
93
+ )
94
+ else:
95
+ line_req = install_req_from_line(
96
+ COMMENT_RE.sub("", line).strip(),
97
+ isolated=isolated,
98
+ )
99
+
100
+ if not line_req.name:
101
+ logger.info(
102
+ "Skipping line in requirement file [%s] because "
103
+ "it's not clear what it would install: %s",
104
+ req_file_path,
105
+ line.strip(),
106
+ )
107
+ logger.info(
108
+ " (add #egg=PackageName to the URL to avoid"
109
+ " this warning)"
110
+ )
111
+ else:
112
+ line_req_canonical_name = canonicalize_name(line_req.name)
113
+ if line_req_canonical_name not in installations:
114
+ # either it's not installed, or it is installed
115
+ # but has been processed already
116
+ if not req_files[line_req.name]:
117
+ logger.warning(
118
+ "Requirement file [%s] contains %s, but "
119
+ "package %r is not installed",
120
+ req_file_path,
121
+ COMMENT_RE.sub("", line).strip(),
122
+ line_req.name,
123
+ )
124
+ else:
125
+ req_files[line_req.name].append(req_file_path)
126
+ else:
127
+ yield str(installations[line_req_canonical_name]).rstrip()
128
+ del installations[line_req_canonical_name]
129
+ req_files[line_req.name].append(req_file_path)
130
+
131
+ # Warn about requirements that were included multiple times (in a
132
+ # single requirements file or in different requirements files).
133
+ for name, files in req_files.items():
134
+ if len(files) > 1:
135
+ logger.warning(
136
+ "Requirement %s included multiple times [%s]",
137
+ name,
138
+ ", ".join(sorted(set(files))),
139
+ )
140
+
141
+ yield ("## The following requirements were added by pip freeze:")
142
+ for installation in sorted(installations.values(), key=lambda x: x.name.lower()):
143
+ if installation.canonical_name not in skip:
144
+ yield str(installation).rstrip()
145
+
146
+
147
+ def _format_as_name_version(dist: BaseDistribution) -> str:
148
+ try:
149
+ dist_version = dist.version
150
+ except InvalidVersion:
151
+ # legacy version
152
+ return f"{dist.raw_name}==={dist.raw_version}"
153
+ else:
154
+ return f"{dist.raw_name}=={dist_version}"
155
+
156
+
157
+ def _get_editable_info(dist: BaseDistribution) -> _EditableInfo:
158
+ """
159
+ Compute and return values (req, comments) for use in
160
+ FrozenRequirement.from_dist().
161
+ """
162
+ editable_project_location = dist.editable_project_location
163
+ assert editable_project_location
164
+ location = os.path.normcase(os.path.abspath(editable_project_location))
165
+
166
+ from pip._internal.vcs import RemoteNotFoundError, RemoteNotValidError, vcs
167
+
168
+ vcs_backend = vcs.get_backend_for_dir(location)
169
+
170
+ if vcs_backend is None:
171
+ display = _format_as_name_version(dist)
172
+ logger.debug(
173
+ 'No VCS found for editable requirement "%s" in: %r',
174
+ display,
175
+ location,
176
+ )
177
+ return _EditableInfo(
178
+ requirement=location,
179
+ comments=[f"# Editable install with no version control ({display})"],
180
+ )
181
+
182
+ vcs_name = type(vcs_backend).__name__
183
+
184
+ try:
185
+ req = vcs_backend.get_src_requirement(location, dist.raw_name)
186
+ except RemoteNotFoundError:
187
+ display = _format_as_name_version(dist)
188
+ return _EditableInfo(
189
+ requirement=location,
190
+ comments=[f"# Editable {vcs_name} install with no remote ({display})"],
191
+ )
192
+ except RemoteNotValidError as ex:
193
+ display = _format_as_name_version(dist)
194
+ return _EditableInfo(
195
+ requirement=location,
196
+ comments=[
197
+ f"# Editable {vcs_name} install ({display}) with either a deleted "
198
+ f"local remote or invalid URI:",
199
+ f"# '{ex.url}'",
200
+ ],
201
+ )
202
+ except BadCommand:
203
+ logger.warning(
204
+ "cannot determine version of editable source in %s "
205
+ "(%s command not found in path)",
206
+ location,
207
+ vcs_backend.name,
208
+ )
209
+ return _EditableInfo(requirement=location, comments=[])
210
+ except InstallationError as exc:
211
+ logger.warning("Error when trying to get requirement for VCS system %s", exc)
212
+ else:
213
+ return _EditableInfo(requirement=req, comments=[])
214
+
215
+ logger.warning("Could not determine repository location of %s", location)
216
+
217
+ return _EditableInfo(
218
+ requirement=location,
219
+ comments=["## !! Could not determine repository location"],
220
+ )
221
+
222
+
223
+ class FrozenRequirement:
224
+ def __init__(
225
+ self,
226
+ name: str,
227
+ req: str,
228
+ editable: bool,
229
+ comments: Iterable[str] = (),
230
+ ) -> None:
231
+ self.name = name
232
+ self.canonical_name = canonicalize_name(name)
233
+ self.req = req
234
+ self.editable = editable
235
+ self.comments = comments
236
+
237
+ @classmethod
238
+ def from_dist(cls, dist: BaseDistribution) -> "FrozenRequirement":
239
+ editable = dist.editable
240
+ if editable:
241
+ req, comments = _get_editable_info(dist)
242
+ else:
243
+ comments = []
244
+ direct_url = dist.direct_url
245
+ if direct_url:
246
+ # if PEP 610 metadata is present, use it
247
+ req = direct_url_as_pep440_direct_reference(direct_url, dist.raw_name)
248
+ else:
249
+ # name==version requirement
250
+ req = _format_as_name_version(dist)
251
+
252
+ return cls(dist.raw_name, req, editable, comments=comments)
253
+
254
+ def __str__(self) -> str:
255
+ req = self.req
256
+ if self.editable:
257
+ req = f"-e {req}"
258
+ return "\n".join(list(self.comments) + [str(req)]) + "\n"
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """For modules related to installing packages.
2
+ """
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-311.pyc ADDED
Binary file (2.21 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/install/wheel.py ADDED
@@ -0,0 +1,741 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Support for installing and building the "wheel" binary package format.
2
+ """
3
+
4
+ import collections
5
+ import compileall
6
+ import contextlib
7
+ import csv
8
+ import importlib
9
+ import logging
10
+ import os.path
11
+ import re
12
+ import shutil
13
+ import sys
14
+ import warnings
15
+ from base64 import urlsafe_b64encode
16
+ from email.message import Message
17
+ from itertools import chain, filterfalse, starmap
18
+ from typing import (
19
+ IO,
20
+ TYPE_CHECKING,
21
+ Any,
22
+ BinaryIO,
23
+ Callable,
24
+ Dict,
25
+ Generator,
26
+ Iterable,
27
+ Iterator,
28
+ List,
29
+ NewType,
30
+ Optional,
31
+ Protocol,
32
+ Sequence,
33
+ Set,
34
+ Tuple,
35
+ Union,
36
+ cast,
37
+ )
38
+ from zipfile import ZipFile, ZipInfo
39
+
40
+ from pip._vendor.distlib.scripts import ScriptMaker
41
+ from pip._vendor.distlib.util import get_export_entry
42
+ from pip._vendor.packaging.utils import canonicalize_name
43
+
44
+ from pip._internal.exceptions import InstallationError
45
+ from pip._internal.locations import get_major_minor_version
46
+ from pip._internal.metadata import (
47
+ BaseDistribution,
48
+ FilesystemWheel,
49
+ get_wheel_distribution,
50
+ )
51
+ from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl
52
+ from pip._internal.models.scheme import SCHEME_KEYS, Scheme
53
+ from pip._internal.utils.filesystem import adjacent_tmp_file, replace
54
+ from pip._internal.utils.misc import StreamWrapper, ensure_dir, hash_file, partition
55
+ from pip._internal.utils.unpacking import (
56
+ current_umask,
57
+ is_within_directory,
58
+ set_extracted_file_to_default_mode_plus_executable,
59
+ zip_item_is_executable,
60
+ )
61
+ from pip._internal.utils.wheel import parse_wheel
62
+
63
+ if TYPE_CHECKING:
64
+
65
+ class File(Protocol):
66
+ src_record_path: "RecordPath"
67
+ dest_path: str
68
+ changed: bool
69
+
70
+ def save(self) -> None:
71
+ pass
72
+
73
+
74
+ logger = logging.getLogger(__name__)
75
+
76
+ RecordPath = NewType("RecordPath", str)
77
+ InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]]
78
+
79
+
80
+ def rehash(path: str, blocksize: int = 1 << 20) -> Tuple[str, str]:
81
+ """Return (encoded_digest, length) for path using hashlib.sha256()"""
82
+ h, length = hash_file(path, blocksize)
83
+ digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
84
+ return (digest, str(length))
85
+
86
+
87
+ def csv_io_kwargs(mode: str) -> Dict[str, Any]:
88
+ """Return keyword arguments to properly open a CSV file
89
+ in the given mode.
90
+ """
91
+ return {"mode": mode, "newline": "", "encoding": "utf-8"}
92
+
93
+
94
+ def fix_script(path: str) -> bool:
95
+ """Replace #!python with #!/path/to/python
96
+ Return True if file was changed.
97
+ """
98
+ # XXX RECORD hashes will need to be updated
99
+ assert os.path.isfile(path)
100
+
101
+ with open(path, "rb") as script:
102
+ firstline = script.readline()
103
+ if not firstline.startswith(b"#!python"):
104
+ return False
105
+ exename = sys.executable.encode(sys.getfilesystemencoding())
106
+ firstline = b"#!" + exename + os.linesep.encode("ascii")
107
+ rest = script.read()
108
+ with open(path, "wb") as script:
109
+ script.write(firstline)
110
+ script.write(rest)
111
+ return True
112
+
113
+
114
+ def wheel_root_is_purelib(metadata: Message) -> bool:
115
+ return metadata.get("Root-Is-Purelib", "").lower() == "true"
116
+
117
+
118
+ def get_entrypoints(dist: BaseDistribution) -> Tuple[Dict[str, str], Dict[str, str]]:
119
+ console_scripts = {}
120
+ gui_scripts = {}
121
+ for entry_point in dist.iter_entry_points():
122
+ if entry_point.group == "console_scripts":
123
+ console_scripts[entry_point.name] = entry_point.value
124
+ elif entry_point.group == "gui_scripts":
125
+ gui_scripts[entry_point.name] = entry_point.value
126
+ return console_scripts, gui_scripts
127
+
128
+
129
+ def message_about_scripts_not_on_PATH(scripts: Sequence[str]) -> Optional[str]:
130
+ """Determine if any scripts are not on PATH and format a warning.
131
+ Returns a warning message if one or more scripts are not on PATH,
132
+ otherwise None.
133
+ """
134
+ if not scripts:
135
+ return None
136
+
137
+ # Group scripts by the path they were installed in
138
+ grouped_by_dir: Dict[str, Set[str]] = collections.defaultdict(set)
139
+ for destfile in scripts:
140
+ parent_dir = os.path.dirname(destfile)
141
+ script_name = os.path.basename(destfile)
142
+ grouped_by_dir[parent_dir].add(script_name)
143
+
144
+ # We don't want to warn for directories that are on PATH.
145
+ not_warn_dirs = [
146
+ os.path.normcase(os.path.normpath(i)).rstrip(os.sep)
147
+ for i in os.environ.get("PATH", "").split(os.pathsep)
148
+ ]
149
+ # If an executable sits with sys.executable, we don't warn for it.
150
+ # This covers the case of venv invocations without activating the venv.
151
+ not_warn_dirs.append(
152
+ os.path.normcase(os.path.normpath(os.path.dirname(sys.executable)))
153
+ )
154
+ warn_for: Dict[str, Set[str]] = {
155
+ parent_dir: scripts
156
+ for parent_dir, scripts in grouped_by_dir.items()
157
+ if os.path.normcase(os.path.normpath(parent_dir)) not in not_warn_dirs
158
+ }
159
+ if not warn_for:
160
+ return None
161
+
162
+ # Format a message
163
+ msg_lines = []
164
+ for parent_dir, dir_scripts in warn_for.items():
165
+ sorted_scripts: List[str] = sorted(dir_scripts)
166
+ if len(sorted_scripts) == 1:
167
+ start_text = f"script {sorted_scripts[0]} is"
168
+ else:
169
+ start_text = "scripts {} are".format(
170
+ ", ".join(sorted_scripts[:-1]) + " and " + sorted_scripts[-1]
171
+ )
172
+
173
+ msg_lines.append(
174
+ f"The {start_text} installed in '{parent_dir}' which is not on PATH."
175
+ )
176
+
177
+ last_line_fmt = (
178
+ "Consider adding {} to PATH or, if you prefer "
179
+ "to suppress this warning, use --no-warn-script-location."
180
+ )
181
+ if len(msg_lines) == 1:
182
+ msg_lines.append(last_line_fmt.format("this directory"))
183
+ else:
184
+ msg_lines.append(last_line_fmt.format("these directories"))
185
+
186
+ # Add a note if any directory starts with ~
187
+ warn_for_tilde = any(
188
+ i[0] == "~" for i in os.environ.get("PATH", "").split(os.pathsep) if i
189
+ )
190
+ if warn_for_tilde:
191
+ tilde_warning_msg = (
192
+ "NOTE: The current PATH contains path(s) starting with `~`, "
193
+ "which may not be expanded by all applications."
194
+ )
195
+ msg_lines.append(tilde_warning_msg)
196
+
197
+ # Returns the formatted multiline message
198
+ return "\n".join(msg_lines)
199
+
200
+
201
+ def _normalized_outrows(
202
+ outrows: Iterable[InstalledCSVRow],
203
+ ) -> List[Tuple[str, str, str]]:
204
+ """Normalize the given rows of a RECORD file.
205
+
206
+ Items in each row are converted into str. Rows are then sorted to make
207
+ the value more predictable for tests.
208
+
209
+ Each row is a 3-tuple (path, hash, size) and corresponds to a record of
210
+ a RECORD file (see PEP 376 and PEP 427 for details). For the rows
211
+ passed to this function, the size can be an integer as an int or string,
212
+ or the empty string.
213
+ """
214
+ # Normally, there should only be one row per path, in which case the
215
+ # second and third elements don't come into play when sorting.
216
+ # However, in cases in the wild where a path might happen to occur twice,
217
+ # we don't want the sort operation to trigger an error (but still want
218
+ # determinism). Since the third element can be an int or string, we
219
+ # coerce each element to a string to avoid a TypeError in this case.
220
+ # For additional background, see--
221
+ # https://github.com/pypa/pip/issues/5868
222
+ return sorted(
223
+ (record_path, hash_, str(size)) for record_path, hash_, size in outrows
224
+ )
225
+
226
+
227
+ def _record_to_fs_path(record_path: RecordPath, lib_dir: str) -> str:
228
+ return os.path.join(lib_dir, record_path)
229
+
230
+
231
+ def _fs_to_record_path(path: str, lib_dir: str) -> RecordPath:
232
+ # On Windows, do not handle relative paths if they belong to different
233
+ # logical disks
234
+ if os.path.splitdrive(path)[0].lower() == os.path.splitdrive(lib_dir)[0].lower():
235
+ path = os.path.relpath(path, lib_dir)
236
+
237
+ path = path.replace(os.path.sep, "/")
238
+ return cast("RecordPath", path)
239
+
240
+
241
+ def get_csv_rows_for_installed(
242
+ old_csv_rows: List[List[str]],
243
+ installed: Dict[RecordPath, RecordPath],
244
+ changed: Set[RecordPath],
245
+ generated: List[str],
246
+ lib_dir: str,
247
+ ) -> List[InstalledCSVRow]:
248
+ """
249
+ :param installed: A map from archive RECORD path to installation RECORD
250
+ path.
251
+ """
252
+ installed_rows: List[InstalledCSVRow] = []
253
+ for row in old_csv_rows:
254
+ if len(row) > 3:
255
+ logger.warning("RECORD line has more than three elements: %s", row)
256
+ old_record_path = cast("RecordPath", row[0])
257
+ new_record_path = installed.pop(old_record_path, old_record_path)
258
+ if new_record_path in changed:
259
+ digest, length = rehash(_record_to_fs_path(new_record_path, lib_dir))
260
+ else:
261
+ digest = row[1] if len(row) > 1 else ""
262
+ length = row[2] if len(row) > 2 else ""
263
+ installed_rows.append((new_record_path, digest, length))
264
+ for f in generated:
265
+ path = _fs_to_record_path(f, lib_dir)
266
+ digest, length = rehash(f)
267
+ installed_rows.append((path, digest, length))
268
+ return installed_rows + [
269
+ (installed_record_path, "", "") for installed_record_path in installed.values()
270
+ ]
271
+
272
+
273
+ def get_console_script_specs(console: Dict[str, str]) -> List[str]:
274
+ """
275
+ Given the mapping from entrypoint name to callable, return the relevant
276
+ console script specs.
277
+ """
278
+ # Don't mutate caller's version
279
+ console = console.copy()
280
+
281
+ scripts_to_generate = []
282
+
283
+ # Special case pip and setuptools to generate versioned wrappers
284
+ #
285
+ # The issue is that some projects (specifically, pip and setuptools) use
286
+ # code in setup.py to create "versioned" entry points - pip2.7 on Python
287
+ # 2.7, pip3.3 on Python 3.3, etc. But these entry points are baked into
288
+ # the wheel metadata at build time, and so if the wheel is installed with
289
+ # a *different* version of Python the entry points will be wrong. The
290
+ # correct fix for this is to enhance the metadata to be able to describe
291
+ # such versioned entry points.
292
+ # Currently, projects using versioned entry points will either have
293
+ # incorrect versioned entry points, or they will not be able to distribute
294
+ # "universal" wheels (i.e., they will need a wheel per Python version).
295
+ #
296
+ # Because setuptools and pip are bundled with _ensurepip and virtualenv,
297
+ # we need to use universal wheels. As a workaround, we
298
+ # override the versioned entry points in the wheel and generate the
299
+ # correct ones.
300
+ #
301
+ # To add the level of hack in this section of code, in order to support
302
+ # ensurepip this code will look for an ``ENSUREPIP_OPTIONS`` environment
303
+ # variable which will control which version scripts get installed.
304
+ #
305
+ # ENSUREPIP_OPTIONS=altinstall
306
+ # - Only pipX.Y and easy_install-X.Y will be generated and installed
307
+ # ENSUREPIP_OPTIONS=install
308
+ # - pipX.Y, pipX, easy_install-X.Y will be generated and installed. Note
309
+ # that this option is technically if ENSUREPIP_OPTIONS is set and is
310
+ # not altinstall
311
+ # DEFAULT
312
+ # - The default behavior is to install pip, pipX, pipX.Y, easy_install
313
+ # and easy_install-X.Y.
314
+ pip_script = console.pop("pip", None)
315
+ if pip_script:
316
+ if "ENSUREPIP_OPTIONS" not in os.environ:
317
+ scripts_to_generate.append("pip = " + pip_script)
318
+
319
+ if os.environ.get("ENSUREPIP_OPTIONS", "") != "altinstall":
320
+ scripts_to_generate.append(f"pip{sys.version_info[0]} = {pip_script}")
321
+
322
+ scripts_to_generate.append(f"pip{get_major_minor_version()} = {pip_script}")
323
+ # Delete any other versioned pip entry points
324
+ pip_ep = [k for k in console if re.match(r"pip(\d+(\.\d+)?)?$", k)]
325
+ for k in pip_ep:
326
+ del console[k]
327
+ easy_install_script = console.pop("easy_install", None)
328
+ if easy_install_script:
329
+ if "ENSUREPIP_OPTIONS" not in os.environ:
330
+ scripts_to_generate.append("easy_install = " + easy_install_script)
331
+
332
+ scripts_to_generate.append(
333
+ f"easy_install-{get_major_minor_version()} = {easy_install_script}"
334
+ )
335
+ # Delete any other versioned easy_install entry points
336
+ easy_install_ep = [
337
+ k for k in console if re.match(r"easy_install(-\d+\.\d+)?$", k)
338
+ ]
339
+ for k in easy_install_ep:
340
+ del console[k]
341
+
342
+ # Generate the console entry points specified in the wheel
343
+ scripts_to_generate.extend(starmap("{} = {}".format, console.items()))
344
+
345
+ return scripts_to_generate
346
+
347
+
348
+ class ZipBackedFile:
349
+ def __init__(
350
+ self, src_record_path: RecordPath, dest_path: str, zip_file: ZipFile
351
+ ) -> None:
352
+ self.src_record_path = src_record_path
353
+ self.dest_path = dest_path
354
+ self._zip_file = zip_file
355
+ self.changed = False
356
+
357
+ def _getinfo(self) -> ZipInfo:
358
+ return self._zip_file.getinfo(self.src_record_path)
359
+
360
+ def save(self) -> None:
361
+ # When we open the output file below, any existing file is truncated
362
+ # before we start writing the new contents. This is fine in most
363
+ # cases, but can cause a segfault if pip has loaded a shared
364
+ # object (e.g. from pyopenssl through its vendored urllib3)
365
+ # Since the shared object is mmap'd an attempt to call a
366
+ # symbol in it will then cause a segfault. Unlinking the file
367
+ # allows writing of new contents while allowing the process to
368
+ # continue to use the old copy.
369
+ if os.path.exists(self.dest_path):
370
+ os.unlink(self.dest_path)
371
+
372
+ zipinfo = self._getinfo()
373
+
374
+ # optimization: the file is created by open(),
375
+ # skip the decompression when there is 0 bytes to decompress.
376
+ with open(self.dest_path, "wb") as dest:
377
+ if zipinfo.file_size > 0:
378
+ with self._zip_file.open(zipinfo) as f:
379
+ blocksize = min(zipinfo.file_size, 1024 * 1024)
380
+ shutil.copyfileobj(f, dest, blocksize)
381
+
382
+ if zip_item_is_executable(zipinfo):
383
+ set_extracted_file_to_default_mode_plus_executable(self.dest_path)
384
+
385
+
386
+ class ScriptFile:
387
+ def __init__(self, file: "File") -> None:
388
+ self._file = file
389
+ self.src_record_path = self._file.src_record_path
390
+ self.dest_path = self._file.dest_path
391
+ self.changed = False
392
+
393
+ def save(self) -> None:
394
+ self._file.save()
395
+ self.changed = fix_script(self.dest_path)
396
+
397
+
398
+ class MissingCallableSuffix(InstallationError):
399
+ def __init__(self, entry_point: str) -> None:
400
+ super().__init__(
401
+ f"Invalid script entry point: {entry_point} - A callable "
402
+ "suffix is required. Cf https://packaging.python.org/"
403
+ "specifications/entry-points/#use-for-scripts for more "
404
+ "information."
405
+ )
406
+
407
+
408
+ def _raise_for_invalid_entrypoint(specification: str) -> None:
409
+ entry = get_export_entry(specification)
410
+ if entry is not None and entry.suffix is None:
411
+ raise MissingCallableSuffix(str(entry))
412
+
413
+
414
+ class PipScriptMaker(ScriptMaker):
415
+ def make(
416
+ self, specification: str, options: Optional[Dict[str, Any]] = None
417
+ ) -> List[str]:
418
+ _raise_for_invalid_entrypoint(specification)
419
+ return super().make(specification, options)
420
+
421
+
422
+ def _install_wheel( # noqa: C901, PLR0915 function is too long
423
+ name: str,
424
+ wheel_zip: ZipFile,
425
+ wheel_path: str,
426
+ scheme: Scheme,
427
+ pycompile: bool = True,
428
+ warn_script_location: bool = True,
429
+ direct_url: Optional[DirectUrl] = None,
430
+ requested: bool = False,
431
+ ) -> None:
432
+ """Install a wheel.
433
+
434
+ :param name: Name of the project to install
435
+ :param wheel_zip: open ZipFile for wheel being installed
436
+ :param scheme: Distutils scheme dictating the install directories
437
+ :param req_description: String used in place of the requirement, for
438
+ logging
439
+ :param pycompile: Whether to byte-compile installed Python files
440
+ :param warn_script_location: Whether to check that scripts are installed
441
+ into a directory on PATH
442
+ :raises UnsupportedWheel:
443
+ * when the directory holds an unpacked wheel with incompatible
444
+ Wheel-Version
445
+ * when the .dist-info dir does not match the wheel
446
+ """
447
+ info_dir, metadata = parse_wheel(wheel_zip, name)
448
+
449
+ if wheel_root_is_purelib(metadata):
450
+ lib_dir = scheme.purelib
451
+ else:
452
+ lib_dir = scheme.platlib
453
+
454
+ # Record details of the files moved
455
+ # installed = files copied from the wheel to the destination
456
+ # changed = files changed while installing (scripts #! line typically)
457
+ # generated = files newly generated during the install (script wrappers)
458
+ installed: Dict[RecordPath, RecordPath] = {}
459
+ changed: Set[RecordPath] = set()
460
+ generated: List[str] = []
461
+
462
+ def record_installed(
463
+ srcfile: RecordPath, destfile: str, modified: bool = False
464
+ ) -> None:
465
+ """Map archive RECORD paths to installation RECORD paths."""
466
+ newpath = _fs_to_record_path(destfile, lib_dir)
467
+ installed[srcfile] = newpath
468
+ if modified:
469
+ changed.add(newpath)
470
+
471
+ def is_dir_path(path: RecordPath) -> bool:
472
+ return path.endswith("/")
473
+
474
+ def assert_no_path_traversal(dest_dir_path: str, target_path: str) -> None:
475
+ if not is_within_directory(dest_dir_path, target_path):
476
+ message = (
477
+ "The wheel {!r} has a file {!r} trying to install"
478
+ " outside the target directory {!r}"
479
+ )
480
+ raise InstallationError(
481
+ message.format(wheel_path, target_path, dest_dir_path)
482
+ )
483
+
484
+ def root_scheme_file_maker(
485
+ zip_file: ZipFile, dest: str
486
+ ) -> Callable[[RecordPath], "File"]:
487
+ def make_root_scheme_file(record_path: RecordPath) -> "File":
488
+ normed_path = os.path.normpath(record_path)
489
+ dest_path = os.path.join(dest, normed_path)
490
+ assert_no_path_traversal(dest, dest_path)
491
+ return ZipBackedFile(record_path, dest_path, zip_file)
492
+
493
+ return make_root_scheme_file
494
+
495
+ def data_scheme_file_maker(
496
+ zip_file: ZipFile, scheme: Scheme
497
+ ) -> Callable[[RecordPath], "File"]:
498
+ scheme_paths = {key: getattr(scheme, key) for key in SCHEME_KEYS}
499
+
500
+ def make_data_scheme_file(record_path: RecordPath) -> "File":
501
+ normed_path = os.path.normpath(record_path)
502
+ try:
503
+ _, scheme_key, dest_subpath = normed_path.split(os.path.sep, 2)
504
+ except ValueError:
505
+ message = (
506
+ f"Unexpected file in {wheel_path}: {record_path!r}. .data directory"
507
+ " contents should be named like: '<scheme key>/<path>'."
508
+ )
509
+ raise InstallationError(message)
510
+
511
+ try:
512
+ scheme_path = scheme_paths[scheme_key]
513
+ except KeyError:
514
+ valid_scheme_keys = ", ".join(sorted(scheme_paths))
515
+ message = (
516
+ f"Unknown scheme key used in {wheel_path}: {scheme_key} "
517
+ f"(for file {record_path!r}). .data directory contents "
518
+ f"should be in subdirectories named with a valid scheme "
519
+ f"key ({valid_scheme_keys})"
520
+ )
521
+ raise InstallationError(message)
522
+
523
+ dest_path = os.path.join(scheme_path, dest_subpath)
524
+ assert_no_path_traversal(scheme_path, dest_path)
525
+ return ZipBackedFile(record_path, dest_path, zip_file)
526
+
527
+ return make_data_scheme_file
528
+
529
+ def is_data_scheme_path(path: RecordPath) -> bool:
530
+ return path.split("/", 1)[0].endswith(".data")
531
+
532
+ paths = cast(List[RecordPath], wheel_zip.namelist())
533
+ file_paths = filterfalse(is_dir_path, paths)
534
+ root_scheme_paths, data_scheme_paths = partition(is_data_scheme_path, file_paths)
535
+
536
+ make_root_scheme_file = root_scheme_file_maker(wheel_zip, lib_dir)
537
+ files: Iterator[File] = map(make_root_scheme_file, root_scheme_paths)
538
+
539
+ def is_script_scheme_path(path: RecordPath) -> bool:
540
+ parts = path.split("/", 2)
541
+ return len(parts) > 2 and parts[0].endswith(".data") and parts[1] == "scripts"
542
+
543
+ other_scheme_paths, script_scheme_paths = partition(
544
+ is_script_scheme_path, data_scheme_paths
545
+ )
546
+
547
+ make_data_scheme_file = data_scheme_file_maker(wheel_zip, scheme)
548
+ other_scheme_files = map(make_data_scheme_file, other_scheme_paths)
549
+ files = chain(files, other_scheme_files)
550
+
551
+ # Get the defined entry points
552
+ distribution = get_wheel_distribution(
553
+ FilesystemWheel(wheel_path),
554
+ canonicalize_name(name),
555
+ )
556
+ console, gui = get_entrypoints(distribution)
557
+
558
+ def is_entrypoint_wrapper(file: "File") -> bool:
559
+ # EP, EP.exe and EP-script.py are scripts generated for
560
+ # entry point EP by setuptools
561
+ path = file.dest_path
562
+ name = os.path.basename(path)
563
+ if name.lower().endswith(".exe"):
564
+ matchname = name[:-4]
565
+ elif name.lower().endswith("-script.py"):
566
+ matchname = name[:-10]
567
+ elif name.lower().endswith(".pya"):
568
+ matchname = name[:-4]
569
+ else:
570
+ matchname = name
571
+ # Ignore setuptools-generated scripts
572
+ return matchname in console or matchname in gui
573
+
574
+ script_scheme_files: Iterator[File] = map(
575
+ make_data_scheme_file, script_scheme_paths
576
+ )
577
+ script_scheme_files = filterfalse(is_entrypoint_wrapper, script_scheme_files)
578
+ script_scheme_files = map(ScriptFile, script_scheme_files)
579
+ files = chain(files, script_scheme_files)
580
+
581
+ existing_parents = set()
582
+ for file in files:
583
+ # directory creation is lazy and after file filtering
584
+ # to ensure we don't install empty dirs; empty dirs can't be
585
+ # uninstalled.
586
+ parent_dir = os.path.dirname(file.dest_path)
587
+ if parent_dir not in existing_parents:
588
+ ensure_dir(parent_dir)
589
+ existing_parents.add(parent_dir)
590
+ file.save()
591
+ record_installed(file.src_record_path, file.dest_path, file.changed)
592
+
593
+ def pyc_source_file_paths() -> Generator[str, None, None]:
594
+ # We de-duplicate installation paths, since there can be overlap (e.g.
595
+ # file in .data maps to same location as file in wheel root).
596
+ # Sorting installation paths makes it easier to reproduce and debug
597
+ # issues related to permissions on existing files.
598
+ for installed_path in sorted(set(installed.values())):
599
+ full_installed_path = os.path.join(lib_dir, installed_path)
600
+ if not os.path.isfile(full_installed_path):
601
+ continue
602
+ if not full_installed_path.endswith(".py"):
603
+ continue
604
+ yield full_installed_path
605
+
606
+ def pyc_output_path(path: str) -> str:
607
+ """Return the path the pyc file would have been written to."""
608
+ return importlib.util.cache_from_source(path)
609
+
610
+ # Compile all of the pyc files for the installed files
611
+ if pycompile:
612
+ with contextlib.redirect_stdout(
613
+ StreamWrapper.from_stream(sys.stdout)
614
+ ) as stdout:
615
+ with warnings.catch_warnings():
616
+ warnings.filterwarnings("ignore")
617
+ for path in pyc_source_file_paths():
618
+ success = compileall.compile_file(path, force=True, quiet=True)
619
+ if success:
620
+ pyc_path = pyc_output_path(path)
621
+ assert os.path.exists(pyc_path)
622
+ pyc_record_path = cast(
623
+ "RecordPath", pyc_path.replace(os.path.sep, "/")
624
+ )
625
+ record_installed(pyc_record_path, pyc_path)
626
+ logger.debug(stdout.getvalue())
627
+
628
+ maker = PipScriptMaker(None, scheme.scripts)
629
+
630
+ # Ensure old scripts are overwritten.
631
+ # See https://github.com/pypa/pip/issues/1800
632
+ maker.clobber = True
633
+
634
+ # Ensure we don't generate any variants for scripts because this is almost
635
+ # never what somebody wants.
636
+ # See https://bitbucket.org/pypa/distlib/issue/35/
637
+ maker.variants = {""}
638
+
639
+ # This is required because otherwise distlib creates scripts that are not
640
+ # executable.
641
+ # See https://bitbucket.org/pypa/distlib/issue/32/
642
+ maker.set_mode = True
643
+
644
+ # Generate the console and GUI entry points specified in the wheel
645
+ scripts_to_generate = get_console_script_specs(console)
646
+
647
+ gui_scripts_to_generate = list(starmap("{} = {}".format, gui.items()))
648
+
649
+ generated_console_scripts = maker.make_multiple(scripts_to_generate)
650
+ generated.extend(generated_console_scripts)
651
+
652
+ generated.extend(maker.make_multiple(gui_scripts_to_generate, {"gui": True}))
653
+
654
+ if warn_script_location:
655
+ msg = message_about_scripts_not_on_PATH(generated_console_scripts)
656
+ if msg is not None:
657
+ logger.warning(msg)
658
+
659
+ generated_file_mode = 0o666 & ~current_umask()
660
+
661
+ @contextlib.contextmanager
662
+ def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]:
663
+ with adjacent_tmp_file(path, **kwargs) as f:
664
+ yield f
665
+ os.chmod(f.name, generated_file_mode)
666
+ replace(f.name, path)
667
+
668
+ dest_info_dir = os.path.join(lib_dir, info_dir)
669
+
670
+ # Record pip as the installer
671
+ installer_path = os.path.join(dest_info_dir, "INSTALLER")
672
+ with _generate_file(installer_path) as installer_file:
673
+ installer_file.write(b"pip\n")
674
+ generated.append(installer_path)
675
+
676
+ # Record the PEP 610 direct URL reference
677
+ if direct_url is not None:
678
+ direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME)
679
+ with _generate_file(direct_url_path) as direct_url_file:
680
+ direct_url_file.write(direct_url.to_json().encode("utf-8"))
681
+ generated.append(direct_url_path)
682
+
683
+ # Record the REQUESTED file
684
+ if requested:
685
+ requested_path = os.path.join(dest_info_dir, "REQUESTED")
686
+ with open(requested_path, "wb"):
687
+ pass
688
+ generated.append(requested_path)
689
+
690
+ record_text = distribution.read_text("RECORD")
691
+ record_rows = list(csv.reader(record_text.splitlines()))
692
+
693
+ rows = get_csv_rows_for_installed(
694
+ record_rows,
695
+ installed=installed,
696
+ changed=changed,
697
+ generated=generated,
698
+ lib_dir=lib_dir,
699
+ )
700
+
701
+ # Record details of all files installed
702
+ record_path = os.path.join(dest_info_dir, "RECORD")
703
+
704
+ with _generate_file(record_path, **csv_io_kwargs("w")) as record_file:
705
+ # Explicitly cast to typing.IO[str] as a workaround for the mypy error:
706
+ # "writer" has incompatible type "BinaryIO"; expected "_Writer"
707
+ writer = csv.writer(cast("IO[str]", record_file))
708
+ writer.writerows(_normalized_outrows(rows))
709
+
710
+
711
@contextlib.contextmanager
def req_error_context(req_description: str) -> Generator[None, None, None]:
    """Context manager that prefixes any InstallationError with the requirement."""
    try:
        yield
    except InstallationError as exc:
        # Re-raise with the requirement description prepended so the user can
        # tell which requirement the failure belongs to.
        annotated = f"For req: {req_description}. {exc.args[0]}"
        raise InstallationError(annotated) from exc
718
+
719
+
720
def install_wheel(
    name: str,
    wheel_path: str,
    scheme: Scheme,
    req_description: str,
    pycompile: bool = True,
    warn_script_location: bool = True,
    direct_url: Optional[DirectUrl] = None,
    requested: bool = False,
) -> None:
    """Install the wheel at ``wheel_path`` into ``scheme``.

    Any InstallationError raised during installation is annotated with
    ``req_description`` via req_error_context.
    """
    # allowZip64 lets us install wheels larger than 2 GiB.
    with ZipFile(wheel_path, allowZip64=True) as wheel_zip, req_error_context(
        req_description
    ):
        _install_wheel(
            name=name,
            wheel_zip=wheel_zip,
            wheel_path=wheel_path,
            scheme=scheme,
            pycompile=pycompile,
            warn_script_location=warn_script_location,
            direct_url=direct_url,
            requested=requested,
        )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/operations/prepare.py ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prepares a distribution for installation
2
+ """
3
+
4
+ # The following comment should be removed at some point in the future.
5
+ # mypy: strict-optional=False
6
+
7
+ import mimetypes
8
+ import os
9
+ import shutil
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Dict, Iterable, List, Optional
13
+
14
+ from pip._vendor.packaging.utils import canonicalize_name
15
+
16
+ from pip._internal.distributions import make_distribution_for_install_requirement
17
+ from pip._internal.distributions.installed import InstalledDistribution
18
+ from pip._internal.exceptions import (
19
+ DirectoryUrlHashUnsupported,
20
+ HashMismatch,
21
+ HashUnpinned,
22
+ InstallationError,
23
+ MetadataInconsistent,
24
+ NetworkConnectionError,
25
+ VcsHashUnsupported,
26
+ )
27
+ from pip._internal.index.package_finder import PackageFinder
28
+ from pip._internal.metadata import BaseDistribution, get_metadata_distribution
29
+ from pip._internal.models.direct_url import ArchiveInfo
30
+ from pip._internal.models.link import Link
31
+ from pip._internal.models.wheel import Wheel
32
+ from pip._internal.network.download import BatchDownloader, Downloader
33
+ from pip._internal.network.lazy_wheel import (
34
+ HTTPRangeRequestUnsupported,
35
+ dist_from_wheel_url,
36
+ )
37
+ from pip._internal.network.session import PipSession
38
+ from pip._internal.operations.build.build_tracker import BuildTracker
39
+ from pip._internal.req.req_install import InstallRequirement
40
+ from pip._internal.utils._log import getLogger
41
+ from pip._internal.utils.direct_url_helpers import (
42
+ direct_url_for_editable,
43
+ direct_url_from_link,
44
+ )
45
+ from pip._internal.utils.hashes import Hashes, MissingHashes
46
+ from pip._internal.utils.logging import indent_log
47
+ from pip._internal.utils.misc import (
48
+ display_path,
49
+ hash_file,
50
+ hide_url,
51
+ redact_auth_from_requirement,
52
+ )
53
+ from pip._internal.utils.temp_dir import TempDirectory
54
+ from pip._internal.utils.unpacking import unpack_file
55
+ from pip._internal.vcs import vcs
56
+
57
+ logger = getLogger(__name__)
58
+
59
+
60
def _get_prepared_distribution(
    req: InstallRequirement,
    build_tracker: BuildTracker,
    finder: PackageFinder,
    build_isolation: bool,
    check_build_deps: bool,
) -> BaseDistribution:
    """Prepare a distribution for installation."""
    builder = make_distribution_for_install_requirement(req)
    tracker_id = builder.build_tracker_id
    if tracker_id is None:
        # No build needed, so there is nothing to track.
        return builder.get_metadata_distribution()
    # Track the in-progress build so concurrent pip runs don't collide.
    with build_tracker.track(req, tracker_id):
        builder.prepare_distribution_metadata(finder, build_isolation, check_build_deps)
    return builder.get_metadata_distribution()
76
+
77
+
78
def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
    """Check out the VCS ``link`` into ``location``."""
    backend = vcs.get_backend_for_scheme(link.scheme)
    assert backend is not None
    backend.unpack(location, url=hide_url(link.url), verbosity=verbosity)
82
+
83
+
84
@dataclass
class File:
    """A local file together with its (possibly guessed) MIME content type."""

    path: str
    content_type: Optional[str] = None

    def __post_init__(self) -> None:
        # When no content type is supplied, infer one from the file name.
        if self.content_type is not None:
            return
        self.content_type = mimetypes.guess_type(self.path)[0]
92
+
93
+
94
def get_http_url(
    link: Link,
    download: Downloader,
    download_dir: Optional[str] = None,
    hashes: Optional[Hashes] = None,
) -> File:
    """Fetch ``link`` over HTTP, reusing a valid already-downloaded copy if any."""
    temp_dir = TempDirectory(kind="unpack", globally_managed=True)

    # Reuse a file from the download dir when present and hash-valid.
    cached_path = None
    if download_dir:
        cached_path = _check_download_dir(link, download_dir, hashes)
    if cached_path:
        return File(cached_path, None)

    # Nothing usable on disk: download into the temporary directory.
    from_path, content_type = download(link, temp_dir.path)
    if hashes:
        hashes.check_against_path(from_path)
    return File(from_path, content_type)
116
+
117
+
118
def get_file_url(
    link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
) -> File:
    """Get file and optionally check its hash."""
    # Prefer a previously downloaded copy from the download dir, if valid.
    previously_downloaded = None
    if download_dir:
        previously_downloaded = _check_download_dir(link, download_dir, hashes)

    from_path = previously_downloaded if previously_downloaded else link.file_path

    # With --require-hashes off, `hashes` is empty, the link's embedded hash,
    # or MissingHashes, and it must match. With --require-hashes on, any hash
    # in `hashes` (URL-based or option-based) satisfies us; no
    # internet-sourced hash will be present in `hashes`.
    if hashes:
        hashes.check_against_path(from_path)
    return File(from_path, None)
140
+
141
+
142
def unpack_url(
    link: Link,
    location: str,
    download: Downloader,
    verbosity: int,
    download_dir: Optional[str] = None,
    hashes: Optional[Hashes] = None,
) -> Optional[File]:
    """Unpack link into location, downloading if required.

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # Non-editable VCS links are checked out directly; there is no file.
    if link.is_vcs:
        unpack_vcs_link(link, location, verbosity=verbosity)
        return None

    assert not link.is_existing_dir()

    # Obtain the archive: file:// links are used in place, anything else is
    # fetched over HTTP.
    if link.is_file:
        file = get_file_url(link, download_dir, hashes=hashes)
    else:
        file = get_http_url(
            link,
            download,
            download_dir,
            hashes=hashes,
        )

    # Even download-only runs must unpack archives to read their
    # dependencies; wheels are the exception.
    if not link.is_wheel:
        unpack_file(file.path, location, file.content_type)

    return file
183
+
184
+
185
def _check_download_dir(
    link: Link,
    download_dir: str,
    hashes: Optional[Hashes],
    warn_on_hash_mismatch: bool = True,
) -> Optional[str]:
    """Return the path of a previously downloaded, hash-valid file, else None.

    A stale file whose hash does not match is deleted so it can be re-fetched.
    """
    candidate = os.path.join(download_dir, link.filename)
    if not os.path.exists(candidate):
        return None

    logger.info("File was already downloaded %s", candidate)
    if not hashes:
        return candidate
    try:
        hashes.check_against_path(candidate)
    except HashMismatch:
        if warn_on_hash_mismatch:
            logger.warning(
                "Previously-downloaded file %s has bad hash. Re-downloading.",
                candidate,
            )
        # Remove the stale file so the caller re-downloads it.
        os.unlink(candidate)
        return None
    return candidate
213
+
214
+
215
+ class RequirementPreparer:
216
+ """Prepares a Requirement"""
217
+
218
+ def __init__(
219
+ self,
220
+ build_dir: str,
221
+ download_dir: Optional[str],
222
+ src_dir: str,
223
+ build_isolation: bool,
224
+ check_build_deps: bool,
225
+ build_tracker: BuildTracker,
226
+ session: PipSession,
227
+ progress_bar: str,
228
+ finder: PackageFinder,
229
+ require_hashes: bool,
230
+ use_user_site: bool,
231
+ lazy_wheel: bool,
232
+ verbosity: int,
233
+ legacy_resolver: bool,
234
+ ) -> None:
235
+ super().__init__()
236
+
237
+ self.src_dir = src_dir
238
+ self.build_dir = build_dir
239
+ self.build_tracker = build_tracker
240
+ self._session = session
241
+ self._download = Downloader(session, progress_bar)
242
+ self._batch_download = BatchDownloader(session, progress_bar)
243
+ self.finder = finder
244
+
245
+ # Where still-packed archives should be written to. If None, they are
246
+ # not saved, and are deleted immediately after unpacking.
247
+ self.download_dir = download_dir
248
+
249
+ # Is build isolation allowed?
250
+ self.build_isolation = build_isolation
251
+
252
+ # Should check build dependencies?
253
+ self.check_build_deps = check_build_deps
254
+
255
+ # Should hash-checking be required?
256
+ self.require_hashes = require_hashes
257
+
258
+ # Should install in user site-packages?
259
+ self.use_user_site = use_user_site
260
+
261
+ # Should wheels be downloaded lazily?
262
+ self.use_lazy_wheel = lazy_wheel
263
+
264
+ # How verbose should underlying tooling be?
265
+ self.verbosity = verbosity
266
+
267
+ # Are we using the legacy resolver?
268
+ self.legacy_resolver = legacy_resolver
269
+
270
+ # Memoized downloaded files, as mapping of url: path.
271
+ self._downloaded: Dict[str, str] = {}
272
+
273
+ # Previous "header" printed for a link-based InstallRequirement
274
+ self._previous_requirement_header = ("", "")
275
+
276
+ def _log_preparing_link(self, req: InstallRequirement) -> None:
277
+ """Provide context for the requirement being prepared."""
278
+ if req.link.is_file and not req.is_wheel_from_cache:
279
+ message = "Processing %s"
280
+ information = str(display_path(req.link.file_path))
281
+ else:
282
+ message = "Collecting %s"
283
+ information = redact_auth_from_requirement(req.req) if req.req else str(req)
284
+
285
+ # If we used req.req, inject requirement source if available (this
286
+ # would already be included if we used req directly)
287
+ if req.req and req.comes_from:
288
+ if isinstance(req.comes_from, str):
289
+ comes_from: Optional[str] = req.comes_from
290
+ else:
291
+ comes_from = req.comes_from.from_path()
292
+ if comes_from:
293
+ information += f" (from {comes_from})"
294
+
295
+ if (message, information) != self._previous_requirement_header:
296
+ self._previous_requirement_header = (message, information)
297
+ logger.info(message, information)
298
+
299
+ if req.is_wheel_from_cache:
300
+ with indent_log():
301
+ logger.info("Using cached %s", req.link.filename)
302
+
303
+ def _ensure_link_req_src_dir(
304
+ self, req: InstallRequirement, parallel_builds: bool
305
+ ) -> None:
306
+ """Ensure source_dir of a linked InstallRequirement."""
307
+ # Since source_dir is only set for editable requirements.
308
+ if req.link.is_wheel:
309
+ # We don't need to unpack wheels, so no need for a source
310
+ # directory.
311
+ return
312
+ assert req.source_dir is None
313
+ if req.link.is_existing_dir():
314
+ # build local directories in-tree
315
+ req.source_dir = req.link.file_path
316
+ return
317
+
318
+ # We always delete unpacked sdists after pip runs.
319
+ req.ensure_has_source_dir(
320
+ self.build_dir,
321
+ autodelete=True,
322
+ parallel_builds=parallel_builds,
323
+ )
324
+ req.ensure_pristine_source_checkout()
325
+
326
+ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
327
+ # By the time this is called, the requirement's link should have
328
+ # been checked so we can tell what kind of requirements req is
329
+ # and raise some more informative errors than otherwise.
330
+ # (For example, we can raise VcsHashUnsupported for a VCS URL
331
+ # rather than HashMissing.)
332
+ if not self.require_hashes:
333
+ return req.hashes(trust_internet=True)
334
+
335
+ # We could check these first 2 conditions inside unpack_url
336
+ # and save repetition of conditions, but then we would
337
+ # report less-useful error messages for unhashable
338
+ # requirements, complaining that there's no hash provided.
339
+ if req.link.is_vcs:
340
+ raise VcsHashUnsupported()
341
+ if req.link.is_existing_dir():
342
+ raise DirectoryUrlHashUnsupported()
343
+
344
+ # Unpinned packages are asking for trouble when a new version
345
+ # is uploaded. This isn't a security check, but it saves users
346
+ # a surprising hash mismatch in the future.
347
+ # file:/// URLs aren't pinnable, so don't complain about them
348
+ # not being pinned.
349
+ if not req.is_direct and not req.is_pinned:
350
+ raise HashUnpinned()
351
+
352
+ # If known-good hashes are missing for this requirement,
353
+ # shim it with a facade object that will provoke hash
354
+ # computation and then raise a HashMissing exception
355
+ # showing the user what the hash should be.
356
+ return req.hashes(trust_internet=False) or MissingHashes()
357
+
358
+ def _fetch_metadata_only(
359
+ self,
360
+ req: InstallRequirement,
361
+ ) -> Optional[BaseDistribution]:
362
+ if self.legacy_resolver:
363
+ logger.debug(
364
+ "Metadata-only fetching is not used in the legacy resolver",
365
+ )
366
+ return None
367
+ if self.require_hashes:
368
+ logger.debug(
369
+ "Metadata-only fetching is not used as hash checking is required",
370
+ )
371
+ return None
372
+ # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
373
+ return self._fetch_metadata_using_link_data_attr(
374
+ req
375
+ ) or self._fetch_metadata_using_lazy_wheel(req.link)
376
+
377
+ def _fetch_metadata_using_link_data_attr(
378
+ self,
379
+ req: InstallRequirement,
380
+ ) -> Optional[BaseDistribution]:
381
+ """Fetch metadata from the data-dist-info-metadata attribute, if possible."""
382
+ # (1) Get the link to the metadata file, if provided by the backend.
383
+ metadata_link = req.link.metadata_link()
384
+ if metadata_link is None:
385
+ return None
386
+ assert req.req is not None
387
+ logger.verbose(
388
+ "Obtaining dependency information for %s from %s",
389
+ req.req,
390
+ metadata_link,
391
+ )
392
+ # (2) Download the contents of the METADATA file, separate from the dist itself.
393
+ metadata_file = get_http_url(
394
+ metadata_link,
395
+ self._download,
396
+ hashes=metadata_link.as_hashes(),
397
+ )
398
+ with open(metadata_file.path, "rb") as f:
399
+ metadata_contents = f.read()
400
+ # (3) Generate a dist just from those file contents.
401
+ metadata_dist = get_metadata_distribution(
402
+ metadata_contents,
403
+ req.link.filename,
404
+ req.req.name,
405
+ )
406
+ # (4) Ensure the Name: field from the METADATA file matches the name from the
407
+ # install requirement.
408
+ #
409
+ # NB: raw_name will fall back to the name from the install requirement if
410
+ # the Name: field is not present, but it's noted in the raw_name docstring
411
+ # that that should NEVER happen anyway.
412
+ if canonicalize_name(metadata_dist.raw_name) != canonicalize_name(req.req.name):
413
+ raise MetadataInconsistent(
414
+ req, "Name", req.req.name, metadata_dist.raw_name
415
+ )
416
+ return metadata_dist
417
+
418
+ def _fetch_metadata_using_lazy_wheel(
419
+ self,
420
+ link: Link,
421
+ ) -> Optional[BaseDistribution]:
422
+ """Fetch metadata using lazy wheel, if possible."""
423
+ # --use-feature=fast-deps must be provided.
424
+ if not self.use_lazy_wheel:
425
+ return None
426
+ if link.is_file or not link.is_wheel:
427
+ logger.debug(
428
+ "Lazy wheel is not used as %r does not point to a remote wheel",
429
+ link,
430
+ )
431
+ return None
432
+
433
+ wheel = Wheel(link.filename)
434
+ name = canonicalize_name(wheel.name)
435
+ logger.info(
436
+ "Obtaining dependency information from %s %s",
437
+ name,
438
+ wheel.version,
439
+ )
440
+ url = link.url.split("#", 1)[0]
441
+ try:
442
+ return dist_from_wheel_url(name, url, self._session)
443
+ except HTTPRangeRequestUnsupported:
444
+ logger.debug("%s does not support range requests", url)
445
+ return None
446
+
447
    def _complete_partial_requirements(
        self,
        partially_downloaded_reqs: Iterable[InstallRequirement],
        parallel_builds: bool = False,
    ) -> None:
        """Download any requirements which were only fetched by metadata.

        Each requirement's ``local_file_path`` and ``self._downloaded`` entry
        are recorded; sdist archives are queued for unpacking once a source
        dir exists; finally every requirement is fully prepared.
        """
        # Download to a temporary directory. These will be copied over as
        # needed for downstream 'download', 'wheel', and 'install' commands.
        temp_dir = TempDirectory(kind="unpack", globally_managed=True).path

        # Map each link to the requirement that owns it. This allows us to set
        # `req.local_file_path` on the appropriate requirement after passing
        # all the links at once into BatchDownloader.
        links_to_fully_download: Dict[Link, InstallRequirement] = {}
        for req in partially_downloaded_reqs:
            assert req.link
            links_to_fully_download[req.link] = req

        batch_download = self._batch_download(
            links_to_fully_download.keys(),
            temp_dir,
        )
        for link, (filepath, _) in batch_download:
            logger.debug("Downloading link %s to %s", link, filepath)
            req = links_to_fully_download[link]
            # Record the downloaded file path so wheel reqs can extract a
            # Distribution in .get_dist().
            req.local_file_path = filepath
            # Record that the file is downloaded so we don't do it again in
            # _prepare_linked_requirement().
            self._downloaded[req.link.url] = filepath

            # If this is an sdist, we need to unpack it after downloading, but
            # the .source_dir won't be set up until we are in
            # _prepare_linked_requirement(). Add the downloaded archive to the
            # install requirement to unpack after preparing the source dir.
            if not req.is_wheel:
                req.needs_unpacked_archive(Path(filepath))

        # This step is necessary to ensure all lazy wheels are processed
        # successfully by the 'download', 'wheel', and 'install' commands.
        for req in partially_downloaded_reqs:
            self._prepare_linked_requirement(req, parallel_builds)
490
+
491
    def prepare_linked_requirement(
        self, req: InstallRequirement, parallel_builds: bool = False
    ) -> BaseDistribution:
        """Prepare a requirement to be obtained from req.link.

        Tries two shortcuts before a full fetch: reusing an already
        downloaded wheel from ``self.download_dir``, and fetching metadata
        only (in which case ``req.needs_more_preparation`` is set and the
        real download is completed later).
        """
        assert req.link
        self._log_preparing_link(req)
        with indent_log():
            # Check if the relevant file is already available
            # in the download directory
            file_path = None
            if self.download_dir is not None and req.link.is_wheel:
                hashes = self._get_linked_req_hashes(req)
                file_path = _check_download_dir(
                    req.link,
                    self.download_dir,
                    hashes,
                    # When a locally built wheel has been found in cache, we don't warn
                    # about re-downloading when the already downloaded wheel hash does
                    # not match. This is because the hash must be checked against the
                    # original link, not the cached link. In that case the already
                    # downloaded file will be removed and re-fetched from cache (which
                    # implies a hash check against the cache entry's origin.json).
                    warn_on_hash_mismatch=not req.is_wheel_from_cache,
                )

            if file_path is not None:
                # The file is already available, so mark it as downloaded
                self._downloaded[req.link.url] = file_path
            else:
                # The file is not available, attempt to fetch only metadata
                metadata_dist = self._fetch_metadata_only(req)
                if metadata_dist is not None:
                    req.needs_more_preparation = True
                    return metadata_dist

            # None of the optimizations worked, fully prepare the requirement
            return self._prepare_linked_requirement(req, parallel_builds)
528
+
529
+ def prepare_linked_requirements_more(
530
+ self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
531
+ ) -> None:
532
+ """Prepare linked requirements more, if needed."""
533
+ reqs = [req for req in reqs if req.needs_more_preparation]
534
+ for req in reqs:
535
+ # Determine if any of these requirements were already downloaded.
536
+ if self.download_dir is not None and req.link.is_wheel:
537
+ hashes = self._get_linked_req_hashes(req)
538
+ file_path = _check_download_dir(req.link, self.download_dir, hashes)
539
+ if file_path is not None:
540
+ self._downloaded[req.link.url] = file_path
541
+ req.needs_more_preparation = False
542
+
543
+ # Prepare requirements we found were already downloaded for some
544
+ # reason. The other downloads will be completed separately.
545
+ partially_downloaded_reqs: List[InstallRequirement] = []
546
+ for req in reqs:
547
+ if req.needs_more_preparation:
548
+ partially_downloaded_reqs.append(req)
549
+ else:
550
+ self._prepare_linked_requirement(req, parallel_builds)
551
+
552
+ # TODO: separate this part out from RequirementPreparer when the v1
553
+ # resolver can be removed!
554
+ self._complete_partial_requirements(
555
+ partially_downloaded_reqs,
556
+ parallel_builds=parallel_builds,
557
+ )
558
+
559
    def _prepare_linked_requirement(
        self, req: InstallRequirement, parallel_builds: bool
    ) -> BaseDistribution:
        """Fully fetch/unpack req.link and return the prepared distribution.

        Handles hash verification (including the special case of wheels from
        the local built-wheel cache), populates ``req.download_info`` and
        ``req.local_file_path``, then builds the distribution metadata.
        """
        assert req.link
        link = req.link

        hashes = self._get_linked_req_hashes(req)

        if hashes and req.is_wheel_from_cache:
            assert req.download_info is not None
            assert link.is_wheel
            assert link.is_file
            # We need to verify hashes, and we have found the requirement in the cache
            # of locally built wheels.
            if (
                isinstance(req.download_info.info, ArchiveInfo)
                and req.download_info.info.hashes
                and hashes.has_one_of(req.download_info.info.hashes)
            ):
                # At this point we know the requirement was built from a hashable source
                # artifact, and we verified that the cache entry's hash of the original
                # artifact matches one of the hashes we expect. We don't verify hashes
                # against the cached wheel, because the wheel is not the original.
                hashes = None
            else:
                logger.warning(
                    "The hashes of the source archive found in cache entry "
                    "don't match, ignoring cached built wheel "
                    "and re-downloading source."
                )
                # Fall back to the original source link; re-fetch from there.
                req.link = req.cached_wheel_source_link
                link = req.link

        self._ensure_link_req_src_dir(req, parallel_builds)

        if link.is_existing_dir():
            local_file = None
        elif link.url not in self._downloaded:
            try:
                local_file = unpack_url(
                    link,
                    req.source_dir,
                    self._download,
                    self.verbosity,
                    self.download_dir,
                    hashes,
                )
            except NetworkConnectionError as exc:
                raise InstallationError(
                    f"Could not install requirement {req} because of HTTP "
                    f"error {exc} for URL {link}"
                )
        else:
            # Already downloaded earlier this run; verify and reuse it.
            file_path = self._downloaded[link.url]
            if hashes:
                hashes.check_against_path(file_path)
            local_file = File(file_path, content_type=None)

        # If download_info is set, we got it from the wheel cache.
        if req.download_info is None:
            # Editables don't go through this function (see
            # prepare_editable_requirement).
            assert not req.editable
            req.download_info = direct_url_from_link(link, req.source_dir)
            # Make sure we have a hash in download_info. If we got it as part of the
            # URL, it will have been verified and we can rely on it. Otherwise we
            # compute it from the downloaded file.
            # FIXME: https://github.com/pypa/pip/issues/11943
            if (
                isinstance(req.download_info.info, ArchiveInfo)
                and not req.download_info.info.hashes
                and local_file
            ):
                hash = hash_file(local_file.path)[0].hexdigest()
                # We populate info.hash for backward compatibility.
                # This will automatically populate info.hashes.
                req.download_info.info.hash = f"sha256={hash}"

        # For use in later processing,
        # preserve the file path on the requirement.
        if local_file:
            req.local_file_path = local_file.path

        dist = _get_prepared_distribution(
            req,
            self.build_tracker,
            self.finder,
            self.build_isolation,
            self.check_build_deps,
        )
        return dist
650
+
651
+ def save_linked_requirement(self, req: InstallRequirement) -> None:
652
+ assert self.download_dir is not None
653
+ assert req.link is not None
654
+ link = req.link
655
+ if link.is_vcs or (link.is_existing_dir() and req.editable):
656
+ # Make a .zip of the source_dir we already created.
657
+ req.archive(self.download_dir)
658
+ return
659
+
660
+ if link.is_existing_dir():
661
+ logger.debug(
662
+ "Not copying link to destination directory "
663
+ "since it is a directory: %s",
664
+ link,
665
+ )
666
+ return
667
+ if req.local_file_path is None:
668
+ # No distribution was downloaded for this requirement.
669
+ return
670
+
671
+ download_location = os.path.join(self.download_dir, link.filename)
672
+ if not os.path.exists(download_location):
673
+ shutil.copy(req.local_file_path, download_location)
674
+ download_path = display_path(download_location)
675
+ logger.info("Saved %s", download_path)
676
+
677
+ def prepare_editable_requirement(
678
+ self,
679
+ req: InstallRequirement,
680
+ ) -> BaseDistribution:
681
+ """Prepare an editable requirement."""
682
+ assert req.editable, "cannot prepare a non-editable req as editable"
683
+
684
+ logger.info("Obtaining %s", req)
685
+
686
+ with indent_log():
687
+ if self.require_hashes:
688
+ raise InstallationError(
689
+ f"The editable requirement {req} cannot be installed when "
690
+ "requiring hashes, because there is no single file to "
691
+ "hash."
692
+ )
693
+ req.ensure_has_source_dir(self.src_dir)
694
+ req.update_editable()
695
+ assert req.source_dir
696
+ req.download_info = direct_url_for_editable(req.unpacked_source_directory)
697
+
698
+ dist = _get_prepared_distribution(
699
+ req,
700
+ self.build_tracker,
701
+ self.finder,
702
+ self.build_isolation,
703
+ self.check_build_deps,
704
+ )
705
+
706
+ req.check_if_exists(self.use_user_site)
707
+
708
+ return dist
709
+
710
+ def prepare_installed_requirement(
711
+ self,
712
+ req: InstallRequirement,
713
+ skip_reason: str,
714
+ ) -> BaseDistribution:
715
+ """Prepare an already-installed requirement."""
716
+ assert req.satisfied_by, "req should have been satisfied but isn't"
717
+ assert skip_reason is not None, (
718
+ "did not get skip reason skipped but req.satisfied_by "
719
+ f"is set to {req.satisfied_by}"
720
+ )
721
+ logger.info(
722
+ "Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version
723
+ )
724
+ with indent_log():
725
+ if self.require_hashes:
726
+ logger.debug(
727
+ "Since it is already installed, we are trusting this "
728
+ "package without checking its hash. To ensure a "
729
+ "completely repeatable environment, install into an "
730
+ "empty virtualenv."
731
+ )
732
+ return InstalledDistribution(req).get_metadata_distribution()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/_log.cpython-311.pyc ADDED
Binary file (2.04 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_internal/utils/__pycache__/datetime.cpython-311.pyc ADDED
Binary file (737 Bytes). View file