not-pegasus commited on
Commit
bddf20b
·
verified ·
1 Parent(s): b90379c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/INSTALLER +1 -0
  2. env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/RECORD +92 -0
  3. env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/WHEEL +5 -0
  4. env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/entry_points.txt +2 -0
  5. env/lib/python3.13/site-packages/fsspec/__init__.py +71 -0
  6. env/lib/python3.13/site-packages/fsspec/_version.py +34 -0
  7. env/lib/python3.13/site-packages/fsspec/archive.py +75 -0
  8. env/lib/python3.13/site-packages/fsspec/asyn.py +1103 -0
  9. env/lib/python3.13/site-packages/fsspec/caching.py +1011 -0
  10. env/lib/python3.13/site-packages/fsspec/callbacks.py +324 -0
  11. env/lib/python3.13/site-packages/fsspec/compression.py +182 -0
  12. env/lib/python3.13/site-packages/fsspec/config.py +131 -0
  13. env/lib/python3.13/site-packages/fsspec/conftest.py +125 -0
  14. env/lib/python3.13/site-packages/fsspec/core.py +760 -0
  15. env/lib/python3.13/site-packages/fsspec/dircache.py +98 -0
  16. env/lib/python3.13/site-packages/fsspec/exceptions.py +18 -0
  17. env/lib/python3.13/site-packages/fsspec/fuse.py +324 -0
  18. env/lib/python3.13/site-packages/fsspec/generic.py +396 -0
  19. env/lib/python3.13/site-packages/fsspec/gui.py +417 -0
  20. env/lib/python3.13/site-packages/fsspec/json.py +112 -0
  21. env/lib/python3.13/site-packages/fsspec/mapping.py +251 -0
  22. env/lib/python3.13/site-packages/fsspec/parquet.py +580 -0
  23. env/lib/python3.13/site-packages/fsspec/registry.py +333 -0
  24. env/lib/python3.13/site-packages/fsspec/spec.py +2281 -0
  25. env/lib/python3.13/site-packages/fsspec/transaction.py +90 -0
  26. env/lib/python3.13/site-packages/fsspec/utils.py +738 -0
  27. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/INSTALLER +1 -0
  28. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/METADATA +48 -0
  29. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/RECORD +12 -0
  30. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/REQUESTED +0 -0
  31. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/WHEEL +6 -0
  32. env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/top_level.txt +1 -0
  33. env/lib/python3.13/site-packages/h11-0.16.0.dist-info/INSTALLER +1 -0
  34. env/lib/python3.13/site-packages/h11-0.16.0.dist-info/METADATA +202 -0
  35. env/lib/python3.13/site-packages/h11-0.16.0.dist-info/RECORD +29 -0
  36. env/lib/python3.13/site-packages/h11-0.16.0.dist-info/WHEEL +5 -0
  37. env/lib/python3.13/site-packages/h11-0.16.0.dist-info/top_level.txt +1 -0
  38. env/lib/python3.13/site-packages/httpcore/__init__.py +141 -0
  39. env/lib/python3.13/site-packages/httpcore/_api.py +94 -0
  40. env/lib/python3.13/site-packages/httpcore/_exceptions.py +81 -0
  41. env/lib/python3.13/site-packages/httpcore/_models.py +516 -0
  42. env/lib/python3.13/site-packages/httpcore/_ssl.py +9 -0
  43. env/lib/python3.13/site-packages/httpcore/_synchronization.py +318 -0
  44. env/lib/python3.13/site-packages/httpcore/_trace.py +107 -0
  45. env/lib/python3.13/site-packages/httpcore/_utils.py +37 -0
  46. env/lib/python3.13/site-packages/httpcore/py.typed +0 -0
  47. env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/INSTALLER +1 -0
  48. env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/METADATA +203 -0
  49. env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/RECORD +54 -0
  50. env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/WHEEL +4 -0
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/RECORD ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anyio-4.12.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ anyio-4.12.0.dist-info/METADATA,sha256=rte2_C2hYKP9_iVMFYogSzBxdHBzwY45S1TrLiBsxdk,4277
3
+ anyio-4.12.0.dist-info/RECORD,,
4
+ anyio-4.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ anyio-4.12.0.dist-info/entry_points.txt,sha256=_d6Yu6uiaZmNe0CydowirE9Cmg7zUL2g08tQpoS3Qvc,39
6
+ anyio-4.12.0.dist-info/licenses/LICENSE,sha256=U2GsncWPLvX9LpsJxoKXwX8ElQkJu8gCO9uC6s8iwrA,1081
7
+ anyio-4.12.0.dist-info/top_level.txt,sha256=QglSMiWX8_5dpoVAEIHdEYzvqFMdSYWmCj6tYw2ITkQ,6
8
+ anyio/__init__.py,sha256=7iDVqMUprUuKNY91FuoKqayAhR-OY136YDPI6P78HHk,6170
9
+ anyio/__pycache__/__init__.cpython-313.pyc,,
10
+ anyio/__pycache__/from_thread.cpython-313.pyc,,
11
+ anyio/__pycache__/functools.cpython-313.pyc,,
12
+ anyio/__pycache__/lowlevel.cpython-313.pyc,,
13
+ anyio/__pycache__/pytest_plugin.cpython-313.pyc,,
14
+ anyio/__pycache__/to_interpreter.cpython-313.pyc,,
15
+ anyio/__pycache__/to_process.cpython-313.pyc,,
16
+ anyio/__pycache__/to_thread.cpython-313.pyc,,
17
+ anyio/_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ anyio/_backends/__pycache__/__init__.cpython-313.pyc,,
19
+ anyio/_backends/__pycache__/_asyncio.cpython-313.pyc,,
20
+ anyio/_backends/__pycache__/_trio.cpython-313.pyc,,
21
+ anyio/_backends/_asyncio.py,sha256=w6gCSMs_2D1doKVtzi32bOloBl1df-IHubl8-Vks908,99656
22
+ anyio/_backends/_trio.py,sha256=ScNVMQB0iiuJMAon1epQCVOVbIbf-Lxnfb5OxujzMok,42398
23
+ anyio/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ anyio/_core/__pycache__/__init__.cpython-313.pyc,,
25
+ anyio/_core/__pycache__/_asyncio_selector_thread.cpython-313.pyc,,
26
+ anyio/_core/__pycache__/_contextmanagers.cpython-313.pyc,,
27
+ anyio/_core/__pycache__/_eventloop.cpython-313.pyc,,
28
+ anyio/_core/__pycache__/_exceptions.cpython-313.pyc,,
29
+ anyio/_core/__pycache__/_fileio.cpython-313.pyc,,
30
+ anyio/_core/__pycache__/_resources.cpython-313.pyc,,
31
+ anyio/_core/__pycache__/_signals.cpython-313.pyc,,
32
+ anyio/_core/__pycache__/_sockets.cpython-313.pyc,,
33
+ anyio/_core/__pycache__/_streams.cpython-313.pyc,,
34
+ anyio/_core/__pycache__/_subprocesses.cpython-313.pyc,,
35
+ anyio/_core/__pycache__/_synchronization.cpython-313.pyc,,
36
+ anyio/_core/__pycache__/_tasks.cpython-313.pyc,,
37
+ anyio/_core/__pycache__/_tempfile.cpython-313.pyc,,
38
+ anyio/_core/__pycache__/_testing.cpython-313.pyc,,
39
+ anyio/_core/__pycache__/_typedattr.cpython-313.pyc,,
40
+ anyio/_core/_asyncio_selector_thread.py,sha256=2PdxFM3cs02Kp6BSppbvmRT7q7asreTW5FgBxEsflBo,5626
41
+ anyio/_core/_contextmanagers.py,sha256=YInBCabiEeS-UaP_Jdxa1CaFC71ETPW8HZTHIM8Rsc8,7215
42
+ anyio/_core/_eventloop.py,sha256=xsoYgHIddNYusTqAFDVmcvpjHKJFmdgtDcAjpN3JEWQ,6261
43
+ anyio/_core/_exceptions.py,sha256=fR2SvRUBYVHvolNKbzWSLt8FC_5NFB2OAzGD738fD8Q,4257
44
+ anyio/_core/_fileio.py,sha256=uc7t10Vb-If7GbdWM_zFf-ajUe6uek63fSt7IBLlZW0,25731
45
+ anyio/_core/_resources.py,sha256=NbmU5O5UX3xEyACnkmYX28Fmwdl-f-ny0tHym26e0w0,435
46
+ anyio/_core/_signals.py,sha256=vulT1M1xdLYtAR-eY5TamIgaf1WTlOwOrMGwswlTTr8,905
47
+ anyio/_core/_sockets.py,sha256=aTbgMr0qPmBPfrapxLykyajsmS7IAerhW9_Qk5r5E18,34311
48
+ anyio/_core/_streams.py,sha256=FczFwIgDpnkK0bODWJXMpsUJYdvAD04kaUaGzJU8DK0,1806
49
+ anyio/_core/_subprocesses.py,sha256=EXm5igL7dj55iYkPlbYVAqtbqxJxjU-6OndSTIx9SRg,8047
50
+ anyio/_core/_synchronization.py,sha256=SY3nsr1ZZyDrjamsOVoYcvj-x6d_AR13Cu5lZecG0gY,20894
51
+ anyio/_core/_tasks.py,sha256=km6hVE1fsuIenya3MDud8KP6-J_bNzlgYC10wUxI7iA,4880
52
+ anyio/_core/_tempfile.py,sha256=lHb7CW4FyIlpkf5ADAf4VmLHCKwEHF9nxqNyBCFFUiA,19697
53
+ anyio/_core/_testing.py,sha256=YUGwA5cgFFbUTv4WFd7cv_BSVr4ryTtPp8owQA3JdWE,2118
54
+ anyio/_core/_typedattr.py,sha256=P4ozZikn3-DbpoYcvyghS_FOYAgbmUxeoU8-L_07pZM,2508
55
+ anyio/abc/__init__.py,sha256=6mWhcl_pGXhrgZVHP_TCfMvIXIOp9mroEFM90fYCU_U,2869
56
+ anyio/abc/__pycache__/__init__.cpython-313.pyc,,
57
+ anyio/abc/__pycache__/_eventloop.cpython-313.pyc,,
58
+ anyio/abc/__pycache__/_resources.cpython-313.pyc,,
59
+ anyio/abc/__pycache__/_sockets.cpython-313.pyc,,
60
+ anyio/abc/__pycache__/_streams.cpython-313.pyc,,
61
+ anyio/abc/__pycache__/_subprocesses.cpython-313.pyc,,
62
+ anyio/abc/__pycache__/_tasks.cpython-313.pyc,,
63
+ anyio/abc/__pycache__/_testing.cpython-313.pyc,,
64
+ anyio/abc/_eventloop.py,sha256=GTZbdItBHcj_b-8K2XylET2-bBYLZ3XjW4snY7vK7LE,10900
65
+ anyio/abc/_resources.py,sha256=DrYvkNN1hH6Uvv5_5uKySvDsnknGVDe8FCKfko0VtN8,783
66
+ anyio/abc/_sockets.py,sha256=ECTY0jLEF18gryANHR3vFzXzGdZ-xPwELq1QdgOb0Jo,13258
67
+ anyio/abc/_streams.py,sha256=005GKSCXGprxnhucILboSqc2JFovECZk9m3p-qqxXVc,7640
68
+ anyio/abc/_subprocesses.py,sha256=cumAPJTktOQtw63IqG0lDpyZqu_l1EElvQHMiwJgL08,2067
69
+ anyio/abc/_tasks.py,sha256=KC7wrciE48AINOI-AhPutnFhe1ewfP7QnamFlDzqesQ,3721
70
+ anyio/abc/_testing.py,sha256=tBJUzkSfOXJw23fe8qSJ03kJlShOYjjaEyFB6k6MYT8,1821
71
+ anyio/from_thread.py,sha256=-YZOTpu9WVHtAsMxQGIOaHMjaDRNeKQilx6Nn2qDU-o,19017
72
+ anyio/functools.py,sha256=tIWQ90cuLMxfJIpdBfFY3W3CC1zqFCRAyR3DxKc0Xlo,10061
73
+ anyio/lowlevel.py,sha256=NnPYQ6tWDzLRwpalX2CvsbkXkTeasbJcL52gPopWdYg,5048
74
+ anyio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
+ anyio/pytest_plugin.py,sha256=3jAFQn0jv_pyoWE2GBBlHaj9sqXj4e8vob0_hgrsXE8,10244
76
+ anyio/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ anyio/streams/__pycache__/__init__.cpython-313.pyc,,
78
+ anyio/streams/__pycache__/buffered.cpython-313.pyc,,
79
+ anyio/streams/__pycache__/file.cpython-313.pyc,,
80
+ anyio/streams/__pycache__/memory.cpython-313.pyc,,
81
+ anyio/streams/__pycache__/stapled.cpython-313.pyc,,
82
+ anyio/streams/__pycache__/text.cpython-313.pyc,,
83
+ anyio/streams/__pycache__/tls.cpython-313.pyc,,
84
+ anyio/streams/buffered.py,sha256=2R3PeJhe4EXrdYqz44Y6-Eg9R6DrmlsYrP36Ir43-po,6263
85
+ anyio/streams/file.py,sha256=4WZ7XGz5WNu39FQHvqbe__TQ0HDP9OOhgO1mk9iVpVU,4470
86
+ anyio/streams/memory.py,sha256=F0zwzvFJKAhX_LRZGoKzzqDC2oMM-f-yyTBrEYEGOaU,10740
87
+ anyio/streams/stapled.py,sha256=T8Xqwf8K6EgURPxbt1N4i7A8BAk-gScv-GRhjLXIf_o,4390
88
+ anyio/streams/text.py,sha256=BcVAGJw1VRvtIqnv-o0Rb0pwH7p8vwlvl21xHq522ag,5765
89
+ anyio/streams/tls.py,sha256=Jpxy0Mfbcp1BxHCwE-YjSSFaLnIBbnnwur-excYThs4,15368
90
+ anyio/to_interpreter.py,sha256=_mLngrMy97TMR6VbW4Y6YzDUk9ZuPcQMPlkuyRh3C9k,7100
91
+ anyio/to_process.py,sha256=cEyYUgb8LJVRJCfs6rK3aEM_T3k2gEmhl0nBjEvflOk,9687
92
+ anyio/to_thread.py,sha256=tXQPvHohvQ2Vrw2pBtdzkRPNV7u3H2_UDbvwL2u_R7k,2465
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [pytest11]
2
+ anyio = anyio.pytest_plugin
env/lib/python3.13/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import caching
2
+ from ._version import __version__ # noqa: F401
3
+ from .callbacks import Callback
4
+ from .compression import available_compressions
5
+ from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
6
+ from .exceptions import FSTimeoutError
7
+ from .mapping import FSMap, get_mapper
8
+ from .registry import (
9
+ available_protocols,
10
+ filesystem,
11
+ get_filesystem_class,
12
+ register_implementation,
13
+ registry,
14
+ )
15
+ from .spec import AbstractFileSystem
16
+
17
+ __all__ = [
18
+ "AbstractFileSystem",
19
+ "FSTimeoutError",
20
+ "FSMap",
21
+ "filesystem",
22
+ "register_implementation",
23
+ "get_filesystem_class",
24
+ "get_fs_token_paths",
25
+ "get_mapper",
26
+ "open",
27
+ "open_files",
28
+ "open_local",
29
+ "registry",
30
+ "caching",
31
+ "Callback",
32
+ "available_protocols",
33
+ "available_compressions",
34
+ "url_to_fs",
35
+ ]
36
+
37
+
38
def process_entries():
    """Register filesystem implementations advertised via entry points.

    Scans the ``fsspec.specs`` entry-point group and registers each named
    implementation; the first occurrence of a duplicate name wins.
    """
    try:
        from importlib.metadata import entry_points
    except ImportError:
        # no entry-point machinery available: nothing to register
        return
    if entry_points is None:
        return
    try:
        eps = entry_points()
    except TypeError:
        # importlib-metadata < 0.8
        return
    if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
        specs = eps.select(group="fsspec.specs")
    else:
        specs = eps.get("fsspec.specs", [])
    seen = set()
    for spec in specs:
        name = spec.name
        if name in seen:
            continue
        seen.add(name)
        register_implementation(
            name,
            spec.value.replace(":", "."),
            errtxt=f"Unable to load filesystem from {spec}",
            # We take our implementations as the ones to overload with if
            # for some reason we encounter some, may be the same, already
            # registered
            clobber=True,
        )


process_entries()
env/lib/python3.13/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # at runtime the type aliases are only placeholders
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# both dunder and plain names are published, bound to the same objects
version = __version__ = "2025.12.0"
version_tuple = __version_tuple__ = (2025, 12, 0)
commit_id = __commit_id__ = None
env/lib/python3.13/site-packages/fsspec/archive.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import operator
2
+
3
+ from fsspec import AbstractFileSystem
4
+ from fsspec.utils import tokenize
5
+
6
+
7
class AbstractArchiveFileSystem(AbstractFileSystem):
    """
    A generic superclass for implementing Archive-based filesystems.

    Currently, it is shared amongst
    :class:`~fsspec.implementations.zip.ZipFileSystem`,
    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
    :class:`~fsspec.implementations.tar.TarFileSystem`.
    """

    def __str__(self):
        # id() keeps the representation unique per instance
        return f"<Archive-like object {type(self).__name__} at {id(self)}>"

    __repr__ = __str__

    def ukey(self, path):
        # Unique key mixes the path with the archive's file object and
        # protocol so the same path in different archives does not collide.
        # NOTE(review): assumes subclasses set ``self.fo`` — confirm.
        return tokenize(path, self.fo, self.protocol)

    def _all_dirnames(self, paths):
        """Returns *all* directory names for each path in paths, including intermediate
        ones.

        Parameters
        ----------
        paths: Iterable of path strings
        """
        if len(paths) == 0:
            return set()

        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
        # recurse so grandparents (and higher) are included as well
        return dirnames | self._all_dirnames(dirnames)

    def info(self, path, **kwargs):
        """Return the cached entry for ``path``; raises FileNotFoundError if absent."""
        # listings are built lazily by the subclass's _get_dirs()
        self._get_dirs()
        path = self._strip_protocol(path)
        if path in {"", "/"} and self.dir_cache:
            # the archive root itself has no entry in the cache
            return {"name": "", "type": "directory", "size": 0}
        if path in self.dir_cache:
            return self.dir_cache[path]
        elif path + "/" in self.dir_cache:
            # some archive formats store directories with a trailing slash
            return self.dir_cache[path + "/"]
        else:
            raise FileNotFoundError(path)

    def ls(self, path, detail=True, **kwargs):
        """List direct children of ``path`` from the cached archive listing.

        With ``detail=True`` returns a name-sorted list of info dicts,
        otherwise a sorted list of names.
        """
        self._get_dirs()
        paths = {}
        for p, f in self.dir_cache.items():
            p = p.rstrip("/")
            if "/" in p:
                root = p.rsplit("/", 1)[0]
            else:
                root = ""
            if root == path.rstrip("/"):
                # direct child of the requested directory
                paths[p] = f
            elif all(
                (a == b)
                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
            ):
                # root directory entry
                ppath = p.rstrip("/").split("/", 1)[0]
                if ppath not in paths:
                    # synthesize an implicit-directory entry
                    out = {"name": ppath, "size": 0, "type": "directory"}
                    paths[ppath] = out
        if detail:
            out = sorted(paths.values(), key=operator.itemgetter("name"))
            return out
        else:
            return sorted(paths)
env/lib/python3.13/site-packages/fsspec/asyn.py ADDED
@@ -0,0 +1,1103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import asyncio.events
3
+ import functools
4
+ import inspect
5
+ import io
6
+ import numbers
7
+ import os
8
+ import re
9
+ import threading
10
+ from collections.abc import Iterable
11
+ from glob import has_magic
12
+ from typing import TYPE_CHECKING
13
+
14
+ from .callbacks import DEFAULT_CALLBACK
15
+ from .exceptions import FSTimeoutError
16
+ from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
+ from .spec import AbstractBufferedFile, AbstractFileSystem
18
+ from .utils import glob_translate, is_exception, other_paths
19
+
20
+ private = re.compile("_[^_]")
21
+ iothread = [None] # dedicated fsspec IO thread
22
+ loop = [None] # global event loop for any non-async instance
23
+ _lock = None # global lock placeholder
24
+ get_running_loop = asyncio.get_running_loop
25
+
26
+
27
def get_lock():
    """Allocate or return a threading lock.

    The lock is allocated on first use to allow setting one lock per forked process.
    """
    global _lock
    if _lock is None:
        _lock = threading.Lock()
    return _lock
36
+
37
+
38
def reset_lock():
    """Reset the global lock.

    This should be called only on the init of a forked process to reset the lock to
    None, enabling the new forked process to get a new lock.
    """
    global _lock

    # drop the inherited IO loop/thread too: neither survives a fork
    iothread[0] = None
    loop[0] = None
    _lock = None
49
+
50
+
51
+ async def _runner(event, coro, result, timeout=None):
52
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
53
+ if timeout is not None:
54
+ coro = asyncio.wait_for(coro, timeout=timeout)
55
+ try:
56
+ result[0] = await coro
57
+ except Exception as ex:
58
+ result[0] = ex
59
+ finally:
60
+ event.set()
61
+
62
+
63
def sync(loop, func, *args, timeout=None, **kwargs):
    """
    Make loop run coroutine until it returns. Runs in other thread

    Examples
    --------
    >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
                         timeout=timeout, **kwargs)
    """
    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
    # NB: if the loop is not running *yet*, it is OK to submit work
    # and we will wait for it
    if loop is None or loop.is_closed():
        raise RuntimeError("Loop is not running")
    try:
        # calling sync() from inside the very loop that should execute the
        # coroutine would deadlock, so forbid it explicitly
        loop0 = asyncio.events.get_running_loop()
        if loop0 is loop:
            raise NotImplementedError("Calling sync() from within a running loop")
    except NotImplementedError:
        raise
    except RuntimeError:
        # no loop running in the *calling* thread: the expected case
        pass
    coro = func(*args, **kwargs)
    result = [None]
    event = threading.Event()
    # schedule on the IO loop's thread; _runner fills result and sets event
    asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
    while True:
        # this loops allows thread to get interrupted
        if event.wait(1):
            break
        if timeout is not None:
            # count down in 1s steps so KeyboardInterrupt can land between waits
            timeout -= 1
            if timeout < 0:
                raise FSTimeoutError

    return_result = result[0]
    if isinstance(return_result, asyncio.TimeoutError):
        # suppress asyncio.TimeoutError, raise FSTimeoutError
        raise FSTimeoutError from return_result
    elif isinstance(return_result, BaseException):
        raise return_result
    else:
        return return_result
106
+
107
+
108
def sync_wrapper(func, obj=None):
    """Given a function, make so can be called in blocking contexts

    Leave obj=None if defining within a class. Pass the instance if attaching
    as an attribute of the instance.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # bound instance: either the one given at wrap time, or the
        # method's own ``self`` (first positional argument)
        target = obj or args[0]
        return sync(target.loop, func, *args, **kwargs)

    return wrapper
121
+
122
+
123
def get_loop():
    """Create or return the default fsspec IO loop

    The loop will be running on a separate daemon thread.
    """
    if loop[0] is not None:
        return loop[0]
    with get_lock():
        # re-check under the lock: another thread may have created the
        # loop while we were waiting
        if loop[0] is None:
            loop[0] = asyncio.new_event_loop()
            th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
            th.daemon = True
            th.start()
            iothread[0] = th
    return loop[0]
139
+
140
+
141
def reset_after_fork():
    """Reset this module's per-process state in a newly forked child.

    Registered via ``os.register_at_fork`` below. The IO event loop and its
    thread do not survive a fork, and the module lock may have been held by
    another thread at fork time, so all three are cleared and will be
    recreated lazily by ``get_loop``/``get_lock``.
    """
    global _lock
    loop[0] = None
    iothread[0] = None
    # Bug fix: this previously declared/assigned ``global lock``, which only
    # created an unused module global -- the real lock used by get_lock() is
    # ``_lock``, so a child forked while the lock was held could deadlock.
    _lock = None
146
+
147
+
148
+ if hasattr(os, "register_at_fork"):
149
+ # should be posix; this will do nothing for spawn or forkserver subprocesses
150
+ os.register_at_fork(after_in_child=reset_after_fork)
151
+
152
+
153
+ if TYPE_CHECKING:
154
+ import resource
155
+
156
+ ResourceError = resource.error
157
+ else:
158
+ try:
159
+ import resource
160
+ except ImportError:
161
+ resource = None
162
+ ResourceError = OSError
163
+ else:
164
+ ResourceError = getattr(resource, "error", OSError)
165
+
166
+ _DEFAULT_BATCH_SIZE = 128
167
+ _NOFILES_DEFAULT_BATCH_SIZE = 1280
168
+
169
+
170
def _get_batch_size(nofiles=False):
    """Return how many coroutines may be in flight at once.

    Explicit configuration wins; otherwise the value is derived from the
    process's open-file soft limit, or a fixed default when local files are
    not involved or the limit cannot be queried.
    """
    from fsspec.config import conf

    key = "nofiles_gather_batch_size" if nofiles else "gather_batch_size"
    if key in conf:
        return conf[key]
    if nofiles:
        return _NOFILES_DEFAULT_BATCH_SIZE
    if resource is None:
        return _DEFAULT_BATCH_SIZE

    try:
        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
    except (ImportError, ValueError, ResourceError):
        return _DEFAULT_BATCH_SIZE

    # unlimited descriptors -> no throttling; otherwise keep headroom
    return -1 if soft_limit == resource.RLIM_INFINITY else soft_limit // 8
193
+
194
+
195
def running_async() -> bool:
    """Being executed by an event loop?"""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # no loop running in this thread
        return False
    return True
202
+
203
+
204
async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, then it will not be any throttling. If
        None, it will be inferred from _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        If inferring the batch_size, does this operation involve local files?
        If yes, you normally expect smaller batches.
    """

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        # "-1" means unthrottled: one window covering everything
        batch_size = len(coros)

    assert batch_size > 0

    async def _run_coro(coro, i):
        # Wrapper that (a) keeps the submission index i so results can be
        # returned in order, (b) applies the per-coroutine timeout and
        # (c) ticks the progress callback exactly once, success or failure.
        try:
            return await asyncio.wait_for(coro, timeout=timeout), i
        except Exception as e:
            if not return_exceptions:
                raise
            return e, i
        finally:
            callback.relative_update(1)

    i = 0
    n = len(coros)
    results = [None] * n  # filled in submission order via index k below
    pending = set()

    # Sliding-window scheduler: keep at most batch_size tasks in flight,
    # topping the window back up whenever at least one task completes.
    while pending or i < n:
        while len(pending) < batch_size and i < n:
            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
            i += 1

        if not pending:
            break

        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        while done:
            result, k = await done.pop()
            results[k] = result

    return results
271
+
272
+
273
# these methods should be implemented as async by any async-able backend
# NOTE(review): AsyncFileSystem.mirror_sync_methods suggests each of these
# underscored coroutines also gets a blocking counterpart -- confirm against
# the mirroring code elsewhere in this module.
async_methods = [
    "_ls",
    "_cat_file",
    "_get_file",
    "_put_file",
    "_rm_file",
    "_cp_file",
    "_pipe_file",
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    "_walk",
    "_glob",
    "_find",
    "_du",
    "_size",
    "_mkdir",
    "_makedirs",
]
295
+
296
+
297
+ class AsyncFileSystem(AbstractFileSystem):
298
+ """Async file operations, default implementations
299
+
300
+ Passes bulk operations to asyncio.gather for concurrent operation.
301
+
302
+ Implementations that have concurrent batch operations and/or async methods
303
+ should inherit from this class instead of AbstractFileSystem. Docstrings are
304
+ copied from the un-underscored method in AbstractFileSystem, if not given.
305
+ """
306
+
307
+ # note that methods do not have docstring here; they will be copied
308
+ # for _* methods and inferred for overridden methods.
309
+
310
+ async_impl = True
311
+ mirror_sync_methods = True
312
+ disable_throttling = False
313
+
314
+ def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
315
+ self.asynchronous = asynchronous
316
+ self._pid = os.getpid()
317
+ if not asynchronous:
318
+ self._loop = loop or get_loop()
319
+ else:
320
+ self._loop = None
321
+ self.batch_size = batch_size
322
+ super().__init__(*args, **kwargs)
323
+
324
+ @property
325
+ def loop(self):
326
+ if self._pid != os.getpid():
327
+ raise RuntimeError("This class is not fork-safe")
328
+ return self._loop
329
+
330
+ async def _rm_file(self, path, **kwargs):
331
+ if (
332
+ inspect.iscoroutinefunction(self._rm)
333
+ and type(self)._rm is not AsyncFileSystem._rm
334
+ ):
335
+ return await self._rm(path, recursive=False, batch_size=1, **kwargs)
336
+ raise NotImplementedError
337
+
338
+ async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
339
+ # TODO: implement on_error
340
+ batch_size = batch_size or self.batch_size
341
+ path = await self._expand_path(path, recursive=recursive)
342
+ return await _run_coros_in_chunks(
343
+ [self._rm_file(p, **kwargs) for p in reversed(path)],
344
+ batch_size=batch_size,
345
+ nofiles=True,
346
+ )
347
+
348
    async def _cp_file(self, path1, path2, **kwargs):
        # single-file copy primitive; concrete async backends must override
        raise NotImplementedError
350
+
351
    async def _mv_file(self, path1, path2):
        # move = copy then delete source; not atomic if the second step fails
        await self._cp_file(path1, path2)
        await self._rm_file(path1)
354
+
355
    async def _copy(
        self,
        path1,
        path2,
        recursive=False,
        on_error=None,
        maxdepth=None,
        batch_size=None,
        **kwargs,
    ):
        """Copy file(s) within this filesystem, running copies concurrently.

        ``on_error`` defaults to "ignore" for recursive copies (vanished
        sources are skipped) and "raise" otherwise.
        """
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            source_is_str = isinstance(path1, str)
            paths1 = await self._expand_path(
                path1, maxdepth=maxdepth, recursive=recursive
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [
                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or await self._isdir(path2)
            )

            # decide whether destinations are nested under path2 as a
            # directory (cp-like semantics) when computing target paths
            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        batch_size = batch_size or self.batch_size
        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
        result = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
        )

        for ex in filter(is_exception, result):
            # "ignore" only suppresses missing sources; everything else raises
            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
                continue
            raise ex
414
+
415
    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        # single-file write primitive; concrete async backends must override
        raise NotImplementedError
417
+
418
+ async def _pipe(self, path, value=None, batch_size=None, **kwargs):
419
+ if isinstance(path, str):
420
+ path = {path: value}
421
+ batch_size = batch_size or self.batch_size
422
+ return await _run_coros_in_chunks(
423
+ [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
424
+ batch_size=batch_size,
425
+ nofiles=True,
426
+ )
427
+
428
    async def _process_limits(self, url, start, end):
        """Helper for "Range"-based _cat_file

        Translates python-style start/end offsets (possibly None or negative)
        into an HTTP ``Range`` header value such as ``bytes=0-99``.  Negative
        offsets are resolved against the file size via ``_info`` when needed.
        """
        size = None
        suff = False
        if start is not None and start < 0:
            # if start is negative and end None, end is the "suffix length"
            if end is None:
                end = -start
                start = ""
                suff = True
            else:
                # negative start with an explicit end: resolve via file size
                size = size or (await self._info(url))["size"]
                start = size + start
        elif start is None:
            start = 0
        if not suff:
            if end is not None and end < 0:
                if start is not None:
                    size = size or (await self._info(url))["size"]
                    end = size + end
            elif end is None:
                # open-ended range: "bytes=start-"
                end = ""
            if isinstance(end, numbers.Integral):
                end -= 1  # bytes range is inclusive
        return f"bytes={start}-{end}"
453
+
454
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
455
+ raise NotImplementedError
456
+
457
+ async def _cat(
458
+ self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
459
+ ):
460
+ paths = await self._expand_path(path, recursive=recursive)
461
+ coros = [self._cat_file(path, **kwargs) for path in paths]
462
+ batch_size = batch_size or self.batch_size
463
+ out = await _run_coros_in_chunks(
464
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
465
+ )
466
+ if on_error == "raise":
467
+ ex = next(filter(is_exception, out), False)
468
+ if ex:
469
+ raise ex
470
+ if (
471
+ len(paths) > 1
472
+ or isinstance(path, list)
473
+ or paths[0] != self._strip_protocol(path)
474
+ ):
475
+ return {
476
+ k: v
477
+ for k, v in zip(paths, out)
478
+ if on_error != "omit" or not is_exception(v)
479
+ }
480
+ else:
481
+ return out[0]
482
+
483
+ async def _cat_ranges(
484
+ self,
485
+ paths,
486
+ starts,
487
+ ends,
488
+ max_gap=None,
489
+ batch_size=None,
490
+ on_error="return",
491
+ **kwargs,
492
+ ):
493
+ """Get the contents of byte ranges from one or more files
494
+
495
+ Parameters
496
+ ----------
497
+ paths: list
498
+ A list of of filepaths on this filesystems
499
+ starts, ends: int or list
500
+ Bytes limits of the read. If using a single int, the same value will be
501
+ used to read all the specified files.
502
+ """
503
+ # TODO: on_error
504
+ if max_gap is not None:
505
+ # use utils.merge_offset_ranges
506
+ raise NotImplementedError
507
+ if not isinstance(paths, list):
508
+ raise TypeError
509
+ if not isinstance(starts, Iterable):
510
+ starts = [starts] * len(paths)
511
+ if not isinstance(ends, Iterable):
512
+ ends = [ends] * len(paths)
513
+ if len(starts) != len(paths) or len(ends) != len(paths):
514
+ raise ValueError
515
+ coros = [
516
+ self._cat_file(p, start=s, end=e, **kwargs)
517
+ for p, s, e in zip(paths, starts, ends)
518
+ ]
519
+ batch_size = batch_size or self.batch_size
520
+ return await _run_coros_in_chunks(
521
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
522
+ )
523
+
524
+ async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
525
+ raise NotImplementedError
526
+
527
+ async def _put(
528
+ self,
529
+ lpath,
530
+ rpath,
531
+ recursive=False,
532
+ callback=DEFAULT_CALLBACK,
533
+ batch_size=None,
534
+ maxdepth=None,
535
+ **kwargs,
536
+ ):
537
+ """Copy file(s) from local.
538
+
539
+ Copies a specific file or tree of files (if recursive=True). If rpath
540
+ ends with a "/", it will be assumed to be a directory, and target files
541
+ will go within.
542
+
543
+ The put_file method will be called concurrently on a batch of files. The
544
+ batch_size option can configure the amount of futures that can be executed
545
+ at the same time. If it is -1, then all the files will be uploaded concurrently.
546
+ The default can be set for this instance by passing "batch_size" in the
547
+ constructor, or for all instances by setting the "gather_batch_size" key
548
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
549
+ """
550
+ if isinstance(lpath, list) and isinstance(rpath, list):
551
+ # No need to expand paths when both source and destination
552
+ # are provided as lists
553
+ rpaths = rpath
554
+ lpaths = lpath
555
+ else:
556
+ source_is_str = isinstance(lpath, str)
557
+ if source_is_str:
558
+ lpath = make_path_posix(lpath)
559
+ fs = LocalFileSystem()
560
+ lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
561
+ if source_is_str and (not recursive or maxdepth is not None):
562
+ # Non-recursive glob does not copy directories
563
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
564
+ if not lpaths:
565
+ return
566
+
567
+ source_is_file = len(lpaths) == 1
568
+ dest_is_dir = isinstance(rpath, str) and (
569
+ trailing_sep(rpath) or await self._isdir(rpath)
570
+ )
571
+
572
+ rpath = self._strip_protocol(rpath)
573
+ exists = source_is_str and (
574
+ (has_magic(lpath) and source_is_file)
575
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
576
+ )
577
+ rpaths = other_paths(
578
+ lpaths,
579
+ rpath,
580
+ exists=exists,
581
+ flatten=not source_is_str,
582
+ )
583
+
584
+ is_dir = {l: os.path.isdir(l) for l in lpaths}
585
+ rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
586
+ file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]
587
+
588
+ await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
589
+ batch_size = batch_size or self.batch_size
590
+
591
+ coros = []
592
+ callback.set_size(len(file_pairs))
593
+ for lfile, rfile in file_pairs:
594
+ put_file = callback.branch_coro(self._put_file)
595
+ coros.append(put_file(lfile, rfile, **kwargs))
596
+
597
+ return await _run_coros_in_chunks(
598
+ coros, batch_size=batch_size, callback=callback
599
+ )
600
+
601
+ async def _get_file(self, rpath, lpath, **kwargs):
602
+ raise NotImplementedError
603
+
604
+ async def _get(
605
+ self,
606
+ rpath,
607
+ lpath,
608
+ recursive=False,
609
+ callback=DEFAULT_CALLBACK,
610
+ maxdepth=None,
611
+ **kwargs,
612
+ ):
613
+ """Copy file(s) to local.
614
+
615
+ Copies a specific file or tree of files (if recursive=True). If lpath
616
+ ends with a "/", it will be assumed to be a directory, and target files
617
+ will go within. Can submit a list of paths, which may be glob-patterns
618
+ and will be expanded.
619
+
620
+ The get_file method will be called concurrently on a batch of files. The
621
+ batch_size option can configure the amount of futures that can be executed
622
+ at the same time. If it is -1, then all the files will be uploaded concurrently.
623
+ The default can be set for this instance by passing "batch_size" in the
624
+ constructor, or for all instances by setting the "gather_batch_size" key
625
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
626
+ """
627
+ if isinstance(lpath, list) and isinstance(rpath, list):
628
+ # No need to expand paths when both source and destination
629
+ # are provided as lists
630
+ rpaths = rpath
631
+ lpaths = lpath
632
+ else:
633
+ source_is_str = isinstance(rpath, str)
634
+ # First check for rpath trailing slash as _strip_protocol removes it.
635
+ source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
636
+ rpath = self._strip_protocol(rpath)
637
+ rpaths = await self._expand_path(
638
+ rpath, recursive=recursive, maxdepth=maxdepth
639
+ )
640
+ if source_is_str and (not recursive or maxdepth is not None):
641
+ # Non-recursive glob does not copy directories
642
+ rpaths = [
643
+ p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
644
+ ]
645
+ if not rpaths:
646
+ return
647
+
648
+ lpath = make_path_posix(lpath)
649
+ source_is_file = len(rpaths) == 1
650
+ dest_is_dir = isinstance(lpath, str) and (
651
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
652
+ )
653
+
654
+ exists = source_is_str and (
655
+ (has_magic(rpath) and source_is_file)
656
+ or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
657
+ )
658
+ lpaths = other_paths(
659
+ rpaths,
660
+ lpath,
661
+ exists=exists,
662
+ flatten=not source_is_str,
663
+ )
664
+
665
+ [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
666
+ batch_size = kwargs.pop("batch_size", self.batch_size)
667
+
668
+ coros = []
669
+ callback.set_size(len(lpaths))
670
+ for lpath, rpath in zip(lpaths, rpaths):
671
+ get_file = callback.branch_coro(self._get_file)
672
+ coros.append(get_file(rpath, lpath, **kwargs))
673
+ return await _run_coros_in_chunks(
674
+ coros, batch_size=batch_size, callback=callback
675
+ )
676
+
677
+ async def _isfile(self, path):
678
+ try:
679
+ return (await self._info(path))["type"] == "file"
680
+ except: # noqa: E722
681
+ return False
682
+
683
+ async def _isdir(self, path):
684
+ try:
685
+ return (await self._info(path))["type"] == "directory"
686
+ except OSError:
687
+ return False
688
+
689
+ async def _size(self, path):
690
+ return (await self._info(path)).get("size", None)
691
+
692
+ async def _sizes(self, paths, batch_size=None):
693
+ batch_size = batch_size or self.batch_size
694
+ return await _run_coros_in_chunks(
695
+ [self._size(p) for p in paths], batch_size=batch_size
696
+ )
697
+
698
+ async def _exists(self, path, **kwargs):
699
+ try:
700
+ await self._info(path, **kwargs)
701
+ return True
702
+ except FileNotFoundError:
703
+ return False
704
+
705
+ async def _info(self, path, **kwargs):
706
+ raise NotImplementedError
707
+
708
+ async def _ls(self, path, detail=True, **kwargs):
709
+ raise NotImplementedError
710
+
711
+ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
712
+ if maxdepth is not None and maxdepth < 1:
713
+ raise ValueError("maxdepth must be at least 1")
714
+
715
+ path = self._strip_protocol(path)
716
+ full_dirs = {}
717
+ dirs = {}
718
+ files = {}
719
+
720
+ detail = kwargs.pop("detail", False)
721
+ try:
722
+ listing = await self._ls(path, detail=True, **kwargs)
723
+ except (FileNotFoundError, OSError) as e:
724
+ if on_error == "raise":
725
+ raise
726
+ elif callable(on_error):
727
+ on_error(e)
728
+ if detail:
729
+ yield path, {}, {}
730
+ else:
731
+ yield path, [], []
732
+ return
733
+
734
+ for info in listing:
735
+ # each info name must be at least [path]/part , but here
736
+ # we check also for names like [path]/part/
737
+ pathname = info["name"].rstrip("/")
738
+ name = pathname.rsplit("/", 1)[-1]
739
+ if info["type"] == "directory" and pathname != path:
740
+ # do not include "self" path
741
+ full_dirs[name] = pathname
742
+ dirs[name] = info
743
+ elif pathname == path:
744
+ # file-like with same name as give path
745
+ files[""] = info
746
+ else:
747
+ files[name] = info
748
+
749
+ if detail:
750
+ yield path, dirs, files
751
+ else:
752
+ yield path, list(dirs), list(files)
753
+
754
+ if maxdepth is not None:
755
+ maxdepth -= 1
756
+ if maxdepth < 1:
757
+ return
758
+
759
+ for d in dirs:
760
+ async for _ in self._walk(
761
+ full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
762
+ ):
763
+ yield _
764
+
765
+ async def _glob(self, path, maxdepth=None, **kwargs):
766
+ if maxdepth is not None and maxdepth < 1:
767
+ raise ValueError("maxdepth must be at least 1")
768
+
769
+ import re
770
+
771
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
772
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
773
+ path = self._strip_protocol(path)
774
+ append_slash_to_dirname = ends_with_sep or path.endswith(
775
+ tuple(sep + "**" for sep in seps)
776
+ )
777
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
778
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
779
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
780
+
781
+ min_idx = min(idx_star, idx_qmark, idx_brace)
782
+
783
+ detail = kwargs.pop("detail", False)
784
+ withdirs = kwargs.pop("withdirs", True)
785
+
786
+ if not has_magic(path):
787
+ if await self._exists(path, **kwargs):
788
+ if not detail:
789
+ return [path]
790
+ else:
791
+ return {path: await self._info(path, **kwargs)}
792
+ else:
793
+ if not detail:
794
+ return [] # glob of non-existent returns empty
795
+ else:
796
+ return {}
797
+ elif "/" in path[:min_idx]:
798
+ min_idx = path[:min_idx].rindex("/")
799
+ root = path[: min_idx + 1]
800
+ depth = path[min_idx + 1 :].count("/") + 1
801
+ else:
802
+ root = ""
803
+ depth = path[min_idx + 1 :].count("/") + 1
804
+
805
+ if "**" in path:
806
+ if maxdepth is not None:
807
+ idx_double_stars = path.find("**")
808
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
809
+ depth = depth - depth_double_stars + maxdepth
810
+ else:
811
+ depth = None
812
+
813
+ allpaths = await self._find(
814
+ root, maxdepth=depth, withdirs=withdirs, detail=True, **kwargs
815
+ )
816
+
817
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
818
+ pattern = re.compile(pattern)
819
+
820
+ out = {
821
+ p: info
822
+ for p, info in sorted(allpaths.items())
823
+ if pattern.match(
824
+ p + "/"
825
+ if append_slash_to_dirname and info["type"] == "directory"
826
+ else p
827
+ )
828
+ }
829
+
830
+ if detail:
831
+ return out
832
+ else:
833
+ return list(out)
834
+
835
+ async def _du(self, path, total=True, maxdepth=None, **kwargs):
836
+ sizes = {}
837
+ # async for?
838
+ for f in await self._find(path, maxdepth=maxdepth, **kwargs):
839
+ info = await self._info(f)
840
+ sizes[info["name"]] = info["size"]
841
+ if total:
842
+ return sum(sizes.values())
843
+ else:
844
+ return sizes
845
+
846
+ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
847
+ path = self._strip_protocol(path)
848
+ out = {}
849
+ detail = kwargs.pop("detail", False)
850
+
851
+ # Add the root directory if withdirs is requested
852
+ # This is needed for posix glob compliance
853
+ if withdirs and path != "" and await self._isdir(path):
854
+ out[path] = await self._info(path)
855
+
856
+ # async for?
857
+ async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
858
+ if withdirs:
859
+ files.update(dirs)
860
+ out.update({info["name"]: info for name, info in files.items()})
861
+ if not out and (await self._isfile(path)):
862
+ # walk works on directories, but find should also return [path]
863
+ # when path happens to be a file
864
+ out[path] = {}
865
+ names = sorted(out)
866
+ if not detail:
867
+ return names
868
+ else:
869
+ return {name: out[name] for name in names}
870
+
871
+ async def _expand_path(self, path, recursive=False, maxdepth=None):
872
+ if maxdepth is not None and maxdepth < 1:
873
+ raise ValueError("maxdepth must be at least 1")
874
+
875
+ if isinstance(path, str):
876
+ out = await self._expand_path([path], recursive, maxdepth)
877
+ else:
878
+ out = set()
879
+ path = [self._strip_protocol(p) for p in path]
880
+ for p in path: # can gather here
881
+ if has_magic(p):
882
+ bit = set(await self._glob(p, maxdepth=maxdepth))
883
+ out |= bit
884
+ if recursive:
885
+ # glob call above expanded one depth so if maxdepth is defined
886
+ # then decrement it in expand_path call below. If it is zero
887
+ # after decrementing then avoid expand_path call.
888
+ if maxdepth is not None and maxdepth <= 1:
889
+ continue
890
+ out |= set(
891
+ await self._expand_path(
892
+ list(bit),
893
+ recursive=recursive,
894
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
895
+ )
896
+ )
897
+ continue
898
+ elif recursive:
899
+ rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
900
+ out |= rec
901
+ if p not in out and (recursive is False or (await self._exists(p))):
902
+ # should only check once, for the root
903
+ out.add(p)
904
+ if not out:
905
+ raise FileNotFoundError(path)
906
+ return sorted(out)
907
+
908
+ async def _mkdir(self, path, create_parents=True, **kwargs):
909
+ pass # not necessary to implement, may not have directories
910
+
911
+ async def _makedirs(self, path, exist_ok=False):
912
+ pass # not necessary to implement, may not have directories
913
+
914
+ async def open_async(self, path, mode="rb", **kwargs):
915
+ if "b" not in mode or kwargs.get("compression"):
916
+ raise ValueError
917
+ raise NotImplementedError
918
+
919
+
920
+ def mirror_sync_methods(obj):
921
+ """Populate sync and async methods for obj
922
+
923
+ For each method will create a sync version if the name refers to an async method
924
+ (coroutine) and there is no override in the child class; will create an async
925
+ method for the corresponding sync method if there is no implementation.
926
+
927
+ Uses the methods specified in
928
+ - async_methods: the set that an implementation is expected to provide
929
+ - default_async_methods: that can be derived from their sync version in
930
+ AbstractFileSystem
931
+ - AsyncFileSystem: async-specific default coroutines
932
+ """
933
+ from fsspec import AbstractFileSystem
934
+
935
+ for method in async_methods + dir(AsyncFileSystem):
936
+ if not method.startswith("_"):
937
+ continue
938
+ smethod = method[1:]
939
+ if private.match(method):
940
+ isco = inspect.iscoroutinefunction(getattr(obj, method, None))
941
+ unsync = getattr(getattr(obj, smethod, False), "__func__", None)
942
+ is_default = unsync is getattr(AbstractFileSystem, smethod, "")
943
+ if isco and is_default:
944
+ mth = sync_wrapper(getattr(obj, method), obj=obj)
945
+ setattr(obj, smethod, mth)
946
+ if not mth.__doc__:
947
+ mth.__doc__ = getattr(
948
+ getattr(AbstractFileSystem, smethod, None), "__doc__", ""
949
+ )
950
+
951
+
952
+ class FSSpecCoroutineCancel(Exception):
953
+ pass
954
+
955
+
956
+ def _dump_running_tasks(
957
+ printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
958
+ ):
959
+ import traceback
960
+
961
+ tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
962
+ if printout:
963
+ [task.print_stack() for task in tasks]
964
+ out = [
965
+ {
966
+ "locals": task._coro.cr_frame.f_locals,
967
+ "file": task._coro.cr_frame.f_code.co_filename,
968
+ "firstline": task._coro.cr_frame.f_code.co_firstlineno,
969
+ "linelo": task._coro.cr_frame.f_lineno,
970
+ "stack": traceback.format_stack(task._coro.cr_frame),
971
+ "task": task if with_task else None,
972
+ }
973
+ for task in tasks
974
+ ]
975
+ if cancel:
976
+ for t in tasks:
977
+ cbs = t._callbacks
978
+ t.cancel()
979
+ asyncio.futures.Future.set_exception(t, exc)
980
+ asyncio.futures.Future.cancel(t)
981
+ [cb[0](t) for cb in cbs] # cancels any dependent concurrent.futures
982
+ try:
983
+ t._coro.throw(exc) # exits coro, unless explicitly handled
984
+ except exc:
985
+ pass
986
+ return out
987
+
988
+
989
+ class AbstractAsyncStreamedFile(AbstractBufferedFile):
990
+ # no read buffering, and always auto-commit
991
+ # TODO: readahead might still be useful here, but needs async version
992
+
993
+ async def read(self, length=-1):
994
+ """
995
+ Return data from cache, or fetch pieces as necessary
996
+
997
+ Parameters
998
+ ----------
999
+ length: int (-1)
1000
+ Number of bytes to read; if <0, all remaining bytes.
1001
+ """
1002
+ length = -1 if length is None else int(length)
1003
+ if self.mode != "rb":
1004
+ raise ValueError("File not in read mode")
1005
+ if length < 0:
1006
+ length = self.size - self.loc
1007
+ if self.closed:
1008
+ raise ValueError("I/O operation on closed file.")
1009
+ if length == 0:
1010
+ # don't even bother calling fetch
1011
+ return b""
1012
+ out = await self._fetch_range(self.loc, self.loc + length)
1013
+ self.loc += len(out)
1014
+ return out
1015
+
1016
+ async def write(self, data):
1017
+ """
1018
+ Write data to buffer.
1019
+
1020
+ Buffer only sent on flush() or if buffer is greater than
1021
+ or equal to blocksize.
1022
+
1023
+ Parameters
1024
+ ----------
1025
+ data: bytes
1026
+ Set of bytes to be written.
1027
+ """
1028
+ if self.mode not in {"wb", "ab"}:
1029
+ raise ValueError("File not in write mode")
1030
+ if self.closed:
1031
+ raise ValueError("I/O operation on closed file.")
1032
+ if self.forced:
1033
+ raise ValueError("This file has been force-flushed, can only close")
1034
+ out = self.buffer.write(data)
1035
+ self.loc += out
1036
+ if self.buffer.tell() >= self.blocksize:
1037
+ await self.flush()
1038
+ return out
1039
+
1040
+ async def close(self):
1041
+ """Close file
1042
+
1043
+ Finalizes writes, discards cache
1044
+ """
1045
+ if getattr(self, "_unclosable", False):
1046
+ return
1047
+ if self.closed:
1048
+ return
1049
+ if self.mode == "rb":
1050
+ self.cache = None
1051
+ else:
1052
+ if not self.forced:
1053
+ await self.flush(force=True)
1054
+
1055
+ if self.fs is not None:
1056
+ self.fs.invalidate_cache(self.path)
1057
+ self.fs.invalidate_cache(self.fs._parent(self.path))
1058
+
1059
+ self.closed = True
1060
+
1061
+ async def flush(self, force=False):
1062
+ if self.closed:
1063
+ raise ValueError("Flush on closed file")
1064
+ if force and self.forced:
1065
+ raise ValueError("Force flush cannot be called more than once")
1066
+ if force:
1067
+ self.forced = True
1068
+
1069
+ if self.mode not in {"wb", "ab"}:
1070
+ # no-op to flush on read-mode
1071
+ return
1072
+
1073
+ if not force and self.buffer.tell() < self.blocksize:
1074
+ # Defer write on small block
1075
+ return
1076
+
1077
+ if self.offset is None:
1078
+ # Initialize a multipart upload
1079
+ self.offset = 0
1080
+ try:
1081
+ await self._initiate_upload()
1082
+ except:
1083
+ self.closed = True
1084
+ raise
1085
+
1086
+ if await self._upload_chunk(final=force) is not False:
1087
+ self.offset += self.buffer.seek(0, 2)
1088
+ self.buffer = io.BytesIO()
1089
+
1090
+ async def __aenter__(self):
1091
+ return self
1092
+
1093
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
1094
+ await self.close()
1095
+
1096
+ async def _fetch_range(self, start, end):
1097
+ raise NotImplementedError
1098
+
1099
+ async def _initiate_upload(self):
1100
+ pass
1101
+
1102
+ async def _upload_chunk(self, final=False):
1103
+ raise NotImplementedError
env/lib/python3.13/site-packages/fsspec/caching.py ADDED
@@ -0,0 +1,1011 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+ import math
7
+ import os
8
+ import threading
9
+ from collections import OrderedDict
10
+ from collections.abc import Callable
11
+ from concurrent.futures import Future, ThreadPoolExecutor
12
+ from itertools import groupby
13
+ from operator import itemgetter
14
+ from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, TypeVar
15
+
16
+ if TYPE_CHECKING:
17
+ import mmap
18
+
19
+ from typing_extensions import ParamSpec
20
+
21
+ P = ParamSpec("P")
22
+ else:
23
+ P = TypeVar("P")
24
+
25
+ T = TypeVar("T")
26
+
27
+
28
+ logger = logging.getLogger("fsspec")
29
+
30
+ Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
31
+ MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes
32
+
33
+
34
+ class BaseCache:
35
+ """Pass-though cache: doesn't keep anything, calls every time
36
+
37
+ Acts as base class for other cachers
38
+
39
+ Parameters
40
+ ----------
41
+ blocksize: int
42
+ How far to read ahead in numbers of bytes
43
+ fetcher: func
44
+ Function of the form f(start, end) which gets bytes from remote as
45
+ specified
46
+ size: int
47
+ How big this file is
48
+ """
49
+
50
+ name: ClassVar[str] = "none"
51
+
52
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
53
+ self.blocksize = blocksize
54
+ self.nblocks = 0
55
+ self.fetcher = fetcher
56
+ self.size = size
57
+ self.hit_count = 0
58
+ self.miss_count = 0
59
+ # the bytes that we actually requested
60
+ self.total_requested_bytes = 0
61
+
62
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
63
+ if start is None:
64
+ start = 0
65
+ if stop is None:
66
+ stop = self.size
67
+ if start >= self.size or start >= stop:
68
+ return b""
69
+ return self.fetcher(start, stop)
70
+
71
+ def _reset_stats(self) -> None:
72
+ """Reset hit and miss counts for a more ganular report e.g. by file."""
73
+ self.hit_count = 0
74
+ self.miss_count = 0
75
+ self.total_requested_bytes = 0
76
+
77
+ def _log_stats(self) -> str:
78
+ """Return a formatted string of the cache statistics."""
79
+ if self.hit_count == 0 and self.miss_count == 0:
80
+ # a cache that does nothing, this is for logs only
81
+ return ""
82
+ return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
83
+
84
+ def __repr__(self) -> str:
85
+ # TODO: use rich for better formatting
86
+ return f"""
87
+ <{self.__class__.__name__}:
88
+ block size : {self.blocksize}
89
+ block count : {self.nblocks}
90
+ file size : {self.size}
91
+ cache hits : {self.hit_count}
92
+ cache misses: {self.miss_count}
93
+ total requested bytes: {self.total_requested_bytes}>
94
+ """
95
+
96
+
97
+ class MMapCache(BaseCache):
98
+ """memory-mapped sparse file cache
99
+
100
+ Opens temporary file, which is filled blocks-wise when data is requested.
101
+ Ensure there is enough disc space in the temporary location.
102
+
103
+ This cache method might only work on posix
104
+
105
+ Parameters
106
+ ----------
107
+ blocksize: int
108
+ How far to read ahead in numbers of bytes
109
+ fetcher: Fetcher
110
+ Function of the form f(start, end) which gets bytes from remote as
111
+ specified
112
+ size: int
113
+ How big this file is
114
+ location: str
115
+ Where to create the temporary file. If None, a temporary file is
116
+ created using tempfile.TemporaryFile().
117
+ blocks: set[int]
118
+ Set of block numbers that have already been fetched. If None, an empty
119
+ set is created.
120
+ multi_fetcher: MultiFetcher
121
+ Function of the form f([(start, end)]) which gets bytes from remote
122
+ as specified. This function is used to fetch multiple blocks at once.
123
+ If not specified, the fetcher function is used instead.
124
+ """
125
+
126
+ name = "mmap"
127
+
128
+ def __init__(
129
+ self,
130
+ blocksize: int,
131
+ fetcher: Fetcher,
132
+ size: int,
133
+ location: str | None = None,
134
+ blocks: set[int] | None = None,
135
+ multi_fetcher: MultiFetcher | None = None,
136
+ ) -> None:
137
+ super().__init__(blocksize, fetcher, size)
138
+ self.blocks = set() if blocks is None else blocks
139
+ self.location = location
140
+ self.multi_fetcher = multi_fetcher
141
+ self.cache = self._makefile()
142
+
143
+ def _makefile(self) -> mmap.mmap | bytearray:
144
+ import mmap
145
+ import tempfile
146
+
147
+ if self.size == 0:
148
+ return bytearray()
149
+
150
+ # posix version
151
+ if self.location is None or not os.path.exists(self.location):
152
+ if self.location is None:
153
+ fd = tempfile.TemporaryFile()
154
+ self.blocks = set()
155
+ else:
156
+ fd = open(self.location, "wb+")
157
+ fd.seek(self.size - 1)
158
+ fd.write(b"1")
159
+ fd.flush()
160
+ else:
161
+ fd = open(self.location, "r+b")
162
+
163
+ return mmap.mmap(fd.fileno(), self.size)
164
+
165
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
166
+ logger.debug(f"MMap cache fetching {start}-{end}")
167
+ if start is None:
168
+ start = 0
169
+ if end is None:
170
+ end = self.size
171
+ if start >= self.size or start >= end:
172
+ return b""
173
+ start_block = start // self.blocksize
174
+ end_block = end // self.blocksize
175
+ block_range = range(start_block, end_block + 1)
176
+ # Determine which blocks need to be fetched. This sequence is sorted by construction.
177
+ need = (i for i in block_range if i not in self.blocks)
178
+ # Count the number of blocks already cached
179
+ self.hit_count += sum(1 for i in block_range if i in self.blocks)
180
+
181
+ ranges = []
182
+
183
+ # Consolidate needed blocks.
184
+ # Algorithm adapted from Python 2.x itertools documentation.
185
+ # We are grouping an enumerated sequence of blocks. By comparing when the difference
186
+ # between an ascending range (provided by enumerate) and the needed block numbers
187
+ # we can detect when the block number skips values. The key computes this difference.
188
+ # Whenever the difference changes, we know that we have previously cached block(s),
189
+ # and a new group is started. In other words, this algorithm neatly groups
190
+ # runs of consecutive block numbers so they can be fetched together.
191
+ for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
192
+ # Extract the blocks from the enumerated sequence
193
+ _blocks = tuple(map(itemgetter(1), _blocks))
194
+ # Compute start of first block
195
+ sstart = _blocks[0] * self.blocksize
196
+ # Compute the end of the last block. Last block may not be full size.
197
+ send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
198
+
199
+ # Fetch bytes (could be multiple consecutive blocks)
200
+ self.total_requested_bytes += send - sstart
201
+ logger.debug(
202
+ f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
203
+ )
204
+ ranges.append((sstart, send))
205
+
206
+ # Update set of cached blocks
207
+ self.blocks.update(_blocks)
208
+ # Update cache statistics with number of blocks we had to cache
209
+ self.miss_count += len(_blocks)
210
+
211
+ if not ranges:
212
+ return self.cache[start:end]
213
+
214
+ if self.multi_fetcher:
215
+ logger.debug(f"MMap get blocks {ranges}")
216
+ for idx, r in enumerate(self.multi_fetcher(ranges)):
217
+ (sstart, send) = ranges[idx]
218
+ logger.debug(f"MMap copy block ({sstart}-{send}")
219
+ self.cache[sstart:send] = r
220
+ else:
221
+ for sstart, send in ranges:
222
+ logger.debug(f"MMap get block ({sstart}-{send}")
223
+ self.cache[sstart:send] = self.fetcher(sstart, send)
224
+
225
+ return self.cache[start:end]
226
+
227
+ def __getstate__(self) -> dict[str, Any]:
228
+ state = self.__dict__.copy()
229
+ # Remove the unpicklable entries.
230
+ del state["cache"]
231
+ return state
232
+
233
+ def __setstate__(self, state: dict[str, Any]) -> None:
234
+ # Restore instance attributes
235
+ self.__dict__.update(state)
236
+ self.cache = self._makefile()
237
+
238
+
239
class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        # Single contiguous buffer covering file offsets [self.start, self.end)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), reading one blocksize ahead on a miss."""
        start = 0 if start is None else start
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        remaining = end - start
        if self.start <= start and end <= self.end:
            # Full cache hit: requested range lies inside the buffer
            self.hit_count += 1
            offset = start - self.start
            return self.cache[offset : offset + remaining]

        if self.start <= start < self.end:
            # Partial hit: reuse the tail of the buffer, fetch the rest
            self.miss_count += 1
            prefix = self.cache[start - self.start :]
            remaining -= len(prefix)
            start = self.end
        else:
            # Complete miss
            self.miss_count += 1
            prefix = b""

        # Read ahead by one block; the new buffer *replaces* the old one
        end = min(self.size, end + self.blocksize)
        self.total_requested_bytes += end - start
        self.cache = self.fetcher(start, end)
        self.start = start
        self.end = self.start + len(self.cache)
        return prefix + self.cache[:remaining]
283
+
284
+
285
class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        if blocksize > size:
            # this will buffer the whole thing
            blocksize = size
        super().__init__(blocksize, fetcher, size)
        # Holds the first ``blocksize`` bytes once fetched
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), serving the head of the file from cache.

        ``start``/``end`` of ``None`` mean start/end of file, matching the
        other cache implementations.
        """
        start = start or 0
        # Fix: ``end is None`` means "to end of file"; previously this line was
        # ``min(end, self.size)`` which raised TypeError for ``end=None``.
        end = self.size if end is None else min(end, self.size)
        if start > self.size:
            logger.debug("FirstChunkCache: requested start > file size")
            return b""

        if start < self.blocksize:
            # Request touches the cached head of the file
            if self.cache is None:
                self.miss_count += 1
                if end > self.blocksize:
                    # One fetch covers both the head and the requested tail
                    self.total_requested_bytes += end
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
                self.total_requested_bytes += self.blocksize
            part = self.cache[start:end]
            if end > self.blocksize:
                # Tail beyond the cached head must be fetched each time
                self.total_requested_bytes += end - self.blocksize
                part += self.fetcher(self.blocksize, end)
            self.hit_count += 1
            return part
        else:
            # Entirely outside the cached head: pass straight through
            self.miss_count += 1
            self.total_requested_bytes += end - start
            return self.fetcher(start, end)
329
+
330
+
331
class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # Fix: work on a *copy* of __dict__. Deleting from self.__dict__
        # directly would remove _fetch_block_cached from the live instance
        # every time it is pickled, breaking subsequent reads.
        state = self.__dict__.copy()
        # The lru_cache wrapper holds a bound method and is not picklable.
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        # Rebuild the LRU wrapper dropped in __getstate__
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), fetching whole blocks through the LRU."""
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.

        Raises
        ------
        ValueError
            If ``block_number`` is past the end of the file.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        self.total_requested_bytes += end - start
        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        self.hit_count += 1
        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
466
+
467
+
468
class BytesCache(BaseCache):
    """Cache which holds data in a in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        # Buffer covers file offsets [self.start, self.end); None until first fetch
        self.cache = b""
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), growing/stitching the internal buffer.

        The buffer may be extended at either end to stay contiguous; reads
        far from the current buffer replace it entirely.
        """
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            # (note: `end < self.end` is strict, so a read ending exactly at
            # the buffer edge falls through and triggers read-ahead)
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        # Read-ahead target: one blocksize past the requested end (clamped)
        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                # Extending backwards
                if self.end is None or self.end - end > self.blocksize:
                    # Gap too large to be worth keeping: replace buffer
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # Prepend just the missing head
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                # Extending forwards
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    # Gap too large: replace buffer
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # Append just the missing tail
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            # Discard whole blocks from the head once we are well past them
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        # Size of the buffered data, not of the file
        return len(self.cache)
567
+
568
+
569
class AllBytes(BaseCache):
    """Cache entire contents of the file

    The whole file is downloaded once at construction time (unless the
    ``data`` argument supplies it), after which every read is a slice.
    """

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            # One-shot download of the complete file
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        # Everything is already in memory: always a hit
        self.hit_count += 1
        return self.data[start:stop]
591
+
592
+
593
class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes.
    strict: bool, default False
        Whether reads that go beyond a known byte-range boundary should
        raise. If `False`, any read that ends outside a known part will be
        zero padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: dict[tuple[int, int], bytes] | None = None,
        strict: bool = False,
        **_: Any,
    ):
        # NOTE(review): despite the docstring wording, ``fetcher`` is never
        # called here; reads outside the known parts raise or zero-pad.
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    # Adjacent to the previous range: merge into one entry
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    @property
    def size(self):
        # Total number of known bytes (sum of span lengths), not the file size
        return sum(_[1] - _[0] for _ in self.data)

    @size.setter
    def size(self, value):
        # BaseCache.__init__ assigns ``size``; silently ignore it here
        pass

    @property
    def nblocks(self):
        # One "block" per known contiguous range
        return len(self.data)

    @nblocks.setter
    def nblocks(self, value):
        # Ignore assignment from BaseCache machinery
        pass

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """Assemble bytes [start, stop) from the known ranges.

        Raises
        ------
        ValueError
            If the requested range is not fully covered and ``strict`` is
            True (gaps), or if the start lies outside every known range.
        """
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        self.total_requested_bytes += stop - start

        out = b""
        started = False
        loc_old = 0
        # Ranges are scanned in ascending order of their start offset
        for loc0, loc1 in sorted(self.data):
            if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
                # entirely within the block
                off = start - loc0
                self.hit_count += 1
                return self.data[(loc0, loc1)][off : off + stop - start]
            if stop <= loc0:
                break
            if started and loc0 > loc_old:
                # a gap where we need data
                self.miss_count += 1
                if self.strict:
                    raise ValueError
                out += b"\x00" * (loc0 - loc_old)
            if loc0 <= start < loc1:
                # found the start
                self.hit_count += 1
                off = start - loc0
                out = self.data[(loc0, loc1)][off : off + stop - start]
                started = True
            elif start < loc0 and stop > loc1:
                # the whole block
                self.hit_count += 1
                out += self.data[(loc0, loc1)]
            elif loc0 <= stop <= loc1:
                # end block
                self.hit_count += 1
                return out + self.data[(loc0, loc1)][: stop - loc0]
            loc_old = loc1
        # Fell off the end of the known ranges
        self.miss_count += 1
        if started and not self.strict:
            # Zero-pad the tail beyond the last known byte
            return out + b"\x00" * (stop - loc_old)
        raise ValueError
707
+
708
+
709
+ class UpdatableLRU(Generic[P, T]):
710
+ """
711
+ Custom implementation of LRU cache that allows updating keys
712
+
713
+ Used by BackgroudBlockCache
714
+ """
715
+
716
+ class CacheInfo(NamedTuple):
717
+ hits: int
718
+ misses: int
719
+ maxsize: int
720
+ currsize: int
721
+
722
+ def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
723
+ self._cache: OrderedDict[Any, T] = collections.OrderedDict()
724
+ self._func = func
725
+ self._max_size = max_size
726
+ self._hits = 0
727
+ self._misses = 0
728
+ self._lock = threading.Lock()
729
+
730
+ def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
731
+ if kwargs:
732
+ raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
733
+ with self._lock:
734
+ if args in self._cache:
735
+ self._cache.move_to_end(args)
736
+ self._hits += 1
737
+ return self._cache[args]
738
+
739
+ result = self._func(*args, **kwargs)
740
+
741
+ with self._lock:
742
+ self._cache[args] = result
743
+ self._misses += 1
744
+ if len(self._cache) > self._max_size:
745
+ self._cache.popitem(last=False)
746
+
747
+ return result
748
+
749
+ def is_key_cached(self, *args: Any) -> bool:
750
+ with self._lock:
751
+ return args in self._cache
752
+
753
+ def add_key(self, result: T, *args: Any) -> None:
754
+ with self._lock:
755
+ self._cache[args] = result
756
+ if len(self._cache) > self._max_size:
757
+ self._cache.popitem(last=False)
758
+
759
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
760
+ with self._lock:
761
+ return self.CacheInfo(
762
+ maxsize=self._max_size,
763
+ currsize=len(self._cache),
764
+ hits=self._hits,
765
+ misses=self._misses,
766
+ )
767
+
768
+
769
class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        # Single worker so at most one speculative fetch is in flight
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # Fix: operate on a *copy* of __dict__. Deleting these attributes
        # from self.__dict__ directly would break the live instance (missing
        # LRU, executor and lock) every time it is pickled.
        state = self.__dict__.copy()
        # None of the threading machinery is picklable
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        self.__dict__.update(state)
        # Rebuild everything dropped in __getstate__
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), harvesting/starting background fetches."""
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.

        ``log_info`` only tags the log line ("sync" vs "async").
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
973
+
974
+
975
# Registry of available cache implementations, keyed by their ``name``.
caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}


def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """'Register' cache implementation.

    Makes ``cls`` available for lookup by its ``name`` attribute.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
        If the name is already registered and ``clobber`` is False.
    """
    name = cls.name
    if name in caches and not clobber:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls


# Register all implementations defined in this module
for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
env/lib/python3.13/site-packages/fsspec/callbacks.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import wraps
2
+
3
+
4
class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        # Extra kwargs are forwarded to every hook invocation (see call())
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The function returns a callback
        that has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this function instead
        of `branch` and avoid calling `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and pass a new child callback to it.
        """

        @wraps(fn)
        async def func(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return func

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        kw = self.kw.copy()
        kw.update(kwargs)
        if hook_name:
            # Single named hook; silently skip unknown names
            if hook_name not in self.hooks:
                return
            return self.hooks[hook_name](self.size, self.value, **kw)
        for hook in self.hooks.values() or []:
            hook(self.size, self.value, **kw)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iterations

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for item in iterable:
            self.relative_update()
            yield item

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The passed kwargs are
        to be *mutated* to add ``callback=``, if this class supports branching
        to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        # Base implementation does not branch; subclasses may mutate kwargs
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        # Any attribute not found normally resolves to a do-nothing callable
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        if maybe_callback is None:
            return DEFAULT_CALLBACK
        return maybe_callback
205
+
206
+
207
class NoOpCallback(Callback):
    """
    This implementation of Callback does exactly nothing
    """

    def call(self, *args, **kwargs):
        # Swallow every event; no hooks are ever invoked
        return None
214
+
215
+
216
class DotPrinterCallback(Callback):
    """
    Simple example Callback implementation

    Almost identical to Callback with a hook that prints a char; here we
    demonstrate how the outer layer may print "#" and the inner layer "."
    """

    def __init__(self, chr_to_print="#", **kwargs):
        # Character emitted on every update
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs to add new instance with different print char"""
        kwargs["callback"] = DotPrinterCallback(".")

    def call(self, **kwargs):
        """Just outputs a character"""
        print(self.chr, end="")
235
+
236
+
237
class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to `tqdm_cls`.
    tqdm_cls: (optional)
        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(),
        )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
        )

    You can also customize the progress bar by passing a subclass of `tqdm`.

    .. code-block:: python

        class TqdmFormat(tqdm):
            '''Provides a `total_time` format parameter'''
            @property
            def format_dict(self):
                d = super().format_dict
                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
                d.update(total_time=self.format_interval(total_time) + " in total")
                return d

    >>> with TqdmCallback(
            tqdm_kwargs={
                "desc": "desc",
                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
            },
            tqdm_cls=TqdmFormat,
        ) as callback:
            fs.upload(".", path2distant_data, recursive=True, callback=callback)
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        # tqdm is an optional dependency; import lazily and fail clearly
        try:
            from tqdm import tqdm

        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
        self._tqdm_kwargs = tqdm_kwargs or {}
        # The bar is created lazily on the first call()
        self.tqdm = None
        super().__init__(*args, **kwargs)

    def call(self, *args, **kwargs):
        if self.tqdm is None:
            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
        # Keep the bar in sync with possibly-updated size/value
        self.tqdm.total = self.size
        self.tqdm.update(self.value - self.tqdm.n)

    def close(self):
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None

    def __del__(self):
        # Best-effort cleanup if close() was never called
        return self.close()
322
+
323
+
324
# Shared no-op instance used whenever no callback is supplied;
# ``_DEFAULT_CALLBACK`` is retained as a backward-compatible alias.
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
env/lib/python3.13/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Helper functions for a standard streaming compression API"""
2
+
3
+ from zipfile import ZipFile
4
+
5
+ import fsspec.utils
6
+ from fsspec.spec import AbstractBufferedFile
7
+
8
+
9
def noop_file(file, mode, **kwargs):
    """Identity "compression": return the file object unchanged."""
    return file
11
+
12
+
13
+ # TODO: files should also be available as contexts
14
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
15
+ compr = {None: noop_file}
16
+
17
+
18
def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.
    """
    if isinstance(extensions, str):
        extensions = [extensions]

    # Refuse to clobber an existing codec name unless explicitly forced
    if not force and name in compr:
        raise ValueError(f"Duplicate compression registration: {name}")

    if not force:
        for ext in extensions:
            if ext in fsspec.utils.compressions:
                raise ValueError(
                    f"Duplicate compression file extension: {ext} ({name})"
                )

    compr[name] = callback
    for ext in extensions:
        fsspec.utils.compressions[ext] = name
53
+
54
+
55
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Open a single member of a ZIP archive as a file-like object.

    In write mode, a new archive is created around ``infile`` and one member
    (default name "file") is opened for writing; closing that member also
    finalises the archive. In read mode, the first member is used when no
    ``filename`` is given.
    """
    if "r" in mode:
        z = ZipFile(infile)
        member = z.namelist()[0] if filename is None else filename
        return z.open(member, mode="r", **kwargs)

    # write mode: patch close() so finishing the member also closes the archive
    z = ZipFile(infile, mode="w", **kwargs)
    fo = z.open(filename or "file", mode="w")
    fo.close = lambda closer=fo.close: closer() or z.close()
    return fo
66
+
67
+
68
register_compression("zip", unzip, "zip")

try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

try:  # pragma: no cover
    # prefer the ISA-L accelerated gzip implementation when available
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    # fall back to the stdlib gzip implementation
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")
except ImportError:
    pass

try:
    # lzmaffi (cffi-based) overrides the stdlib lzma registration when present
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass
106
+
107
+
108
class SnappyFile(AbstractBufferedFile):
    """Buffered file wrapper applying snappy (de)compression on the fly.

    Snappy streams are not seekable, so ``seek`` is disabled; ``size`` is a
    large placeholder because the decompressed length is unknown up front.
    """

    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        # codec direction depends on whether we read or write
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        # compress the whole internal buffer and push it to the wrapped file
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        # NOTE(review): reads end-start *compressed* bytes from the underlying
        # file, which will generally decompress to a different length — the
        # buffered-read machinery treats this as best-effort streaming.
        data = self.infile.read(end - start)
        return self.codec.decompress(data)
137
+
138
+
139
try:
    import snappy

    snappy.compress(b"")  # verify the binding actually works, not just imports
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    # zstd in the standard library for python >= 3.14
    from compression.zstd import ZstdFile

    register_compression("zstd", ZstdFile, "zst")

except ImportError:
    # otherwise use the third-party zstandard package, if installed
    try:
        import zstandard as zstd

        def zstandard_file(infile, mode="rb"):
            if "r" in mode:
                cctx = zstd.ZstdDecompressor()
                return cctx.stream_reader(infile)
            else:
                cctx = zstd.ZstdCompressor(level=10)
                return cctx.stream_writer(infile)

        register_compression("zstd", zstandard_file, "zst")
    except ImportError:
        pass
178
+
179
+
180
def available_compressions():
    """Return a list of the implemented compressions."""
    return [*compr]
env/lib/python3.13/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Looks for variables of the form ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
    as a json dictionary and used to ``update`` the config of the
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    deferred_kwargs = []
    for key in envdict:
        if not key.startswith("FSSPEC"):
            continue  # unrelated environment variable
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                # FSSPEC_<proto>_<kwarg>: apply later, after dict-valued vars
                deferred_kwargs.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
                continue
            if not isinstance(value, dict):
                warnings.warn(
                    f"Ignoring environment variable {key} due to not being a dict:"
                    f" {type(value)}"
                )
                continue
            _, proto = key.split("_", 1)
            conf_dict.setdefault(proto.lower(), {}).update(value)
        else:
            # e.g. "FSSPEC", "FSSPEC_", or "FSSPEC__X"
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in deferred_kwargs:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
62
+
63
+
64
def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    # sorted order makes "later file wins" deterministic
    for fn in sorted(os.listdir(cdir)):
        fullpath = os.path.join(cdir, fn)
        if fn.endswith(".ini"):
            ini = configparser.ConfigParser()
            ini.read(fullpath)
            for section in ini:
                if section == "DEFAULT":
                    # configparser's implicit DEFAULT section is not a protocol
                    continue
                conf_dict.setdefault(section, {}).update(dict(ini[section]))
        if fn.endswith(".json"):
            with open(fullpath) as f:
                js = json.load(f)
            for proto in js:
                conf_dict.setdefault(proto, {}).update(dict(js[proto]))
97
+
98
+
99
def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the classes ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    conf_dict = conf if conf_dict is None else conf_dict
    proto_attr = cls.protocol
    if not isinstance(proto_attr, (tuple, list)):
        proto_attr = [proto_attr]

    defaults = {}
    for proto in proto_attr:
        # layer config defaults for every protocol alias of this class
        defaults.update(conf_dict.get(proto, {}))
    # explicit kwargs always win over configured defaults
    defaults.update(**kwargs)
    return defaults
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
env/lib/python3.13/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import sys
5
+ import time
6
+ from collections import deque
7
+ from collections.abc import Generator, Sequence
8
+
9
+ import pytest
10
+
11
+ import fsspec
12
+
13
+
14
@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.

    The shared in-memory store is emptied both before and after the test so
    state cannot leak between tests.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")  # the root pseudo-directory must always exist
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")
29
+
30
+
31
class InstanceCacheInspector:
    """
    Helper class to inspect instance caches of filesystem classes in tests.
    """

    def clear(self) -> None:
        """
        Clear instance caches of all currently imported filesystem classes.
        """
        # breadth-first walk over the AbstractFileSystem subclass tree
        classes = deque([fsspec.spec.AbstractFileSystem])
        while classes:
            cls = classes.popleft()
            cls.clear_instance_cache()
            classes.extend(cls.__subclasses__())

    def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
        """
        Gather counts of filesystem instances in the instance caches
        of all currently imported filesystem classes.

        Parameters
        ----------
        omit_zero:
            Whether to omit instance types with no cached instances.
        """
        out: dict[str, int] = {}
        classes = deque([fsspec.spec.AbstractFileSystem])
        while classes:
            cls = classes.popleft()
            count = len(cls._cache)  # there is no public interface for the cache
            # note: skip intermediate AbstractFileSystem subclasses
            # if they proxy the protocol attribute via a property.
            if isinstance(cls.protocol, (Sequence, str)):
                # a multi-protocol class is keyed by its first protocol name
                key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
                if count or not omit_zero:
                    out[key] = count
            classes.extend(cls.__subclasses__())
        return out
69
+
70
+
71
@pytest.fixture(scope="function", autouse=True)
def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
    """
    Fixture to ensure empty filesystem instance caches before and after a test.

    Used by default for all tests.
    Clears caches of all imported filesystem classes.
    Can be used to write test assertions about instance caches.

    Usage:

        def test_something(instance_caches):
            # Test code here
            fsspec.open("file://abc")
            fsspec.open("memory://foo/bar")

            # Test assertion
            assert instance_caches.gather_counts() == {"file": 1, "memory": 1}

    Returns
    -------
    instance_caches: An instance cache inspector for clearing and inspecting caches.
    """
    ic = InstanceCacheInspector()

    # empty caches going in; the finally guarantees they are empty going out
    ic.clear()
    try:
        yield ic
    finally:
        ic.clear()
101
+
102
+
103
@pytest.fixture(scope="function")
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.

    Starts a pyftpdlib server in a subprocess rooted at a temporary
    directory and yields ``(host, port, username, password)`` for it.
    """
    pytest.importorskip("pyftpdlib")

    d = str(tmpdir)
    # seed a file so read tests have something to fetch over FTP
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        time.sleep(1)  # give the server a moment to start listening
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            # best-effort cleanup; tmpdir fixture handles leftovers
            pass
env/lib/python3.13/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from fsspec.caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from fsspec.compression import compr
20
+ from fsspec.config import conf
21
+ from fsspec.registry import available_protocols, filesystem, get_filesystem_class
22
+ from fsspec.utils import (
23
+ _unstrip_protocol,
24
+ build_name_function,
25
+ infer_compression,
26
+ stringify_path,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    autoopen: bool
        If True, calls open() immediately. Mostly used by pickle
    pos: int
        If given and autoopen is True, seek to this location immediately
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        # resolve "infer" immediately so the stored value is concrete
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        # stack of layered file objects (innermost first); empty until opened
        self.fobjects = []

    def __reduce__(self):
        # pickle only the constructor arguments, never live file handles
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        # always open the raw file in binary; text decoding is layered on top
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                raise FileNotFoundError(
                    "%s not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
                    self.path,
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        # callers interact with the outermost wrapper
        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        # the path with the filesystem's protocol prefix restored
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        # close outermost-first so each wrapper can flush into the layer below
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()
+
157
+
158
class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        fs = self.fs
        # walk down wrapper filesystems (fs.fs chain) looking for bulk support
        while True:
            if hasattr(fs, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = fs.open_many(self)
                return self.files
            if hasattr(fs, "fs") and fs.fs is not None:
                fs = fs.fs
            else:
                break
        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        fs = self.fs
        [s.__exit__(*args) for s in self]
        if "r" not in self.mode:
            # in write mode, let caching filesystems upload in bulk
            while True:
                if hasattr(fs, "open_many"):
                    # check for concurrent cache upload
                    fs.commit_many(self.files)
                    return
                if hasattr(fs, "fs") and fs.fs is not None:
                    fs = fs.fs
                else:
                    break

    def __getitem__(self, item):
        out = super().__getitem__(item)
        if isinstance(item, slice):
            # slices preserve the context-manager behaviour
            return OpenFiles(out, mode=self.mode, fs=self.fs)
        return out

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"
+ return f"<List of {len(self)} OpenFile instances>"
214
+
215
+
216
def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        # local filesystem creates directories itself on write
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        # pre-create parent directories for all output paths
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                # some backends disallow mkdir; defer failure to the write itself
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )
+ )
329
+
330
+
331
def _un_chain(path, kwargs):
    """Split a chained URL ("a::b::c") into per-protocol (path, protocol, kwargs).

    Returns a list of ``(url, protocol, kwargs)`` triples, outermost protocol
    first. ``kwargs`` entries keyed by protocol name are routed to the
    matching link of the chain.
    """
    # Avoid a circular import
    from fsspec.implementations.chained import ChainedFileSystem

    if "::" in path:
        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
        known_protocols = set(available_protocols())
        bits = []

        # split on '::', then ensure each bit has a protocol
        for p in path.split("::"):
            if p in known_protocols:
                bits.append(p + "://")
            elif "://" in p or x.match(p):
                bits.append(p)
            else:
                # bare word that looks like a protocol name: treat it as one
                bits.append(p + "://")
    else:
        bits = [path]

    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()

    # process innermost link first so outer links can inherit the inner path
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})

        if bit is bits[0]:
            # un-keyed leftover kwargs belong to the outermost protocol
            kws.update(kwargs)

        kw = dict(
            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
            **kws,
        )
        bit = cls._strip_protocol(bit)

        if (
            "target_protocol" not in kw
            and issubclass(cls, ChainedFileSystem)
            and not bit
        ):
            # replace bit if we are chaining and no path given
            bit = previous_bit

        out.append((bit, protocol, kw))
        previous_bit = bit

    # restore outermost-first ordering
    out.reverse()
    return out
+ return out
384
+
385
+
386
def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, protocol, kw = ch
        if i == len(chain) - 1:
            # outermost link: its kwargs go directly to the filesystem
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls
    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath
+ return fs, urlpath
434
+
435
+
436
# Module-level default for open(expand=None); initialised once from the
# "open_expand" config value and adjustable at runtime.
DEFAULT_EXPAND = conf.get("open_expand", False)
437
+
438
+
439
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    expand = DEFAULT_EXPAND if expand is None else expand
    # delegate to open_files with a single-element list and return the one file
    out = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if not out:
        # e.g. a glob that matched nothing
        raise FileNotFoundError(urlpath)
    return out[0]
+ return out[0]
522
+
523
+
524
def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to FS for or used by open_files (e.g., compression)
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    # the filesystem must expose real local paths for this to make sense
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    with of as files:
        # entering the context triggers any download; capture local names
        paths = [f.name for f in files]
    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
        # single non-glob input -> single path out
        return paths[0]
    return paths
+ return paths
555
+
556
+
557
def get_compression(urlpath, compression):
    """Resolve a compression specifier, inferring from the path if asked.

    Raises ValueError if the resolved name is not a registered codec.
    """
    resolved = infer_compression(urlpath) if compression == "infer" else compression
    if resolved is None or resolved in compr:
        return resolved
    raise ValueError(f"Compression type {resolved} not supported")
563
+
564
+
565
def split_protocol(urlpath):
    """Return a ``(protocol, path)`` pair for a URL-like path."""
    urlpath = stringify_path(urlpath)
    before, sep, after = urlpath.partition("://")
    if sep and len(before) > 1:
        # single-character "protocols" are Windows drive letters, not schemes
        return before, after
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath
576
+
577
+
578
def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    # identify the protocol, then delegate to that filesystem class, since
    # each backend defines its own notion of the "path part" of a URL
    protocol, _ = split_protocol(urlpath)
    cls = get_filesystem_class(protocol)
    return cls._strip_protocol(urlpath)
583
+
584
+
585
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    expanded_paths = []
    paths = list(paths)

    # "x" (exclusive create) is a writing mode too; matches the single-path
    # handling in ``get_fs_token_paths``
    if "w" in mode or "x" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths
630
+
631
+
632
def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        # use the first entry to work out protocol/chaining for the whole set
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)
    storage_options = storage_options or {}
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options or {})
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            # outermost filesystem: its kwargs stay at the top level
            inkwargs = dict(**kw, **inkwargs)
            continue
        # inner filesystems become target_options/target_protocol/fo kwargs
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        # un-chain each entry; all entries must agree on a single protocol
        pchains = [
            _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
        ]
        if len({pc[1] for pc in pchains}) > 1:
            # NOTE(review): logging-style %s arg in ValueError — the message
            # is never %-formatted; looks like it was meant to be an f-string
            raise ValueError("Protocol mismatch getting fs from %s", urlpath)
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            # writing: generate ``num`` concrete paths from the "*" template
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            # reading a glob pattern: expand, skipping directories
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths
708
+
709
+
710
+ def _expand_paths(path, name_function, num):
711
+ if isinstance(path, str):
712
+ if path.count("*") > 1:
713
+ raise ValueError("Output path spec must contain exactly one '*'.")
714
+ elif "*" not in path:
715
+ path = os.path.join(path, "*.part")
716
+
717
+ if name_function is None:
718
+ name_function = build_name_function(num - 1)
719
+
720
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
721
+ if paths != sorted(paths):
722
+ logger.warning(
723
+ "In order to preserve order between partitions"
724
+ " paths created with ``name_function`` should "
725
+ "sort to partition order"
726
+ )
727
+ elif isinstance(path, (tuple, list)):
728
+ assert len(path) == num
729
+ paths = list(path)
730
+ else:
731
+ raise ValueError(
732
+ "Path should be either\n"
733
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
734
+ "2. A directory: 'foo/\n"
735
+ "3. A path with a '*' in it: 'foo.*.json'"
736
+ )
737
+ return paths
738
+
739
+
740
class PickleableTextIOWrapper(io.TextIOWrapper):
    """A picklable ``io.TextIOWrapper``.

    The stdlib wrapper cannot be pickled; this subclass remembers its
    constructor arguments so the instance can be rebuilt on unpickling.
    Requires that ``buffer`` be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        # record exactly how we were constructed for __reduce__ to replay
        self.args = (buffer, encoding, errors, newline, line_buffering, write_through)
        super().__init__(*self.args)

    def __reduce__(self):
        # reconstruct by calling the class with the saved arguments
        return PickleableTextIOWrapper, self.args
env/lib/python3.13/site-packages/fsspec/dircache.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from collections.abc import MutableMapping
3
+ from functools import lru_cache
4
+
5
+
6
class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        """
        self._cache = {}
        # per-key insertion timestamps; only populated when expiry is enabled
        self._times = {}
        if max_paths:
            # LRU bookkeeping: each get/set calls self._q(key) to mark the key
            # as recently used; the lru_cache-wrapped lambda pops stale keys
            # from the real cache when they cycle out
            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                # expired: drop the listing and its timestamp (previously the
                # timestamp leaked, accumulating forever)
                del self._cache[item]
                self._times.pop(item, None)
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()
        self._times.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            # caching disabled: setting is a silent no-op by design
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        del self._cache[key]
        # keep the timestamp map in sync with the cache
        self._times.pop(key, None)

    def __iter__(self):
        # snapshot keys first; ``in`` re-checks expiry and may mutate the dict
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        # pickle only the configuration, not the cached listings
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
env/lib/python3.13/site-packages/fsspec/exceptions.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ fsspec user-defined exception classes
3
+ """
4
+
5
+ import asyncio
6
+
7
+
8
class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a different blocksize than it was
    written with
    """
13
+
14
+
15
class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec operation times out
    """
env/lib/python3.13/site-packages/fsspec/fuse.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import stat
5
+ import threading
6
+ import time
7
+ from errno import EIO, ENOENT
8
+
9
+ from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
10
+
11
+ from fsspec import __version__
12
+ from fsspec.core import url_to_fs
13
+
14
+ logger = logging.getLogger("fsspec.fuse")
15
+
16
+
17
class FUSEr(Operations):
    """fusepy ``Operations`` implementation backed by an fsspec filesystem.

    FUSE callbacks are translated into calls on ``self.fs``, with all paths
    interpreted relative to the remote root ``path``. Open file objects are
    held in ``self.cache``, keyed by an ever-increasing integer handle.
    """

    def __init__(self, fs, path, ready_file=False):
        self.fs = fs  # fsspec filesystem instance
        self.cache = {}  # open file objects keyed by integer handle
        self.root = path.rstrip("/") + "/"  # remote root, always "/"-terminated
        self.counter = 0  # next handle to hand out
        logger.info("Starting FUSE at %s", path)
        # if True, a virtual "/.fuse_ready" file is exposed as a status probe
        self._ready_file = ready_file

    def getattr(self, path, fh=None):
        """Return a stat-like dict for ``path``; ENOENT if it does not exist."""
        logger.debug("getattr %s", path)
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            return {"type": "file", "st_size": 5}

        path = "".join([self.root, path.lstrip("/")]).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        # default uid/gid 1000 and permissive mode when the backend has none
        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20  # advertise 5MB blocks
        data["st_nlink"] = 1
        # fall back to "now" for timestamps the backend does not provide
        data["st_atime"] = info["atime"] if "atime" in info else time.time()
        data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
        data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
        return data

    def readdir(self, path, fh):
        """List directory entries, plus the conventional "." and ".."."""
        logger.debug("readdir %s", path)
        path = "".join([self.root, path.lstrip("/")])
        files = self.fs.ls(path, False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        # ``mode`` is ignored; the backend decides permissions
        path = "".join([self.root, path.lstrip("/")])
        self.fs.mkdir(path)
        return 0

    def rmdir(self, path):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.rmdir(path)
        return 0

    def read(self, path, size, offset, fh):
        """Read ``size`` bytes at ``offset`` from the handle ``fh``."""
        logger.debug("read %s", (path, size, offset))
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        out = f.read(size)
        return out

    def write(self, path, data, offset, fh):
        """Write ``data`` at ``offset`` through the handle ``fh``."""
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        """Create a new file and return an integer handle for it."""
        logger.debug("create %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        self.fs.touch(fn)  # OS will want to get attributes immediately
        f = self.fs.open(fn, "wb")
        self.cache[self.counter] = f
        self.counter += 1
        return self.counter - 1

    def open(self, path, flags):
        """Open ``path`` and return an integer handle."""
        logger.debug("open %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        # NOTE(review): the low bit of ``flags`` distinguishes O_RDONLY (0)
        # from O_WRONLY (1); O_RDWR and other flag bits are not handled —
        # confirm this heuristic is sufficient for the supported workloads
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        self.cache[self.counter] = self.fs.open(fn, mode)
        self.counter += 1
        return self.counter - 1

    def truncate(self, path, length, fh=None):
        fn = "".join([self.root, path.lstrip("/")])
        if length != 0:
            # only truncate-to-zero is supported
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        fn = "".join([self.root, path.lstrip("/")])
        try:
            self.fs.rm(fn, False)
        except (OSError, FileNotFoundError) as exc:
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        """Close and forget the handle; errors are printed, never raised."""
        try:
            if fh in self.cache:
                f = self.cache[fh]
                f.close()
                self.cache.pop(fh)
        except Exception as e:
            # swallow close errors: FUSE release cannot usefully fail
            print(e)
        return 0

    def chmod(self, path, mode):
        # only supported if the backend filesystem implements chmod
        if hasattr(self.fs, "chmod"):
            path = "".join([self.root, path.lstrip("/")])
            return self.fs.chmod(path, mode)
        raise NotImplementedError
140
+
141
+
142
def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount stuff in a local directory

    This uses fusepy to make it appear as if a given path on an fsspec
    instance is in fact resident within the local file-system.

    This requires that fusepy by installed, and that FUSE be available on
    the system (typically requiring a package to be installed with
    apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounter directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or Subclass of FUSEr
        To override the default behavior of FUSEr. For Example, logging
        to file.

    Returns
    -------
    The daemon thread when ``foreground`` is False, otherwise None
    (blocks until interrupted).
    """

    def start_fuse():
        # constructing FUSE performs the mount; blocks when foreground=True
        FUSE(
            ops_class(fs, path, ready_file=ready_file),
            mount_point,
            nothreads=not threads,
            foreground=foreground,
        )

    if not foreground:
        th = threading.Thread(target=start_fuse, daemon=True)
        th.start()
        return th
    else:  # pragma: no cover
        try:
            start_fuse()
        except KeyboardInterrupt:
            pass
202
+
203
+
204
def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represents the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        # show the raw docstring (with examples) instead of re-wrapped text
        def format_help(self):
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    # NOTE(review): these three flags use action="store_false", which makes
    # their defaults True and passing the flag turns the behavior OFF — the
    # opposite of what the help text "(Default: False)" says. Confirm intent.
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Running in foreground or not (Default: False)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Running with threads support (Default: False)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="The `.fuse_ready` file will exist after FUSE is ready. "
        "(Debugging purpose, Default: False)",
    )
    args = parser.parse_args(args)

    # parse -o 'proto-key=value[type]' options into per-protocol kwargs dicts
    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            # "proto-setting" form: group the setting under its protocol
            fs_name, setting_name = key.split("-", 1)
            if fs_name in kwargs:
                kwargs[fs_name][setting_name] = value
            else:
                kwargs[fs_name] = {setting_name: value}
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )
318
+ )
319
+
320
+
321
+ if __name__ == "__main__":
322
+ import sys
323
+
324
+ main(sys.argv[1:])
env/lib/python3.13/site-packages/fsspec/generic.py ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import uuid
8
+
9
+ from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
10
+ from .callbacks import DEFAULT_CALLBACK
11
+ from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
12
+
13
+ _generic_fs = {}
14
+ logger = logging.getLogger("fsspec.generic")
15
+
16
+
17
+ def set_generic_fs(protocol, **storage_options):
18
+ """Populate the dict used for method=="generic" lookups"""
19
+ _generic_fs[protocol] = filesystem(protocol, **storage_options)
20
+
21
+
22
+ def _resolve_fs(url, method, protocol=None, storage_options=None):
23
+ """Pick instance of backend FS"""
24
+ url = url[0] if isinstance(url, (list, tuple)) else url
25
+ protocol = protocol or split_protocol(url)[0]
26
+ storage_options = storage_options or {}
27
+ if method == "default":
28
+ return filesystem(protocol)
29
+ if method == "generic":
30
+ return _generic_fs[protocol]
31
+ if method == "current":
32
+ cls = get_filesystem_class(protocol)
33
+ return cls.current()
34
+ if method == "options":
35
+ fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
36
+ return fs
37
+ raise ValueError(f"Unknown FS resolution method: {method}")
38
+
39
+
40
+ def rsync(
41
+ source,
42
+ destination,
43
+ delete_missing=False,
44
+ source_field="size",
45
+ dest_field="size",
46
+ update_cond="different",
47
+ inst_kwargs=None,
48
+ fs=None,
49
+ **kwargs,
50
+ ):
51
+ """Sync files between two directory trees
52
+
53
+ (experimental)
54
+
55
+ Parameters
56
+ ----------
57
+ source: str
58
+ Root of the directory tree to take files from. This must be a directory, but
59
+ do not include any terminating "/" character
60
+ destination: str
61
+ Root path to copy into. The contents of this location should be
62
+ identical to the contents of ``source`` when done. This will be made a
63
+ directory, and the terminal "/" should not be included.
64
+ delete_missing: bool
65
+ If there are paths in the destination that don't exist in the
66
+ source and this is True, delete them. Otherwise, leave them alone.
67
+ source_field: str | callable
68
+ If ``update_field`` is "different", this is the key in the info
69
+ of source files to consider for difference. Maybe a function of the
70
+ info dict.
71
+ dest_field: str | callable
72
+ If ``update_field`` is "different", this is the key in the info
73
+ of destination files to consider for difference. May be a function of
74
+ the info dict.
75
+ update_cond: "different"|"always"|"never"
76
+ If "always", every file is copied, regardless of whether it exists in
77
+ the destination. If "never", files that exist in the destination are
78
+ not copied again. If "different" (default), only copy if the info
79
+ fields given by ``source_field`` and ``dest_field`` (usually "size")
80
+ are different. Other comparisons may be added in the future.
81
+ inst_kwargs: dict|None
82
+ If ``fs`` is None, use this set of keyword arguments to make a
83
+ GenericFileSystem instance
84
+ fs: GenericFileSystem|None
85
+ Instance to use if explicitly given. The instance defines how to
86
+ to make downstream file system instances from paths.
87
+
88
+ Returns
89
+ -------
90
+ dict of the copy operations that were performed, {source: destination}
91
+ """
92
+ fs = fs or GenericFileSystem(**(inst_kwargs or {}))
93
+ source = fs._strip_protocol(source)
94
+ destination = fs._strip_protocol(destination)
95
+ allfiles = fs.find(source, withdirs=True, detail=True)
96
+ if not fs.isdir(source):
97
+ raise ValueError("Can only rsync on a directory")
98
+ otherfiles = fs.find(destination, withdirs=True, detail=True)
99
+ dirs = [
100
+ a
101
+ for a, v in allfiles.items()
102
+ if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
103
+ ]
104
+ logger.debug(f"{len(dirs)} directories to create")
105
+ if dirs:
106
+ fs.make_many_dirs(
107
+ [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
108
+ )
109
+ allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
110
+ logger.debug(f"{len(allfiles)} files to consider for copy")
111
+ to_delete = [
112
+ o
113
+ for o, v in otherfiles.items()
114
+ if o.replace(destination, source) not in allfiles and v["type"] == "file"
115
+ ]
116
+ for k, v in allfiles.copy().items():
117
+ otherfile = k.replace(source, destination)
118
+ if otherfile in otherfiles:
119
+ if update_cond == "always":
120
+ allfiles[k] = otherfile
121
+ elif update_cond == "never":
122
+ allfiles.pop(k)
123
+ elif update_cond == "different":
124
+ inf1 = source_field(v) if callable(source_field) else v[source_field]
125
+ v2 = otherfiles[otherfile]
126
+ inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
127
+ if inf1 != inf2:
128
+ # details mismatch, make copy
129
+ allfiles[k] = otherfile
130
+ else:
131
+ # details match, don't copy
132
+ allfiles.pop(k)
133
+ else:
134
+ # file not in target yet
135
+ allfiles[k] = otherfile
136
+ logger.debug(f"{len(allfiles)} files to copy")
137
+ if allfiles:
138
+ source_files, target_files = zip(*allfiles.items())
139
+ fs.cp(source_files, target_files, **kwargs)
140
+ logger.debug(f"{len(to_delete)} files to delete")
141
+ if delete_missing and to_delete:
142
+ fs.rm(to_delete)
143
+ return allfiles
144
+
145
+
146
+ class GenericFileSystem(AsyncFileSystem):
147
+ """Wrapper over all other FS types
148
+
149
+ <experimental!>
150
+
151
+ This implementation is a single unified interface to be able to run FS operations
152
+ over generic URLs, and dispatch to the specific implementations using the URL
153
+ protocol prefix.
154
+
155
+ Note: instances of this FS are always async, even if you never use it with any async
156
+ backend.
157
+ """
158
+
159
+ protocol = "generic" # there is no real reason to ever use a protocol with this FS
160
+
161
+ def __init__(self, default_method="default", storage_options=None, **kwargs):
162
+ """
163
+
164
+ Parameters
165
+ ----------
166
+ default_method: str (optional)
167
+ Defines how to configure backend FS instances. Options are:
168
+ - "default": instantiate like FSClass(), with no
169
+ extra arguments; this is the default instance of that FS, and can be
170
+ configured via the config system
171
+ - "generic": takes instances from the `_generic_fs` dict in this module,
172
+ which you must populate before use. Keys are by protocol
173
+ - "options": expects storage_options, a dict mapping protocol to
174
+ kwargs to use when constructing the filesystem
175
+ - "current": takes the most recently instantiated version of each FS
176
+ """
177
+ self.method = default_method
178
+ self.st_opts = storage_options
179
+ super().__init__(**kwargs)
180
+
181
+ def _parent(self, path):
182
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
183
+ return fs.unstrip_protocol(fs._parent(path))
184
+
185
+ def _strip_protocol(self, path):
186
+ # normalization only
187
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
188
+ return fs.unstrip_protocol(fs._strip_protocol(path))
189
+
190
+ async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
191
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
192
+ if fs.async_impl:
193
+ out = await fs._find(
194
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
195
+ )
196
+ else:
197
+ out = fs.find(
198
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
199
+ )
200
+ result = {}
201
+ for k, v in out.items():
202
+ v = v.copy() # don't corrupt target FS dircache
203
+ name = fs.unstrip_protocol(k)
204
+ v["name"] = name
205
+ result[name] = v
206
+ if detail:
207
+ return result
208
+ return list(result)
209
+
210
+ async def _info(self, url, **kwargs):
211
+ fs = _resolve_fs(url, self.method)
212
+ if fs.async_impl:
213
+ out = await fs._info(url, **kwargs)
214
+ else:
215
+ out = fs.info(url, **kwargs)
216
+ out = out.copy() # don't edit originals
217
+ out["name"] = fs.unstrip_protocol(out["name"])
218
+ return out
219
+
220
+ async def _ls(
221
+ self,
222
+ url,
223
+ detail=True,
224
+ **kwargs,
225
+ ):
226
+ fs = _resolve_fs(url, self.method)
227
+ if fs.async_impl:
228
+ out = await fs._ls(url, detail=True, **kwargs)
229
+ else:
230
+ out = fs.ls(url, detail=True, **kwargs)
231
+ out = [o.copy() for o in out] # don't edit originals
232
+ for o in out:
233
+ o["name"] = fs.unstrip_protocol(o["name"])
234
+ if detail:
235
+ return out
236
+ else:
237
+ return [o["name"] for o in out]
238
+
239
+ async def _cat_file(
240
+ self,
241
+ url,
242
+ **kwargs,
243
+ ):
244
+ fs = _resolve_fs(url, self.method)
245
+ if fs.async_impl:
246
+ return await fs._cat_file(url, **kwargs)
247
+ else:
248
+ return fs.cat_file(url, **kwargs)
249
+
250
+ async def _pipe_file(
251
+ self,
252
+ path,
253
+ value,
254
+ **kwargs,
255
+ ):
256
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
257
+ if fs.async_impl:
258
+ return await fs._pipe_file(path, value, **kwargs)
259
+ else:
260
+ return fs.pipe_file(path, value, **kwargs)
261
+
262
+ async def _rm(self, url, **kwargs):
263
+ urls = url
264
+ if isinstance(urls, str):
265
+ urls = [urls]
266
+ fs = _resolve_fs(urls[0], self.method)
267
+ if fs.async_impl:
268
+ await fs._rm(urls, **kwargs)
269
+ else:
270
+ fs.rm(url, **kwargs)
271
+
272
+ async def _makedirs(self, path, exist_ok=False):
273
+ logger.debug("Make dir %s", path)
274
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
275
+ if fs.async_impl:
276
+ await fs._makedirs(path, exist_ok=exist_ok)
277
+ else:
278
+ fs.makedirs(path, exist_ok=exist_ok)
279
+
280
+ def rsync(self, source, destination, **kwargs):
281
+ """Sync files between two directory trees
282
+
283
+ See `func:rsync` for more details.
284
+ """
285
+ rsync(source, destination, fs=self, **kwargs)
286
+
287
+ async def _cp_file(
288
+ self,
289
+ url,
290
+ url2,
291
+ blocksize=2**20,
292
+ callback=DEFAULT_CALLBACK,
293
+ tempdir: str | None = None,
294
+ **kwargs,
295
+ ):
296
+ fs = _resolve_fs(url, self.method)
297
+ fs2 = _resolve_fs(url2, self.method)
298
+ if fs is fs2:
299
+ # pure remote
300
+ if fs.async_impl:
301
+ return await fs._copy(url, url2, **kwargs)
302
+ else:
303
+ return fs.copy(url, url2, **kwargs)
304
+ await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
305
+
306
+ async def _make_many_dirs(self, urls, exist_ok=True):
307
+ fs = _resolve_fs(urls[0], self.method)
308
+ if fs.async_impl:
309
+ coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
310
+ await _run_coros_in_chunks(coros)
311
+ else:
312
+ for u in urls:
313
+ fs.makedirs(u, exist_ok=exist_ok)
314
+
315
+ make_many_dirs = sync_wrapper(_make_many_dirs)
316
+
317
+ async def _copy(
318
+ self,
319
+ path1: list[str],
320
+ path2: list[str],
321
+ recursive: bool = False,
322
+ on_error: str = "ignore",
323
+ maxdepth: int | None = None,
324
+ batch_size: int | None = None,
325
+ tempdir: str | None = None,
326
+ **kwargs,
327
+ ):
328
+ # TODO: special case for one FS being local, which can use get/put
329
+ # TODO: special case for one being memFS, which can use cat/pipe
330
+ if recursive:
331
+ raise NotImplementedError("Please use fsspec.generic.rsync")
332
+ path1 = [path1] if isinstance(path1, str) else path1
333
+ path2 = [path2] if isinstance(path2, str) else path2
334
+
335
+ fs = _resolve_fs(path1, self.method)
336
+ fs2 = _resolve_fs(path2, self.method)
337
+
338
+ if fs is fs2:
339
+ if fs.async_impl:
340
+ return await fs._copy(path1, path2, **kwargs)
341
+ else:
342
+ return fs.copy(path1, path2, **kwargs)
343
+
344
+ await copy_file_op(
345
+ fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
346
+ )
347
+
348
+
349
+ async def copy_file_op(
350
+ fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
351
+ ):
352
+ import tempfile
353
+
354
+ tempdir = tempdir or tempfile.mkdtemp()
355
+ try:
356
+ coros = [
357
+ _copy_file_op(
358
+ fs1,
359
+ u1,
360
+ fs2,
361
+ u2,
362
+ os.path.join(tempdir, uuid.uuid4().hex),
363
+ )
364
+ for u1, u2 in zip(url1, url2)
365
+ ]
366
+ out = await _run_coros_in_chunks(
367
+ coros, batch_size=batch_size, return_exceptions=True
368
+ )
369
+ finally:
370
+ shutil.rmtree(tempdir)
371
+ if on_error == "return":
372
+ return out
373
+ elif on_error == "raise":
374
+ for o in out:
375
+ if isinstance(o, Exception):
376
+ raise o
377
+
378
+
379
+ async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
380
+ if fs1.async_impl:
381
+ await fs1._get_file(url1, local)
382
+ else:
383
+ fs1.get_file(url1, local)
384
+ if fs2.async_impl:
385
+ await fs2._put_file(local, url2)
386
+ else:
387
+ fs2.put_file(local, url2)
388
+ os.unlink(local)
389
+ logger.debug("Copy %s -> %s; done", url1, url2)
390
+
391
+
392
+ async def maybe_await(cor):
393
+ if inspect.iscoroutine(cor):
394
+ return await cor
395
+ else:
396
+ return cor
env/lib/python3.13/site-packages/fsspec/gui.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import contextlib
3
+ import logging
4
+ import os
5
+ import re
6
+ from collections.abc import Sequence
7
+ from typing import ClassVar
8
+
9
+ import panel as pn
10
+
11
+ from .core import OpenFile, get_filesystem_class, split_protocol
12
+ from .registry import known_implementations
13
+
14
+ pn.extension()
15
+ logger = logging.getLogger("fsspec.gui")
16
+
17
+
18
class SigSlot:
    """Signal-slot mixin, for Panel event passing

    Include this class in a widget manager's superclasses to be able to
    register events and callbacks on Panel widgets managed by that class.

    The method ``_register`` should be called as widgets are added, and external
    code should call ``connect`` to associate callbacks.

    By default, all signals emit a DEBUG logging statement.
    """

    # names of signals that this class may emit each of which must be
    # set by _register for any new instance
    signals: ClassVar[Sequence[str]] = []
    # names of actions that this class may respond to
    slots: ClassVar[Sequence[str]] = []

    # each of which must be a method name

    def __init__(self):
        # when True, _signal() ignores widget events (see ignore_events())
        self._ignoring_events = False
        # signal name -> {"widget", "callbacks", "thing", "log"}
        self._sigs = {}
        # "<widget-name>-<attribute>" -> signal name, for event lookup
        self._map = {}
        self._setup()

    def _setup(self):
        """Create GUI elements and register signals"""
        self.panel = pn.pane.PaneBase()
        # no signals to set up in the base class

    def _register(
        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
    ):
        """Watch the given attribute of a widget and assign it a named event

        This is normally called at the time a widget is instantiated, in the
        class which owns it.

        Parameters
        ----------
        widget : pn.layout.Panel or None
            Widget to watch. If None, an anonymous signal not associated with
            any widget.
        name : str
            Name of this event
        thing : str
            Attribute of the given widget to watch
        log_level : int
            When the signal is triggered, a logging event of the given level
            will be fired in the dfviz logger.
        auto : bool
            If True, automatically connects with a method in this class of the
            same name.
        """
        if name not in self.signals:
            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
        self._sigs[name] = {
            "widget": widget,
            "callbacks": [],
            "thing": thing,
            "log": log_level,
        }
        # key matching what _signal() reconstructs from the event object
        wn = "-".join(
            [
                getattr(widget, "name", str(widget)) if widget is not None else "none",
                thing,
            ]
        )
        self._map[wn] = name
        if widget is not None:
            widget.param.watch(self._signal, thing, onlychanged=True)
        if auto and hasattr(self, name):
            # wire the signal to the same-named method on this instance
            self.connect(name, getattr(self, name))

    def _repr_mimebundle_(self, *args, **kwargs):
        """Display in a notebook or a server"""
        try:
            return self.panel._repr_mimebundle_(*args, **kwargs)
        except (ValueError, AttributeError) as exc:
            raise NotImplementedError(
                "Panel does not seem to be set up properly"
            ) from exc

    def connect(self, signal, slot):
        """Associate call back with given event

        The callback must be a function which takes the "new" value of the
        watched attribute as the only parameter. If the callback return False,
        this cancels any further processing of the given event.

        Alternatively, the callback can be a string, in which case it means
        emitting the correspondingly-named event (i.e., connect to self)
        """
        self._sigs[signal]["callbacks"].append(slot)

    def _signal(self, event):
        """This is called by a an action on a widget

        Within an self.ignore_events context, nothing happens.

        Tests can execute this method by directly changing the values of
        widget components.
        """
        if not self._ignoring_events:
            # reconstruct the registration key built in _register()
            wn = "-".join([event.obj.name, event.name])
            if wn in self._map and self._map[wn] in self._sigs:
                self._emit(self._map[wn], event.new)

    @contextlib.contextmanager
    def ignore_events(self):
        """Temporarily turn off events processing in this instance

        (does not propagate to children)
        """
        self._ignoring_events = True
        try:
            yield
        finally:
            self._ignoring_events = False

    def _emit(self, sig, value=None):
        """An event happened, call its callbacks

        This method can be used in tests to simulate message passing without
        directly changing visual elements.

        Calling of callbacks will halt whenever one returns False.
        """
        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
        for callback in self._sigs[sig]["callbacks"]:
            if isinstance(callback, str):
                # a string callback means "emit that signal" (chained events)
                self._emit(callback)
            else:
                try:
                    # running callbacks should not break the interface
                    ret = callback(value)
                    if ret is False:
                        break
                except Exception as e:
                    logger.exception(
                        "Exception (%s) while executing callback for signal: %s",
                        e,
                        sig,
                    )

    def show(self, threads=False):
        """Open a new browser tab and display this instance's interface"""
        self.panel.show(threads=threads, verbose=False)
        return self
168
+
169
+
170
class SingleSelect(SigSlot):
    """A multiselect which only allows you to select one item for an event"""

    signals = ["_selected", "selected"]  # the first is internal
    slots = ["set_options", "set_selection", "add", "clear", "select"]

    def __init__(self, **kwargs):
        # kwargs are forwarded to the underlying pn.widgets.MultiSelect
        self.kwargs = kwargs
        super().__init__()

    def _setup(self):
        """Build the MultiSelect widget and wire up the selection signals"""
        self.panel = pn.widgets.MultiSelect(**self.kwargs)
        self._register(self.panel, "_selected", "value")
        # anonymous public signal, emitted after the selection is trimmed
        self._register(None, "selected")
        self.connect("_selected", self.select_one)

    def _signal(self, *args, **kwargs):
        # defer to SigSlot's event dispatch unchanged
        super()._signal(*args, **kwargs)

    def select_one(self, *_):
        """Trim the selection to the most recent item and emit ``selected``"""
        with self.ignore_events():
            # keep only the last-clicked entry (or none at all)
            val = [self.panel.value[-1]] if self.panel.value else []
            self.panel.value = val
        self._emit("selected", self.panel.value)

    def set_options(self, options):
        """Replace the selectable options"""
        self.panel.options = options

    def clear(self):
        """Remove all options"""
        self.panel.options = []

    @property
    def value(self):
        """Current selection: a list with at most one element"""
        return self.panel.value

    def set_selection(self, selection):
        """Programmatically select a single item"""
        self.panel.value = [selection]
207
+
208
+
209
class FileSelector(SigSlot):
    """Panel-based graphical file selector widget

    Instances of this widget are interactive and can be displayed in jupyter by having
    them as the output of a cell, or in a separate browser tab using ``.show()``.
    """

    signals = [
        "protocol_changed",
        "selection_changed",
        "directory_entered",
        "home_clicked",
        "up_clicked",
        "go_clicked",
        "filters_changed",
    ]
    slots = ["set_filters", "go_home"]

    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
        """

        Parameters
        ----------
        url : str (optional)
            Initial value of the URL to populate the dialog; should include protocol
        filters : list(str) (optional)
            File endings to include in the listings. If not included, all files are
            allowed. Does not affect directories.
            If given, the endings will appear as checkboxes in the interface
        ignore : list(str) (optional)
            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
            hidden files on posix
        kwargs : dict (optional)
            To pass to file system instance
        """
        if url:
            self.init_protocol, url = split_protocol(url)
        else:
            # no starting point given: browse the local cwd
            self.init_protocol, url = "file", os.getcwd()
        self.init_url = url
        # kwargs are kept as their string repr, to populate the text box
        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
        self.filters = filters
        self.ignore = [re.compile(i) for i in ignore or []]
        self._fs = None  # lazily created in the ``fs`` property
        super().__init__()

    def _setup(self):
        """Create all widgets, register signals and render the initial listing"""
        self.url = pn.widgets.TextInput(
            name="url",
            value=self.init_url,
            align="end",
            sizing_mode="stretch_width",
            width_policy="max",
        )
        self.protocol = pn.widgets.Select(
            options=sorted(known_implementations),
            value=self.init_protocol,
            name="protocol",
            align="center",
        )
        self.kwargs = pn.widgets.TextInput(
            name="kwargs", value=self.init_kwargs, align="center"
        )
        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
        self.main = SingleSelect(size=10)
        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")

        # auto=True wires each signal to the same-named method below
        self._register(self.protocol, "protocol_changed", auto=True)
        self._register(self.go, "go_clicked", "clicks", auto=True)
        self._register(self.up, "up_clicked", "clicks", auto=True)
        self._register(self.home, "home_clicked", "clicks", auto=True)
        self._register(None, "selection_changed")
        self.main.connect("selected", self.selection_changed)
        self._register(None, "directory_entered")
        # remembered so go_clicked can detect protocol/kwargs changes
        self.prev_protocol = self.protocol.value
        self.prev_kwargs = self.storage_options

        self.filter_sel = pn.widgets.CheckBoxGroup(
            value=[], options=[], inline=False, align="end", width_policy="min"
        )
        self._register(self.filter_sel, "filters_changed", auto=True)

        self.panel = pn.Column(
            pn.Row(self.protocol, self.kwargs),
            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
            self.main.panel,
        )
        self.set_filters(self.filters)
        # populate the initial listing
        self.go_clicked()

    def set_filters(self, filters=None):
        """Set or clear the file-ending filter checkboxes"""
        self.filters = filters
        if filters:
            self.filter_sel.options = filters
            self.filter_sel.value = filters
        else:
            self.filter_sel.options = []
            self.filter_sel.value = []

    @property
    def storage_options(self):
        """Value of the kwargs box as a dictionary"""
        return ast.literal_eval(self.kwargs.value) or {}

    @property
    def fs(self):
        """Current filesystem instance"""
        if self._fs is None:
            cls = get_filesystem_class(self.protocol.value)
            self._fs = cls(**self.storage_options)
        return self._fs

    @property
    def urlpath(self):
        """URL of currently selected item"""
        return (
            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
        )

    def open_file(self, mode="rb", compression=None, encoding=None):
        """Create OpenFile instance for the currently selected item

        For example, in a notebook you might do something like

        .. code-block::

            [ ]: sel = FileSelector(); sel

            # user selects their file

            [ ]: with sel.open_file('rb') as f:
            ...      out = f.read()

        Parameters
        ----------
        mode: str (optional)
            Open mode for the file.
        compression: str (optional)
            The interact with the file as compressed. Set to 'infer' to guess
            compression from the file ending
        encoding: str (optional)
            If using text mode, use this encoding; defaults to UTF8.

        Raises
        ------
        ValueError
            If nothing is currently selected.
        """
        if self.urlpath is None:
            raise ValueError("No file selected")
        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)

    def filters_changed(self, values):
        # checkbox state changed: re-list with the new set of endings
        self.filters = values
        self.go_clicked()

    def selection_changed(self, *_):
        # entering a directory navigates into it; selecting a file does nothing
        if self.urlpath is None:
            return
        if self.fs.isdir(self.urlpath):
            self.url.value = self.fs._strip_protocol(self.urlpath)
            self.go_clicked()

    def go_clicked(self, *_):
        """(Re)generate the listing for the current URL"""
        if (
            self.prev_protocol != self.protocol.value
            or self.prev_kwargs != self.storage_options
        ):
            self._fs = None  # causes fs to be recreated
            self.prev_protocol = self.protocol.value
            self.prev_kwargs = self.storage_options
        listing = sorted(
            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
        )
        # drop entries whose basename matches any of the ignore patterns
        listing = [
            l
            for l in listing
            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
        ]
        folders = {
            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "directory"
        }
        files = {
            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "file"
        }
        if self.filters:
            # ending filters apply to files only, never to directories
            files = {
                k: v
                for k, v in files.items()
                if any(v.endswith(ext) for ext in self.filters)
            }
        self.main.set_options(dict(**folders, **files))

    def protocol_changed(self, *_):
        # new backend: drop the cached fs and reset the view
        self._fs = None
        self.main.options = []
        self.url.value = ""

    def home_clicked(self, *_):
        # restore the state the widget was constructed with
        self.protocol.value = self.init_protocol
        self.kwargs.value = self.init_kwargs
        self.url.value = self.init_url
        self.go_clicked()

    def up_clicked(self, *_):
        # navigate to the parent directory
        self.url.value = self.fs._parent(self.url.value)
        self.go_clicked()
env/lib/python3.13/site-packages/fsspec/json.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from collections.abc import Callable, Mapping, Sequence
3
+ from contextlib import suppress
4
+ from pathlib import PurePath
5
+ from typing import Any, ClassVar
6
+
7
+ from .registry import _import_class, get_filesystem_class
8
+ from .spec import AbstractFileSystem
9
+
10
+
11
class FilesystemJSONEncoder(json.JSONEncoder):
    """JSON encoder aware of fsspec filesystems and ``pathlib`` paths.

    Filesystem instances are serialized via ``AbstractFileSystem.to_dict``
    and pure paths as ``{"cls": ..., "str": ...}`` dicts, so that
    ``FilesystemJSONDecoder`` can round-trip them.
    """

    # whether serialized filesystems include credentials in their dict form
    include_password: ClassVar[bool] = True

    def default(self, o: Any) -> Any:
        """Fallback serializer for objects ``json`` cannot handle natively."""
        if isinstance(o, AbstractFileSystem):
            return o.to_dict(include_password=self.include_password)
        if isinstance(o, PurePath):
            cls = type(o)
            # record the concrete path class so the decoder can rebuild it
            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}

        return super().default(o)

    def make_serializable(self, obj: Any) -> Any:
        """
        Recursively converts an object so that it can be JSON serialized via
        :func:`json.dumps` and :func:`json.dump`, without actually calling
        said functions.
        """
        # None serializes to "null"; previously it fell through to
        # ``default`` and raised TypeError
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, Mapping):
            return {k: self.make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, Sequence):
            # NOTE: bytes/bytearray are Sequences and would become lists of
            # ints here -- json cannot represent them natively anyway
            return [self.make_serializable(v) for v in obj]

        return self.default(obj)
37
+
38
+
39
+ class FilesystemJSONDecoder(json.JSONDecoder):
40
+ def __init__(
41
+ self,
42
+ *,
43
+ object_hook: Callable[[dict[str, Any]], Any] | None = None,
44
+ parse_float: Callable[[str], Any] | None = None,
45
+ parse_int: Callable[[str], Any] | None = None,
46
+ parse_constant: Callable[[str], Any] | None = None,
47
+ strict: bool = True,
48
+ object_pairs_hook: Callable[[list[tuple[str, Any]]], Any] | None = None,
49
+ ) -> None:
50
+ self.original_object_hook = object_hook
51
+
52
+ super().__init__(
53
+ object_hook=self.custom_object_hook,
54
+ parse_float=parse_float,
55
+ parse_int=parse_int,
56
+ parse_constant=parse_constant,
57
+ strict=strict,
58
+ object_pairs_hook=object_pairs_hook,
59
+ )
60
+
61
+ @classmethod
62
+ def try_resolve_path_cls(cls, dct: dict[str, Any]):
63
+ with suppress(Exception):
64
+ fqp = dct["cls"]
65
+
66
+ path_cls = _import_class(fqp)
67
+
68
+ if issubclass(path_cls, PurePath):
69
+ return path_cls
70
+
71
+ return None
72
+
73
+ @classmethod
74
+ def try_resolve_fs_cls(cls, dct: dict[str, Any]):
75
+ with suppress(Exception):
76
+ if "cls" in dct:
77
+ try:
78
+ fs_cls = _import_class(dct["cls"])
79
+ if issubclass(fs_cls, AbstractFileSystem):
80
+ return fs_cls
81
+ except Exception:
82
+ if "protocol" in dct: # Fallback if cls cannot be imported
83
+ return get_filesystem_class(dct["protocol"])
84
+
85
+ raise
86
+
87
+ return None
88
+
89
+ def custom_object_hook(self, dct: dict[str, Any]):
90
+ if "cls" in dct:
91
+ if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
92
+ return AbstractFileSystem.from_dict(dct)
93
+ if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
94
+ return obj_cls(dct["str"])
95
+
96
+ if self.original_object_hook is not None:
97
+ return self.original_object_hook(dct)
98
+
99
+ return dct
100
+
101
+ def unmake_serializable(self, obj: Any) -> Any:
102
+ """
103
+ Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
104
+ """
105
+ if isinstance(obj, dict):
106
+ obj = self.custom_object_hook(obj)
107
+ if isinstance(obj, dict):
108
+ return {k: self.unmake_serializable(v) for k, v in obj.items()}
109
+ if isinstance(obj, (list, tuple)):
110
+ return [self.unmake_serializable(v) for v in obj]
111
+
112
+ return obj
env/lib/python3.13/site-packages/fsspec/mapping.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import array
2
+ import logging
3
+ import posixpath
4
+ import warnings
5
+ from collections.abc import MutableMapping
6
+ from functools import cached_property
7
+
8
+ from fsspec.core import url_to_fs
9
+
10
+ logger = logging.getLogger("fsspec.mapping")
11
+
12
+
13
class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable wrapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=True)
        performs a touch at the location, to check for write access.

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        # root with a guaranteed trailing separator, for fast key -> path joins
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        # exception types translated into KeyError on lookup
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            # probe write access by creating and removing a scratch file
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping"""
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except:  # noqa: E722
            # best-effort: ignore backends where recursive rm/mkdir fails
            pass

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            They keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        # for "omit" we still need per-key exceptions back from cat()
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # single-key cat returns bare bytes, normalise to a dict
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        # translate "missing" exceptions into KeyError instances
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip path of to leave key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data"""
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions as exc:
            # NOTE: default=None means an explicit None default cannot be
            # distinguished from "no default" (then missing keys raise)
            if default is not None:
                return default
            raise KeyError(key) from exc
        return result

    def pop(self, key, default=None):
        """Pop data"""
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        # ensure the parent directory exists before writing
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        # iterate all keys (relative file names) below root
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key"""
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            raise KeyError from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        # pickle without re-running the create/check side effects
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
197
+
198
+
199
def maybe_convert(value):
    """Coerce array-like values to ``bytes``; pass anything else through.

    ``array.array`` instances and objects exposing ``__array__`` (e.g. numpy
    arrays) are converted via the buffer protocol.  datetime64/timedelta64
    arrays are first reinterpreted as int64, because the buffer interface
    does not support those dtypes.
    """
    is_arraylike = isinstance(value, array.array) or hasattr(value, "__array__")
    if not is_arraylike:
        return value
    dtype = getattr(value, "dtype", None)
    if dtype is not None and dtype.kind in "Mm":
        # reinterpret datetime64/timedelta64 as raw int64 for buffer access
        value = value.view("int64")
    return bytes(memoryview(value))
208
+
209
+
210
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Whether to attempt to read from the location before instantiation, to
        check that the mapping does exist
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs:
        Passed on to the filesystem constructor(s) via ``url_to_fs``.

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # Removing protocol here - could defer to each open() on the backend
    fs, urlpath = url_to_fs(url, **kwargs)
    root = alternate_root if alternate_root is not None else urlpath
    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
env/lib/python3.13/site-packages/fsspec/parquet.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import warnings
4
+ from typing import Literal
5
+
6
+ import fsspec
7
+
8
+ from .core import url_to_fs
9
+ from .spec import AbstractBufferedFile
10
+ from .utils import merge_offset_ranges
11
+
12
+ # Parquet-Specific Utilities for fsspec
13
+ #
14
+ # Most of the functions defined in this module are NOT
15
+ # intended for public consumption. The only exception
16
+ # to this is `open_parquet_file`, which should be used
17
+ # place of `fs.open()` to open parquet-formatted files
18
+ # on remote file systems.
19
+
20
+
21
class AlreadyBufferedFile(AbstractBufferedFile):
    """A buffered file whose required byte ranges were fetched up front.

    Used with the "parts" cache: every range a reader may request is
    supplied at construction time, so any attempt to fetch further data
    from the backend is an error.
    """

    def _fetch_range(self, start, end):
        # the "parts" cache must already contain every byte this file needs
        raise NotImplementedError
24
+
25
+
26
def open_parquet_files(
    path: list[str],
    mode: Literal["rb"] = "rb",
    fs: None | fsspec.AbstractFileSystem = None,
    metadata=None,
    columns: None | list[str] = None,
    row_groups: None | list[int] = None,
    storage_options: None | dict = None,
    engine: str = "auto",
    max_gap: int = 64_000,
    max_block: int = 256_000_000,
    footer_sample_size: int = 1_000_000,
    filters: None | list[list[list[str]]] = None,
    **kwargs,
):
    """
    Return a list of file-like objects for the matching Parquet files.

    The specified parquet `engine` will be used to parse the
    footer metadata, and determine the required byte ranges
    from the file. The target path will then be opened with
    the "parts" (`KnownPartsOfAFile`) caching strategy.

    Note that this method is intended for usage with remote
    file systems, and is unlikely to improve parquet-read
    performance on local file systems.

    Parameters
    ----------
    path: str or list of str
        Target file path(s); a single path may be a glob pattern or a
        directory (trailing "/").
    mode: str, optional
        Mode option to be passed through to `fs.open`. Default is "rb".
    metadata: Any, optional
        Parquet metadata object. Object type must be supported
        by the backend parquet engine. For now, only the "fastparquet"
        engine supports an explicit `ParquetFile` metadata object.
        If a metadata object is supplied, the remote footer metadata
        will not need to be transferred into local memory.
    fs: AbstractFileSystem, optional
        Filesystem object to use for opening the file. If nothing is
        specified, an `AbstractFileSystem` object will be inferred.
    engine : str, default "auto"
        Parquet engine to use for metadata parsing. Allowed options
        include "fastparquet", "pyarrow", and "auto". The specified
        engine must be installed in the current environment. If
        "auto" is specified, and both engines are installed,
        "fastparquet" will take precedence over "pyarrow".
    columns: list, optional
        List of all column names that may be read from the file.
    row_groups : list, optional
        List of all row-groups that may be read from the file. This
        may be a list of row-group indices (integers), or it may be
        a list of `RowGroup` metadata objects (if the "fastparquet"
        engine is used).
    storage_options : dict, optional
        Used to generate an `AbstractFileSystem` object if `fs` was
        not specified.
    max_gap : int, optional
        Neighboring byte ranges will only be merged when their
        inter-range gap is <= `max_gap`. Default is 64KB.
    max_block : int, optional
        Neighboring byte ranges will only be merged when the size of
        the aggregated range is <= `max_block`. Default is 256MB.
    footer_sample_size : int, optional
        Number of bytes to read from the end of the path to look
        for the footer metadata. If the sampled bytes do not contain
        the footer, a second read request will be required, and
        performance will suffer. Default is 1MB.
    filters : list[list], optional
        List of filters to apply to prevent reading row groups, of the
        same format as accepted by the loading engines. Ignored if
        ``row_groups`` is specified.
    **kwargs :
        Optional key-word arguments to pass to `fs.open`
    """

    # Make sure we have an `AbstractFileSystem` object
    # to work with
    if fs is None:
        path0 = path
        if isinstance(path, (list, tuple)):
            # infer the filesystem from the first path
            path = path[0]
        fs, path = url_to_fs(path, **(storage_options or {}))
    else:
        path0 = path

    # For now, `columns == []` not supported, is the same
    # as all columns
    if columns is not None and len(columns) == 0:
        columns = None

    # Set the engine
    engine = _set_engine(engine)

    # Expand the input into a concrete list of parquet paths
    if isinstance(path0, (list, tuple)):
        paths = path0
    elif "*" in path:
        paths = fs.glob(path)
    elif path0.endswith("/"):  # or fs.isdir(path):
        paths = [
            _
            for _ in fs.find(path, withdirs=False, detail=False)
            if _.endswith((".parquet", ".parq"))
        ]
    else:
        paths = [path]

    # Determine, per file, which byte ranges are needed and fetch them
    data = _get_parquet_byte_ranges(
        paths,
        fs,
        metadata=metadata,
        columns=columns,
        row_groups=row_groups,
        engine=engine,
        max_gap=max_gap,
        max_block=max_block,
        footer_sample_size=footer_sample_size,
        filters=filters,
    )

    # Call self.open with "parts" caching
    options = kwargs.pop("cache_options", {}).copy()
    return [
        AlreadyBufferedFile(
            fs=None,
            path=fn,
            mode=mode,
            cache_type="parts",
            cache_options={
                **options,
                "data": data.get(fn, {}),
            },
            # size is the end of the furthest fetched range
            size=max(_[1] for _ in data.get(fn, {})),
            **kwargs,
        )
        for fn in data
    ]
+
165
+
166
def open_parquet_file(*args, **kwargs):
    """Create a file tailored to reading specific parts of a parquet file.

    Please see ``open_parquet_files`` for details of the arguments. The
    difference is, this function always returns a single
    ``AlreadyBufferedFile``, whereas ``open_parquet_files`` always returns
    a list of files, even if there are one or zero matching parquet files.

    Raises
    ------
    IndexError
        If ``open_parquet_files`` finds no matching parquet file.
    """
    # Delegate entirely and take the first (and typically only) file
    return open_parquet_files(*args, **kwargs)[0]
175
+
176
+
177
def _get_parquet_byte_ranges(
    paths,
    fs,
    metadata=None,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    engine="auto",
    filters=None,
):
    """Get a dictionary of the known byte ranges needed
    to read a specific column/row-group selection from a
    Parquet dataset. Each value in the output dictionary
    is intended for use as the `data` argument for the
    `KnownPartsOfAFile` caching strategy of a single path.

    Parameters
    ----------
    paths : list of str
        Parquet file paths to process.
    fs : AbstractFileSystem
        Filesystem used to sample footers and fetch byte ranges.
    metadata : optional
        Engine-specific parsed footer metadata. When given, footer
        sampling is skipped and the work is delegated to
        ``_get_parquet_byte_ranges_from_metadata``.
    columns, row_groups, filters : optional
        Column/row-group selection. When all three are ``None``, the
        entire content of every file is transferred instead.
    max_gap, max_block : int
        Range-merging thresholds forwarded to ``merge_offset_ranges``.
    footer_sample_size : int
        Number of bytes sampled from the end of each file when looking
        for the footer metadata.
    engine : str or engine instance
        Parquet engine; resolved via ``_set_engine`` when a string.

    Returns
    -------
    dict
        Mapping of ``{path: {(start, stop): bytes}}``.
    """

    # Set engine if necessary
    if isinstance(engine, str):
        engine = _set_engine(engine)

    # Pass to specialized function if metadata is defined
    if metadata is not None:
        # Use the provided parquet metadata object
        # to avoid transferring/parsing footer metadata
        return _get_parquet_byte_ranges_from_metadata(
            metadata,
            fs,
            engine,
            columns=columns,
            row_groups=row_groups,
            max_gap=max_gap,
            max_block=max_block,
            filters=filters,
        )

    # Get file sizes asynchronously
    file_sizes = fs.sizes(paths)

    # Populate global paths, starts, & ends
    result = {}
    data_paths = []
    data_starts = []
    data_ends = []
    add_header_magic = True
    if columns is None and row_groups is None and filters is None:
        # We are NOT selecting specific columns or row-groups.
        #
        # We can avoid sampling the footers, and just transfer
        # all file data with cat_ranges
        for i, path in enumerate(paths):
            result[path] = {}
            data_paths.append(path)
            data_starts.append(0)
            data_ends.append(file_sizes[i])
        add_header_magic = False  # "Magic" should already be included
    else:
        # We ARE selecting specific columns or row-groups.
        #
        # Gather file footers.
        # We just take the last `footer_sample_size` bytes of each
        # file (or the entire file if it is smaller than that)
        footer_starts = []
        footer_ends = []
        for i, path in enumerate(paths):
            footer_ends.append(file_sizes[i])
            sample_size = max(0, file_sizes[i] - footer_sample_size)
            footer_starts.append(sample_size)
        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)

        # Check our footer samples and re-sample if necessary.
        # The last 8 bytes of a parquet file are the 4-byte little-endian
        # footer length followed by the b"PAR1" magic marker.
        missing_footer_starts = footer_starts.copy()
        large_footer = 0
        for i, path in enumerate(paths):
            footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
            real_footer_start = file_sizes[i] - (footer_size + 8)
            if real_footer_start < footer_starts[i]:
                # Sample was too small to contain the whole footer;
                # remember how far back we actually need to read
                missing_footer_starts[i] = real_footer_start
                large_footer = max(large_footer, (footer_size + 8))
        if large_footer:
            warnings.warn(
                f"Not enough data was used to sample the parquet footer. "
                f"Try setting footer_sample_size >= {large_footer}."
            )
            # Fetch the missing leading part of each undersampled footer
            # and prepend it to the existing sample
            for i, block in enumerate(
                fs.cat_ranges(
                    paths,
                    missing_footer_starts,
                    footer_starts,
                )
            ):
                footer_samples[i] = block + footer_samples[i]
                footer_starts[i] = missing_footer_starts[i]

        # Calculate required byte ranges for each path
        for i, path in enumerate(paths):
            # Use "engine" to collect data byte ranges
            path_data_starts, path_data_ends = engine._parquet_byte_ranges(
                columns,
                row_groups=row_groups,
                footer=footer_samples[i],
                footer_start=footer_starts[i],
                filters=filters,
            )

            data_paths += [path] * len(path_data_starts)
            data_starts += path_data_starts
            data_ends += path_data_ends
            result.setdefault(path, {})[(footer_starts[i], file_sizes[i])] = (
                footer_samples[i]
            )

        # Merge adjacent offset ranges
        data_paths, data_starts, data_ends = merge_offset_ranges(
            data_paths,
            data_starts,
            data_ends,
            max_gap=max_gap,
            max_block=max_block,
            sort=False,  # Should already be sorted
        )

        # Start by populating `result` with footer samples
        # NOTE(review): this replaces the entry stored via setdefault
        # above with an identical footer mapping — appears intentional
        for i, path in enumerate(paths):
            result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}

    # Transfer the data byte-ranges into local memory
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Add b"PAR1" to header if necessary
    if add_header_magic:
        _add_header_magic(result)

    return result
313
+
314
+
315
def _get_parquet_byte_ranges_from_metadata(
    metadata,
    fs,
    engine,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    filters=None,
):
    """Compute and fetch the byte ranges required for a read, given an
    already-parsed, engine-specific ``metadata`` object.

    Unlike ``_get_parquet_byte_ranges``, no footer bytes need to be
    transferred from remote storage before the ranges can be calculated.
    Returns a dict mapping each path to ``{(start, stop): bytes}``.
    """
    # Ask the engine which byte ranges the selection requires
    paths_, starts_, ends_ = engine._parquet_byte_ranges(
        columns, row_groups=row_groups, metadata=metadata, filters=filters
    )

    # Coalesce nearby ranges into larger requests (input already sorted)
    paths_, starts_, ends_ = merge_offset_ranges(
        paths_,
        starts_,
        ends_,
        max_gap=max_gap,
        max_block=max_block,
        sort=False,
    )

    # Fetch every range into local memory, one entry per distinct path
    ranges = {path: {} for path in set(paths_)}
    _transfer_ranges(fs, ranges, paths_, starts_, ends_)

    # Make sure each file's cached data begins with the b"PAR1" magic
    _add_header_magic(ranges)

    return ranges
354
+
355
+
356
+ def _transfer_ranges(fs, blocks, paths, starts, ends):
357
+ # Use cat_ranges to gather the data byte_ranges
358
+ ranges = (paths, starts, ends)
359
+ for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
360
+ blocks[path][(start, stop)] = data
361
+
362
+
363
+ def _add_header_magic(data):
364
+ # Add b"PAR1" to file headers
365
+ for path in list(data.keys()):
366
+ add_magic = True
367
+ for k in data[path]:
368
+ if k[0] == 0 and k[1] >= 4:
369
+ add_magic = False
370
+ break
371
+ if add_magic:
372
+ data[path][(0, 4)] = b"PAR1"
373
+
374
+
375
+ def _set_engine(engine_str):
376
+ # Define a list of parquet engines to try
377
+ if engine_str == "auto":
378
+ try_engines = ("fastparquet", "pyarrow")
379
+ elif not isinstance(engine_str, str):
380
+ raise ValueError(
381
+ "Failed to set parquet engine! "
382
+ "Please pass 'fastparquet', 'pyarrow', or 'auto'"
383
+ )
384
+ elif engine_str not in ("fastparquet", "pyarrow"):
385
+ raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
386
+ else:
387
+ try_engines = [engine_str]
388
+
389
+ # Try importing the engines in `try_engines`,
390
+ # and choose the first one that succeeds
391
+ for engine in try_engines:
392
+ try:
393
+ if engine == "fastparquet":
394
+ return FastparquetEngine()
395
+ elif engine == "pyarrow":
396
+ return PyarrowEngine()
397
+ except ImportError:
398
+ pass
399
+
400
+ # Raise an error if a supported parquet engine
401
+ # was not found
402
+ raise ImportError(
403
+ f"The following parquet engines are not installed "
404
+ f"in your python environment: {try_engines}."
405
+ f"Please install 'fastparquert' or 'pyarrow' to "
406
+ f"utilize the `fsspec.parquet` module."
407
+ )
408
+
409
+
410
class FastparquetEngine:
    # The purpose of the FastparquetEngine class is
    # to check if fastparquet can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to fastparquet.

    def __init__(self):
        # Raises ImportError if fastparquet is unavailable;
        # `_set_engine` relies on this to fall back to pyarrow
        import fastparquet as fp

        self.fp = fp

    def _row_group_filename(self, row_group, pf):
        # Map a row-group to the parquet file that contains it
        return pf.row_group_filename(row_group)

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
        filters=None,
    ):
        """Collect the byte ranges needed for the given column/row-group
        selection.

        Returns ``(data_paths, data_starts, data_ends)`` when ``metadata``
        was provided (the metadata may span multiple files), otherwise
        ``(data_starts, data_ends)`` for the single file whose ``footer``
        bytes were passed in.
        """
        # Initialize offset ranges and define ParquetFile metadata
        pf = metadata
        data_paths, data_starts, data_ends = [], [], []
        if filters and row_groups:
            raise ValueError("filters and row_groups cannot be used together")
        if pf is None:
            # Parse the sampled footer bytes into a ParquetFile object
            pf = self.fp.ParquetFile(io.BytesIO(footer))

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else {c.split(".", 1)[0] for c in columns}
        if column_set is not None and hasattr(pf, "pandas_metadata"):
            md_index = [
                ind
                for ind in pf.pandas_metadata.get("index_columns", [])
                # Ignore RangeIndex information
                if not isinstance(ind, dict)
            ]
            column_set |= set(md_index)

        # Check if row_groups is a list of integers
        # or a list of row-group metadata
        if filters:
            from fastparquet.api import filter_row_groups

            row_group_indices = None
            row_groups = filter_row_groups(pf, filters)
        elif row_groups and not isinstance(row_groups[0], int):
            # Input row_groups contains row-group metadata
            row_group_indices = None
        else:
            # Input row_groups contains row-group indices
            row_group_indices = row_groups
            row_groups = pf.row_groups

        # Loop through column chunks to add required byte ranges
        for r, row_group in enumerate(row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_group_indices is None or r in row_group_indices:
                # Find the target parquet-file path for `row_group`
                fn = self._row_group_filename(row_group, pf)

                for column in row_group.columns:
                    name = column.meta_data.path_in_schema[0]
                    # Skip this column if we are targeting a
                    # specific columns
                    if column_set is None or name in column_set:
                        # Prefer the dictionary-page offset (it precedes
                        # the data pages when present)
                        file_offset0 = column.meta_data.dictionary_page_offset
                        if file_offset0 is None:
                            file_offset0 = column.meta_data.data_page_offset
                        num_bytes = column.meta_data.total_compressed_size
                        if footer_start is None or file_offset0 < footer_start:
                            # Clip the range so it never overlaps the
                            # already-sampled footer region
                            data_paths.append(fn)
                            data_starts.append(file_offset0)
                            data_ends.append(
                                min(
                                    file_offset0 + num_bytes,
                                    footer_start or (file_offset0 + num_bytes),
                                )
                            )

        if metadata:
            # The metadata in this call may map to multiple
            # file paths. Need to include `data_paths`
            return data_paths, data_starts, data_ends
        return data_starts, data_ends
501
+
502
+
503
class PyarrowEngine:
    # The purpose of the PyarrowEngine class is
    # to check if pyarrow can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to pyarrow.

    def __init__(self):
        # Raises ImportError if pyarrow is unavailable
        import pyarrow.parquet as pq

        self.pq = pq

    def _row_group_filename(self, row_group, metadata):
        # Multi-file metadata mapping is not supported by this engine
        raise NotImplementedError

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
        filters=None,
    ):
        """Collect the byte ranges needed for the given column/row-group
        selection from the sampled ``footer`` bytes of a single file.

        Returns ``(data_starts, data_ends)``. Unlike the fastparquet
        engine, pre-parsed ``metadata`` and ``filters`` inputs are not
        supported.
        """
        if metadata is not None:
            raise ValueError("metadata input not supported for PyarrowEngine")
        if filters:
            # Row-group filtering is only implemented for fastparquet
            raise NotImplementedError

        data_starts, data_ends = [], []
        # Parse the sampled footer bytes into parquet metadata
        md = self.pq.ParquetFile(io.BytesIO(footer)).metadata

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None:
            schema = md.schema.to_arrow_schema()
            has_pandas_metadata = (
                schema.metadata is not None and b"pandas" in schema.metadata
            )
            if has_pandas_metadata:
                md_index = [
                    ind
                    for ind in json.loads(
                        schema.metadata[b"pandas"].decode("utf8")
                    ).get("index_columns", [])
                    # Ignore RangeIndex information
                    if not isinstance(ind, dict)
                ]
                column_set |= set(md_index)

        # Loop through column chunks to add required byte ranges
        for r in range(md.num_row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_groups is None or r in row_groups:
                row_group = md.row_group(r)
                for c in range(row_group.num_columns):
                    column = row_group.column(c)
                    name = column.path_in_schema
                    # Skip this column if we are targeting a
                    # specific columns
                    split_name = name.split(".")[0]
                    if (
                        column_set is None
                        or name in column_set
                        or split_name in column_set
                    ):
                        # Prefer the dictionary-page offset (it precedes
                        # the data pages when present)
                        file_offset0 = column.dictionary_page_offset
                        if file_offset0 is None:
                            file_offset0 = column.data_page_offset
                        num_bytes = column.total_compressed_size
                        if file_offset0 < footer_start:
                            # Clip the range so it never overlaps the
                            # already-sampled footer region
                            data_starts.append(file_offset0)
                            data_ends.append(
                                min(file_offset0 + num_bytes, footer_start)
                            )
        return data_starts, data_ends
env/lib/python3.13/site-packages/fsspec/registry.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import types
5
+ import warnings
6
+
7
__all__ = ["registry", "get_filesystem_class", "default"]

# internal, mutable
_registry: dict[str, type] = {}

# external, immutable
registry = types.MappingProxyType(_registry)
# protocol used by `get_filesystem_class` when none is specified
default = "file"
15
+
16
+
17
def register_implementation(name, cls, clobber=False, errtxt=None):
    """Add implementation class to the registry

    Parameters
    ----------
    name: str
        Protocol name to associate with the class
    cls: class or str
        if a class: fsspec-compliant implementation class (normally inherits from
        ``fsspec.AbstractFileSystem``, gets added straight to the registry. If a
        str, the full path to an implementation class like package.module.class,
        which gets added to known_implementations,
        so the import is deferred until the filesystem is actually used.
    clobber: bool (optional)
        Whether to overwrite a protocol with the same name; if False, will raise
        instead.
    errtxt: str (optional)
        If given, then a failure to import the given class will result in this
        text being given.
    """
    if isinstance(cls, str):
        # Deferred registration: record only the import path
        if name in known_implementations and clobber is False:
            if cls != known_implementations[name]["class"]:
                raise ValueError(
                    f"Name ({name}) already in the known_implementations and clobber "
                    f"is False"
                )
        else:
            entry = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }
            known_implementations[name] = entry
    else:
        # Direct registration of an implementation class
        if name in registry and clobber is False:
            if _registry[name] is not cls:
                raise ValueError(
                    f"Name ({name}) already in the registry and clobber is False"
                )
        else:
            _registry[name] = cls
58
+
59
+
60
# protocols mapped to the class which implements them. This dict can be
# updated with register_implementation. Keys MUST stay alphabetically
# sorted (enforced by the assert below).
known_implementations = {
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "arrow_hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "async_wrapper": {
        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
    },
    "asynclocal": {
        "class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "asyncwrapper": {
        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "data": {"class": "fsspec.implementations.data.DataFileSystem"},
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "dropbox": {
        "class": "dropboxdrivefs.DropboxDriveFileSystem",
        "err": (
            'DropboxFileSystem requires "dropboxdrivefs","requests" and "'
            '"dropbox" to be installed'
        ),
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
    "gcs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gdrive": {
        "class": "gdrive_fsspec.GoogleDriveFileSystem",
        "err": "Please install gdrive_fs for access to Google Drive",
    },
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "gist": {
        "class": "fsspec.implementations.gist.GistFileSystem",
        "err": "Install the requests package to use the gist FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "gs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "http": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "https": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": "LibArchive requires to be installed",
    },
    "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
    "oci": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Object Storage",
    },
    "ocilake": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Data Lake",
    },
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "pyscript": {
        "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
        "err": "Install requests (cpython) or run in pyscript",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": (
            "Install fsspec-xrootd to access xrootd storage system. "
            "Note: 'root' is the protocol name for xrootd storage systems, "
            "not referring to root directories"
        ),
    },
    "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "sftp": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "ssh": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
    "tos": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "tosfs": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "webhdfs": {
        "class": "fsspec.implementations.webhdfs.WebHDFS",
        "err": 'webHDFS access requires "requests" to be installed',
    },
    "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
}

assert list(known_implementations) == sorted(known_implementations), (
    "Not in alphabetical order"
)
242
+
243
+
244
def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message.
    """
    proto = protocol or default

    if proto not in registry:
        # Not registered yet: import lazily from known_implementations
        if proto not in known_implementations:
            raise ValueError(f"Protocol not known: {proto}")
        entry = known_implementations[proto]
        try:
            register_implementation(proto, _import_class(entry["class"]))
        except ImportError as e:
            raise ImportError(entry.get("err")) from e

    cls = registry[proto]
    if getattr(cls, "protocol", None) in ("abstract", None):
        # Fill in the protocol attribute for classes that left it unset
        cls.protocol = proto

    return cls
272
+
273
+
274
# Warning text shown by `_import_class` when an old (pre-0.5) s3fs is imported
s3_msg = """Your installed version of s3fs is very old and known to cause
severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.
"""
280
+
281
+
282
+ def _import_class(fqp: str):
283
+ """Take a fully-qualified path and return the imported class or identifier.
284
+
285
+ ``fqp`` is of the form "package.module.klass" or
286
+ "package.module:subobject.klass".
287
+
288
+ Warnings
289
+ --------
290
+ This can import arbitrary modules. Make sure you haven't installed any modules
291
+ that may execute malicious code at import time.
292
+ """
293
+ if ":" in fqp:
294
+ mod, name = fqp.rsplit(":", 1)
295
+ else:
296
+ mod, name = fqp.rsplit(".", 1)
297
+
298
+ is_s3 = mod == "s3fs"
299
+ mod = importlib.import_module(mod)
300
+ if is_s3 and mod.__version__.split(".") < ["0", "5"]:
301
+ warnings.warn(s3_msg)
302
+ for part in name.split("."):
303
+ mod = getattr(mod, part)
304
+
305
+ if not isinstance(mod, type):
306
+ raise TypeError(f"{fqp} is not a class")
307
+
308
+ return mod
309
+
310
+
311
def filesystem(protocol, **storage_options):
    """Instantiate a filesystem for the given protocol.

    Any ``storage_options`` are protocol-specific and are handed straight
    to the implementation class constructor.
    """
    if protocol == "arrow_hdfs":
        # Legacy alias retained only for backwards compatibility
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    return get_filesystem_class(protocol)(**storage_options)
326
+
327
+
328
def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.
    """
    return [*known_implementations]
env/lib/python3.13/site-packages/fsspec/spec.py ADDED
@@ -0,0 +1,2281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import json
5
+ import logging
6
+ import os
7
+ import threading
8
+ import warnings
9
+ import weakref
10
+ from errno import ESPIPE
11
+ from glob import has_magic
12
+ from hashlib import sha256
13
+ from typing import Any, ClassVar
14
+
15
+ from .callbacks import DEFAULT_CALLBACK
16
+ from .config import apply_config, conf
17
+ from .dircache import DirCache
18
+ from .transaction import Transaction
19
+ from .utils import (
20
+ _unstrip_protocol,
21
+ glob_translate,
22
+ isfilelike,
23
+ other_paths,
24
+ read_block,
25
+ stringify_path,
26
+ tokenize,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
def make_instance(cls, args, kwargs):
    """Reconstruct ``cls(*args, **kwargs)``; used by ``__reduce__`` for pickling."""
    instance = cls(*args, **kwargs)
    return instance
34
+
35
+
36
class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        # Runs once per *class* (not per instance): give every filesystem
        # class its own cache and record the creating process's pid.
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        # Intercepts instantiation: return a cached instance when one exists
        # for an equivalent (class, pid, thread, args, kwargs) combination.
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        # Options listed in _strip_tokenize_options are removed before
        # hashing, so they do not differentiate cached instances.
        strip_tokenize_options = {
            k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
        }
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            # After a fork, cached instances belong to the parent process:
            # drop them and re-key the cache to this process.
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
98
+
99
+
100
class AbstractFileSystem(metaclass=_Cached):
    """
    An abstract super-class for pythonic file-systems

    Implementations are expected to be compatible with or, better, subclass
    from here.
    """

    cachable = True  # this class can be cached, instances reused
    _cached = False  # becomes True in __init__; guards re-init of cached instances
    blocksize = 2**22  # default read/write chunk size (4 MiB)
    sep = "/"  # path separator used by this filesystem
    protocol: ClassVar[str | tuple[str, ...]] = "abstract"  # URL scheme(s)
    _latest = None  # token of the most recently created/returned instance
    async_impl = False  # True for asyncio-based subclasses (see .asyn)
    mirror_sync_methods = False  # if True, generate sync wrappers for async methods
    root_marker = ""  # For some FSs, may require leading '/' or other character
    transaction_type = Transaction  # class used by the ``transaction`` property

    #: Extra *class attributes* that should be considered when hashing.
    _extra_tokenize_attributes = ()
    #: *storage options* that should not be considered when hashing.
    _strip_tokenize_options = ()

    # Set by _Cached metaclass
    storage_args: tuple[Any, ...]
    storage_options: dict[str, Any]
127
+
128
    def __init__(self, *args, **storage_options):
        """Create and configure file-system instance

        Instances may be cachable, so if similar enough arguments are seen
        a new instance is not required. The token attribute exists to allow
        implementations to cache instances if they wish.

        A reasonable default should be provided if there are no arguments.

        Subclasses should call this method.

        Parameters
        ----------
        use_listings_cache, listings_expiry_time, max_paths:
            passed to ``DirCache``, if the implementation supports
            directory listing caching. Pass use_listings_cache=False
            to disable such caching.
        skip_instance_cache: bool
            If this is a cachable implementation, pass True here to force
            creating a new instance even if a matching instance exists, and prevent
            storing this instance.
        asynchronous: bool
        loop: asyncio-compatible IOLoop or None
        """
        if self._cached:
            # reusing instance, don't change
            # (the _Cached metaclass may hand back an existing object, on
            # which __init__ is invoked again; do not reset its state)
            return
        self._cached = True
        self._intrans = False
        self._transaction = None
        self._invalidated_caches_in_transaction = []
        # DirCache ignores options it does not understand
        self.dircache = DirCache(**storage_options)

        if storage_options.pop("add_docs", None):
            warnings.warn("add_docs is no longer supported.", FutureWarning)

        if storage_options.pop("add_aliases", None):
            warnings.warn("add_aliases has been removed.", FutureWarning)
        # This is set in _Cached
        self._fs_token_ = None
168
+
169
    @property
    def fsid(self):
        """Persistent filesystem id that can be used to compare filesystems
        across sessions.
        """
        # Abstract: concrete backends must override to provide a stable id.
        raise NotImplementedError
175
+
176
    @property
    def _fs_token(self):
        # Instance-cache token computed by the _Cached metaclass at creation.
        return self._fs_token_
179
+
180
    def __dask_tokenize__(self):
        # Let dask hash this instance deterministically via its cache token.
        return self._fs_token
182
+
183
    def __hash__(self):
        # The cache token is a hex digest string; interpret it as base-16.
        return int(self._fs_token, 16)
185
+
186
    def __eq__(self, other):
        # Equal iff same concrete class and same instance-cache token.
        return isinstance(other, type(self)) and self._fs_token == other._fs_token
188
+
189
    def __reduce__(self):
        # Pickle as (reconstructor, original ctor args); unpickling re-runs
        # the instance cache, so equal-args filesystems dedupe on load.
        return make_instance, (type(self), self.storage_args, self.storage_options)
191
+
192
+ @classmethod
193
+ def _strip_protocol(cls, path):
194
+ """Turn path from fully-qualified to file-system-specific
195
+
196
+ May require FS-specific handling, e.g., for relative paths or links.
197
+ """
198
+ if isinstance(path, list):
199
+ return [cls._strip_protocol(p) for p in path]
200
+ path = stringify_path(path)
201
+ protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
202
+ for protocol in protos:
203
+ if path.startswith(protocol + "://"):
204
+ path = path[len(protocol) + 3 :]
205
+ elif path.startswith(protocol + "::"):
206
+ path = path[len(protocol) + 2 :]
207
+ path = path.rstrip("/")
208
+ # use of root_marker to make minimum required path, e.g., "/"
209
+ return path or cls.root_marker
210
+
211
+ def unstrip_protocol(self, name: str) -> str:
212
+ """Format FS-specific path to generic, including protocol"""
213
+ protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
214
+ for protocol in protos:
215
+ if name.startswith(f"{protocol}://"):
216
+ return name
217
+ return f"{protos[0]}://{name}"
218
+
219
    @staticmethod
    def _get_kwargs_from_urls(path):
        """If kwargs can be encoded in the paths, extract them here

        This should happen before instantiation of the class; incoming paths
        then should be amended to strip the options in methods.

        Examples may look like an sftp path "sftp://user@host:/my/path", where
        the user and host should become kwargs and later get stripped.
        """
        # by default, nothing happens
        return {}
231
+
232
+ @classmethod
233
+ def current(cls):
234
+ """Return the most recently instantiated FileSystem
235
+
236
+ If no instance has been created, then create one with defaults
237
+ """
238
+ if cls._latest in cls._cache:
239
+ return cls._cache[cls._latest]
240
+ return cls()
241
+
242
+ @property
243
+ def transaction(self):
244
+ """A context within which files are committed together upon exit
245
+
246
+ Requires the file class to implement `.commit()` and `.discard()`
247
+ for the normal and exception cases.
248
+ """
249
+ if self._transaction is None:
250
+ self._transaction = self.transaction_type(self)
251
+ return self._transaction
252
+
253
    def start_transaction(self):
        """Begin write transaction for deferring files, non-context version"""
        self._intrans = True
        # A fresh transaction object replaces any previous (completed) one.
        self._transaction = self.transaction_type(self)
        return self.transaction
258
+
259
    def end_transaction(self):
        """Finish write transaction, non-context version"""
        self.transaction.complete()
        self._transaction = None
        # The invalid cache must be cleared after the transaction is completed.
        # (invalidate_cache only queues paths while a transaction is active)
        for path in self._invalidated_caches_in_transaction:
            self.invalidate_cache(path)
        self._invalidated_caches_in_transaction.clear()
267
+
268
    def invalidate_cache(self, path=None):
        """
        Discard any cached directory information

        Parameters
        ----------
        path: string or None
            If None, clear all listings cached else listings at or under given
            path.
        """
        # Not necessary to implement invalidation mechanism, may have no cache.
        # But if have, you should call this method of parent class from your
        # subclass to ensure expiring caches after transacations correctly.
        # See the implementation of FTPFileSystem in ftp.py
        if self._intrans:
            # During a transaction, only record the path; the actual
            # invalidation happens in end_transaction().
            self._invalidated_caches_in_transaction.append(path)
284
+
285
    def mkdir(self, path, create_parents=True, **kwargs):
        """
        Create directory entry at path

        For systems that don't have true directories, may create an for
        this instance only and not touch the real filesystem

        Parameters
        ----------
        path: str
            location
        create_parents: bool
            if True, this is equivalent to ``makedirs``
        kwargs:
            may be permissions, etc.
        """
        pass  # not necessary to implement, may not have directories
302
+
303
    def makedirs(self, path, exist_ok=False):
        """Recursively make directories

        Creates directory at path and any intervening required directories.
        Raises exception if, for instance, the path already exists but is a
        file.

        Parameters
        ----------
        path: str
            leaf directory name
        exist_ok: bool (False)
            If False, will error if the target already exists
        """
        pass  # not necessary to implement, may not have directories
318
+
319
    def rmdir(self, path):
        """Remove a directory, if empty"""
        pass  # not necessary to implement, may not have directories
322
+
323
    def ls(self, path, detail=True, **kwargs):
        """List objects at path.

        This should include subdirectories and files at that location. The
        difference between a file and a directory must be clear when details
        are requested.

        The specific keys, or perhaps a FileInfo class, or similar, is TBD,
        but must be consistent across implementations.
        Must include:

        - full path to the entry (without protocol)
        - size of the entry, in bytes. If the value cannot be determined, will
          be ``None``.
        - type of entry, "file", "directory" or other

        Additional information
        may be present, appropriate to the file-system, e.g., generation,
        checksum, etc.

        May use refresh=True|False to allow use of self._ls_from_cache to
        check for a saved listing and avoid calling the backend. This would be
        common where listing may be expensive.

        Parameters
        ----------
        path: str
        detail: bool
            if True, gives a list of dictionaries, where each is the same as
            the result of ``info(path)``. If False, gives a list of paths
            (str).
        kwargs: may have additional backend-specific options, such as version
            information

        Returns
        -------
        List of strings if detail is False, or list of directory information
        dicts if detail is True.
        """
        # Abstract: every concrete backend must implement listing.
        raise NotImplementedError
363
+
364
    def _ls_from_cache(self, path):
        """Check cache for listing

        Returns listing, if found (may be empty list for a directory that exists
        but contains nothing), None if not in cache.

        Raises FileNotFoundError if the parent directory is cached but does
        not contain this path.
        """
        parent = self._parent(path)
        # Fast path: the path itself was listed as a directory.
        try:
            return self.dircache[path.rstrip("/")]
        except KeyError:
            pass
        # Otherwise, look for the entry inside its parent's cached listing.
        try:
            files = [
                f
                for f in self.dircache[parent]
                if f["name"] == path
                or (f["name"] == path.rstrip("/") and f["type"] == "directory")
            ]
            if len(files) == 0:
                # parent dir was listed but did not contain this file
                raise FileNotFoundError(path)
            return files
        except KeyError:
            # parent not cached either -> implicit None (cache miss)
            pass
388
+
389
    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files under the given path.

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place (perhaps
        using del or slice assignment), and walk() will
        only recurse into the subdirectories whose names remain in dirnames;
        this can be used to prune the search, impose a specific order of visiting,
        or even to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            If raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}  # short name -> full path, used for recursion targets
        dirs = {}  # short name -> info dict for subdirectories
        files = {}  # short name -> info dict for non-directories

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            if callable(on_error):
                on_error(e)
            # "omit": unreadable path yields nothing
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            # plain mode yields just the short names
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            # in topdown mode the caller may have pruned ``dirs`` in place
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files
482
+
483
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
484
+ """List all files below path.
485
+
486
+ Like posix ``find`` command without conditions
487
+
488
+ Parameters
489
+ ----------
490
+ path : str
491
+ maxdepth: int or None
492
+ If not None, the maximum number of levels to descend
493
+ withdirs: bool
494
+ Whether to include directory paths in the output. This is True
495
+ when used by glob, but users usually only want files.
496
+ kwargs are passed to ``ls``.
497
+ """
498
+ # TODO: allow equivalent of -name parameter
499
+ path = self._strip_protocol(path)
500
+ out = {}
501
+
502
+ # Add the root directory if withdirs is requested
503
+ # This is needed for posix glob compliance
504
+ if withdirs and path != "" and self.isdir(path):
505
+ out[path] = self.info(path)
506
+
507
+ for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
508
+ if withdirs:
509
+ files.update(dirs)
510
+ out.update({info["name"]: info for name, info in files.items()})
511
+ if not out and self.isfile(path):
512
+ # walk works on directories, but find should also return [path]
513
+ # when path happens to be a file
514
+ out[path] = {}
515
+ names = sorted(out)
516
+ if not detail:
517
+ return names
518
+ else:
519
+ return {name: out[name] for name in names}
520
+
521
+ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
522
+ """Space used by files and optionally directories within a path
523
+
524
+ Directory size does not include the size of its contents.
525
+
526
+ Parameters
527
+ ----------
528
+ path: str
529
+ total: bool
530
+ Whether to sum all the file sizes
531
+ maxdepth: int or None
532
+ Maximum number of directory levels to descend, None for unlimited.
533
+ withdirs: bool
534
+ Whether to include directory paths in the output.
535
+ kwargs: passed to ``find``
536
+
537
+ Returns
538
+ -------
539
+ Dict of {path: size} if total=False, or int otherwise, where numbers
540
+ refer to bytes used.
541
+ """
542
+ sizes = {}
543
+ if withdirs and self.isdir(path):
544
+ # Include top-level directory in output
545
+ info = self.info(path)
546
+ sizes[info["name"]] = info["size"]
547
+ for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
548
+ info = self.info(f)
549
+ sizes[info["name"]] = info["size"]
550
+ if total:
551
+ return sum(sizes.values())
552
+ else:
553
+ return sizes
554
+
555
    def glob(self, path, maxdepth=None, **kwargs):
        """Find files by glob-matching.

        Pattern matching capabilities for finding files that match the given pattern.

        Parameters
        ----------
        path: str
            The glob pattern to match against
        maxdepth: int or None
            Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
            Must be at least 1 if provided.
        kwargs:
            Additional arguments passed to ``find`` (e.g., detail=True)

        Returns
        -------
        List of matched paths, or dict of paths and their info if detail=True

        Notes
        -----
        Supported patterns:
        - '*': Matches any sequence of characters within a single directory level
        - ``'**'``: Matches any number of directory levels (must be an entire path component)
        - '?': Matches exactly one character
        - '[abc]': Matches any character in the set
        - '[a-z]': Matches any character in the range
        - '[!abc]': Matches any character NOT in the set

        Special behaviors:
        - If the path ends with '/', only folders are returned
        - Consecutive '*' characters are compressed into a single '*'
        - Empty brackets '[]' never match anything
        - Negated empty brackets '[!]' match any single character
        - Special characters in character classes are escaped properly

        Limitations:
        - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
        - No brace expansion ('{a,b}.txt')
        - No extended glob patterns ('+(pattern)', '!(pattern)')
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        # Whether directory matches must be tested with a trailing "/" so
        # that "pat/" and "pat/**" only match directories.
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        # Position of the first magic character of each kind (end-of-string
        # when absent), used to split off the literal, non-magic prefix.
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            # No wildcards at all: degenerate to an existence check.
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            # Literal prefix contains directories: use the deepest literal
            # directory as the find() root, and count remaining levels.
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                # maxdepth applies from the first "**": extend the fixed
                # depth by maxdepth minus the levels the "**" tail spans.
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)
661
+
662
+ def exists(self, path, **kwargs):
663
+ """Is there a file at the given path"""
664
+ try:
665
+ self.info(path, **kwargs)
666
+ return True
667
+ except: # noqa: E722
668
+ # any exception allowed bar FileNotFoundError?
669
+ return False
670
+
671
    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)

        Default implementation has no symlink handling and simply defers to
        ``exists``; note that ``kwargs`` are accepted but not forwarded here.
        """
        return self.exists(path)
675
+
676
    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ```ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.

        Raises
        ------
        FileNotFoundError
            If neither the parent listing nor a direct listing contains path.
        """
        path = self._strip_protocol(path)
        # First attempt: find the entry in its parent's listing.
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        # Second attempt: list the path itself (works when it is a directory).
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            # listing succeeded but did not match exactly -> treat as directory
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)
709
+
710
    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        # Hash whatever info() reports; the hex token is converted to int.
        return int(tokenize(self.info(path)), 16)
722
+
723
+ def size(self, path):
724
+ """Size in bytes of file"""
725
+ return self.info(path).get("size", None)
726
+
727
+ def sizes(self, paths):
728
+ """Size in bytes of each file in a list of paths"""
729
+ return [self.size(p) for p in paths]
730
+
731
    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            # includes FileNotFoundError: a missing path is not a directory
            return False
737
+
738
+ def isfile(self, path):
739
+ """Is this entry file-like?"""
740
+ try:
741
+ return self.info(path)["type"] == "file"
742
+ except: # noqa: E722
743
+ return False
744
+
745
+ def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
746
+ """Get the contents of the file as a string.
747
+
748
+ Parameters
749
+ ----------
750
+ path: str
751
+ URL of file on this filesystems
752
+ encoding, errors, newline: same as `open`.
753
+ """
754
+ with self.open(
755
+ path,
756
+ mode="r",
757
+ encoding=encoding,
758
+ errors=errors,
759
+ newline=newline,
760
+ **kwargs,
761
+ ) as f:
762
+ return f.read()
763
+
764
+ def write_text(
765
+ self, path, value, encoding=None, errors=None, newline=None, **kwargs
766
+ ):
767
+ """Write the text to the given file.
768
+
769
+ An existing file will be overwritten.
770
+
771
+ Parameters
772
+ ----------
773
+ path: str
774
+ URL of file on this filesystems
775
+ value: str
776
+ Text to write.
777
+ encoding, errors, newline: same as `open`.
778
+ """
779
+ with self.open(
780
+ path,
781
+ mode="w",
782
+ encoding=encoding,
783
+ errors=errors,
784
+ newline=newline,
785
+ **kwargs,
786
+ ) as f:
787
+ return f.write(value)
788
+
789
    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystems
        start, end: int
            Bytes limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    # negative start: offset from end of file, clamped at 0
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    # negative end: offset from end of file
                    end = f.size + end
                # read only up to the requested end position
                return f.read(end - f.tell())
            return f.read()
813
+
814
+ def pipe_file(self, path, value, mode="overwrite", **kwargs):
815
+ """Set the bytes of given file"""
816
+ if mode == "create" and self.exists(path):
817
+ # non-atomic but simple way; or could use "xb" in open(), which is likely
818
+ # not as well supported
819
+ raise FileExistsError
820
+ with self.open(path, "wb", **kwargs) as f:
821
+ f.write(value)
822
+
823
+ def pipe(self, path, value=None, **kwargs):
824
+ """Put value into path
825
+
826
+ (counterpart to ``cat``)
827
+
828
+ Parameters
829
+ ----------
830
+ path: string or dict(str, bytes)
831
+ If a string, a single remote location to put ``value`` bytes; if a dict,
832
+ a mapping of {path: bytesvalue}.
833
+ value: bytes, optional
834
+ If using a single path, these are the bytes to put there. Ignored if
835
+ ``path`` is a dict
836
+ """
837
+ if isinstance(path, str):
838
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
839
+ elif isinstance(path, dict):
840
+ for k, v in path.items():
841
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
842
+ else:
843
+ raise ValueError("path must be str or dict")
844
+
845
+ def cat_ranges(
846
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
847
+ ):
848
+ """Get the contents of byte ranges from one or more files
849
+
850
+ Parameters
851
+ ----------
852
+ paths: list
853
+ A list of of filepaths on this filesystems
854
+ starts, ends: int or list
855
+ Bytes limits of the read. If using a single int, the same value will be
856
+ used to read all the specified files.
857
+ """
858
+ if max_gap is not None:
859
+ raise NotImplementedError
860
+ if not isinstance(paths, list):
861
+ raise TypeError
862
+ if not isinstance(starts, list):
863
+ starts = [starts] * len(paths)
864
+ if not isinstance(ends, list):
865
+ ends = [ends] * len(paths)
866
+ if len(starts) != len(paths) or len(ends) != len(paths):
867
+ raise ValueError
868
+ out = []
869
+ for p, s, e in zip(paths, starts, ends):
870
+ try:
871
+ out.append(self.cat_file(p, s, e))
872
+ except Exception as e:
873
+ if on_error == "return":
874
+ out.append(e)
875
+ else:
876
+ raise
877
+ return out
878
+
879
    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch (potentially multiple) paths' contents

        Parameters
        ----------
        recursive: bool
            If True, assume the path(s) are directories, and get all the
            contained files
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.
        kwargs: passed to cat_file

        Returns
        -------
        dict of {path: contents} if there are multiple paths
        or the path has been otherwise expanded
        """
        paths = self.expand_path(path, recursive=recursive, **kwargs)
        # Return a dict only when the request expanded to multiple targets
        # (list input, glob/recursive expansion); otherwise return raw bytes.
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            out = {}
            for path in paths:
                try:
                    out[path] = self.cat_file(path, **kwargs)
                except Exception as e:
                    if on_error == "raise":
                        raise
                    if on_error == "return":
                        out[path] = e
                    # "omit": key is simply left out
            return out
        else:
            return self.cat_file(paths[0], **kwargs)
918
+
919
    def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
        """Copy single remote file to local

        Parameters
        ----------
        rpath: str
            Remote source path.
        lpath: str or file-like
            Local destination path, or an open writable file object.
        callback: fsspec callback
            Receives the (possibly unknown) size and per-chunk progress.
        outfile: file-like, optional
            Pre-opened destination; otherwise ``lpath`` is opened for writing.
        kwargs: passed to ``open()`` on the remote side.
        """
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            # a remote directory maps to a local directory; nothing to copy
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                # stream in blocksize chunks until EOF (read returns b"")
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        # some file-likes return None from write()
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                # only close handles we opened ourselves
                if not isfilelike(lpath):
                    outfile.close()
948
+
949
    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(
                rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            # ``exists`` controls whether other_paths treats the destination as
            # a pre-existing directory to copy *into*.
            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            # branched() gives each transfer its own child progress callback.
            with callback.branched(rpath, lpath) as child:
                self.get_file(rpath, lpath, callback=child, **kwargs)
1012
+
1013
    def put_file(
        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
    ):
        """Copy single file to remote.

        Parameters
        ----------
        lpath: str
            Local source path; if it is a directory, only the remote directory
            is created.
        rpath: str
            Remote destination path.
        callback: fsspec.callbacks.Callback
            Receives the source size and per-chunk updates.
        mode: "overwrite" or "create"
            With "create", raise FileExistsError if the target already exists.
        kwargs: passed to ``self.open``
        """
        if mode == "create" and self.exists(rpath):
            raise FileExistsError
        if os.path.isdir(lpath):
            self.makedirs(rpath, exist_ok=True)
            return None

        with open(lpath, "rb") as f1:
            # seek to end returns the file size without a separate stat call
            size = f1.seek(0, 2)
            callback.set_size(size)
            f1.seek(0)

            self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
            with self.open(rpath, "wb", **kwargs) as f2:
                while f1.tell() < size:
                    data = f1.read(self.blocksize)
                    segment_len = f2.write(data)
                    if segment_len is None:
                        # Some file-like objects do not report bytes written.
                        segment_len = len(data)
                    callback.relative_update(segment_len)
1036
+
1037
    def put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        Calls put_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(
                lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or self.isdir(rpath)
            )

            rpath = (
                self._strip_protocol(rpath)
                if isinstance(rpath, str)
                else [self._strip_protocol(p) for p in rpath]
            )
            # ``exists`` controls whether other_paths treats the destination as
            # a pre-existing directory to copy *into*.
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(rpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            # branched() gives each transfer its own child progress callback.
            with callback.branched(lpath, rpath) as child:
                self.put_file(lpath, rpath, callback=child, **kwargs)
1104
+
1105
+ def head(self, path, size=1024):
1106
+ """Get the first ``size`` bytes from file"""
1107
+ with self.open(path, "rb") as f:
1108
+ return f.read(size)
1109
+
1110
+ def tail(self, path, size=1024):
1111
+ """Get the last ``size`` bytes from file"""
1112
+ with self.open(path, "rb") as f:
1113
+ f.seek(max(-size, -f.size), 2)
1114
+ return f.read()
1115
+
1116
    def cp_file(self, path1, path2, **kwargs):
        # Abstract hook: backends must override to copy one file within the
        # same filesystem; ``copy`` calls this per path pair.
        raise NotImplementedError
1118
+
1119
    def copy(
        self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
    ):
        """Copy within two locations in the filesystem.

        on_error : "raise", "ignore"
            If raise, any not-found exceptions will be raised; if ignore any
            not-found exceptions will cause the path to be skipped; defaults to
            raise unless recursive is true, where the default is ignore
        """
        # Recursive copies default to best-effort: a file disappearing while
        # the tree is walked should not abort the whole operation.
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            from .implementations.local import trailing_sep

            source_is_str = isinstance(path1, str)
            paths1 = self.expand_path(
                path1, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or self.isdir(path2)
            )

            # ``exists`` controls whether other_paths treats the destination
            # as a pre-existing directory to copy *into*.
            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        for p1, p2 in zip(paths1, paths2):
            try:
                self.cp_file(p1, p2, **kwargs)
            except FileNotFoundError:
                if on_error == "raise":
                    raise
1174
+
1175
    def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
        """Turn one or more globs or directories into a list of all matching paths
        to files or directories.

        kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``

        Returns a sorted list of de-duplicated paths; raises FileNotFoundError
        if nothing matches.
        """

        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, (str, os.PathLike)):
            # Normalise the single-path case through the list branch below.
            out = self.expand_path([path], recursive, maxdepth, **kwargs)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:
                if has_magic(p):
                    bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            self.expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                                **kwargs,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(
                        self.find(
                            p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                        )
                    )
                    out |= rec
                if p not in out and (recursive is False or self.exists(p)):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)
1222
+
1223
+ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
1224
+ """Move file(s) from one location to another"""
1225
+ if path1 == path2:
1226
+ logger.debug("%s mv: The paths are the same, so no files were moved.", self)
1227
+ else:
1228
+ # explicitly raise exception to prevent data corruption
1229
+ self.copy(
1230
+ path1, path2, recursive=recursive, maxdepth=maxdepth, onerror="raise"
1231
+ )
1232
+ self.rm(path1, recursive=recursive)
1233
+
1234
    def rm_file(self, path):
        """Delete a file.

        Default implementation delegates to the legacy ``_rm`` hook.
        """
        self._rm(path)
1237
+
1238
    def _rm(self, path):
        """Delete one file"""
        # this is the old name for the method, prefer rm_file
        # Backends should override rm_file; this stub exists so legacy
        # implementations that only defined _rm keep working.
        raise NotImplementedError
1242
+
1243
+ def rm(self, path, recursive=False, maxdepth=None):
1244
+ """Delete files.
1245
+
1246
+ Parameters
1247
+ ----------
1248
+ path: str or list of str
1249
+ File(s) to delete.
1250
+ recursive: bool
1251
+ If file(s) are directories, recursively delete contents and then
1252
+ also remove the directory
1253
+ maxdepth: int or None
1254
+ Depth to pass to walk for finding files to delete, if recursive.
1255
+ If None, there will be no limit and infinite recursion may be
1256
+ possible.
1257
+ """
1258
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
1259
+ for p in reversed(path):
1260
+ self.rm_file(p)
1261
+
1262
+ @classmethod
1263
+ def _parent(cls, path):
1264
+ path = cls._strip_protocol(path)
1265
+ if "/" in path:
1266
+ parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
1267
+ return cls.root_marker + parent
1268
+ else:
1269
+ return cls.root_marker
1270
+
1271
    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Return raw bytes-mode file-like from the file-system.

        Backends typically override this; the base implementation wraps the
        path in a generic AbstractBufferedFile.
        """
        return AbstractBufferedFile(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )
1290
+
1291
    def open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        **kwargs,
    ):
        """
        Return a file-like object from the filesystem

        The resultant instance must function correctly in a context ``with``
        block.

        Parameters
        ----------
        path: str
            Target file
        mode: str like 'rb', 'w'
            See builtin ``open()``
            Mode "x" (exclusive write) may be implemented by the backend. Even if
            it is, whether it is checked up front or on commit, and whether it is
            atomic is implementation-dependent.
        block_size: int
            Some indication of buffering - this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        encoding, errors, newline: passed on to TextIOWrapper for text mode
        """
        import io

        path = self._strip_protocol(path)
        if "b" not in mode:
            # Text mode: open the underlying file in binary and wrap it in a
            # TextIOWrapper via a recursive call.
            mode = mode.replace("t", "") + "b"

            text_kwargs = {
                k: kwargs.pop(k)
                for k in ["encoding", "errors", "newline"]
                if k in kwargs
            }
            return io.TextIOWrapper(
                self.open(
                    path,
                    mode,
                    block_size=block_size,
                    cache_options=cache_options,
                    compression=compression,
                    **kwargs,
                ),
                **text_kwargs,
            )
        else:
            # Inside a transaction, writes default to deferred (autocommit off).
            ac = kwargs.pop("autocommit", not self._intrans)
            f = self._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=ac,
                cache_options=cache_options,
                **kwargs,
            )
            if compression is not None:
                from fsspec.compression import compr
                from fsspec.core import get_compression

                compression = get_compression(path, compression)
                compress = compr[compression]
                f = compress(f, mode=mode[0])

            if not ac and "r" not in mode:
                # Register deferred-write files so the transaction can
                # commit or discard them later.
                self.transaction.files.append(f)
            return f
1368
+
1369
+ def touch(self, path, truncate=True, **kwargs):
1370
+ """Create empty file, or update timestamp
1371
+
1372
+ Parameters
1373
+ ----------
1374
+ path: str
1375
+ file location
1376
+ truncate: bool
1377
+ If True, always set file size to 0; if False, update timestamp and
1378
+ leave file unchanged, if backend allows this
1379
+ """
1380
+ if truncate or not self.exists(path):
1381
+ with self.open(path, "wb", **kwargs):
1382
+ pass
1383
+ else:
1384
+ raise NotImplementedError # update timestamp, if possible
1385
+
1386
+ def ukey(self, path):
1387
+ """Hash of file properties, to tell if it has changed"""
1388
+ return sha256(str(self.info(path)).encode()).hexdigest()
1389
+
1390
    def read_block(self, fn, offset, length, delimiter=None):
        """Read a block of bytes from

        Starting at ``offset`` of the file, read ``length`` bytes. If
        ``delimiter`` is set then we ensure that the read starts and stops at
        delimiter boundaries that follow the locations ``offset`` and ``offset
        + length``. If ``offset`` is zero then we start at zero. The
        bytestring returned WILL include the end delimiter string.

        If offset+length is beyond the eof, reads to eof.

        Parameters
        ----------
        fn: string
            Path to filename
        offset: int
            Byte offset to start read
        length: int
            Number of bytes to read. If None, read to end.
        delimiter: bytes (optional)
            Ensure reading starts and stops at delimiter bytestring

        Examples
        --------
        >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
        b'Alice, 100\\nBo'
        >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\n'

        Use ``length=None`` to read to the end of the file.
        >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\nCharlie, 300'

        See Also
        --------
        :func:`fsspec.utils.read_block`
        """
        with self.open(fn, "rb") as f:
            size = f.size
            if length is None:
                length = size
            if size is not None and offset + length > size:
                # Clamp to EOF rather than over-reading.
                length = size - offset
            return read_block(f, offset, length, delimiter)
1434
+
1435
    def to_json(self, *, include_password: bool = True) -> str:
        """
        JSON representation of this filesystem instance.

        Parameters
        ----------
        include_password: bool, default True
            Whether to include the password (if any) in the output.

        Returns
        -------
        JSON string with keys ``cls`` (the python location of this class),
        protocol (text name of this class's protocol, first one in case of
        multiple), ``args`` (positional args, usually empty), and all other
        keyword arguments as their own keys.

        Warnings
        --------
        Serialized filesystems may contain sensitive information which have been
        passed to the constructor, such as passwords and tokens. Make sure you
        store and send them in a secure environment!
        """
        from .json import FilesystemJSONEncoder

        # json.dumps instantiates the encoder class itself, so the
        # include_password flag is injected as a class attribute of a
        # dynamically created subclass.
        return json.dumps(
            self,
            cls=type(
                "_FilesystemJSONEncoder",
                (FilesystemJSONEncoder,),
                {"include_password": include_password},
            ),
        )
1467
+
1468
    @staticmethod
    def from_json(blob: str) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from JSON representation.

        See ``.to_json()`` for the expected structure of the input.

        Parameters
        ----------
        blob: str

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        # The decoder resolves the "cls" key and rebuilds the instance.
        return json.loads(blob, cls=FilesystemJSONDecoder)
1492
+
1493
+ def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
1494
+ """
1495
+ JSON-serializable dictionary representation of this filesystem instance.
1496
+
1497
+ Parameters
1498
+ ----------
1499
+ include_password: bool, default True
1500
+ Whether to include the password (if any) in the output.
1501
+
1502
+ Returns
1503
+ -------
1504
+ Dictionary with keys ``cls`` (the python location of this class),
1505
+ protocol (text name of this class's protocol, first one in case of
1506
+ multiple), ``args`` (positional args, usually empty), and all other
1507
+ keyword arguments as their own keys.
1508
+
1509
+ Warnings
1510
+ --------
1511
+ Serialized filesystems may contain sensitive information which have been
1512
+ passed to the constructor, such as passwords and tokens. Make sure you
1513
+ store and send them in a secure environment!
1514
+ """
1515
+ from .json import FilesystemJSONEncoder
1516
+
1517
+ json_encoder = FilesystemJSONEncoder()
1518
+
1519
+ cls = type(self)
1520
+ proto = self.protocol
1521
+
1522
+ storage_options = dict(self.storage_options)
1523
+ if not include_password:
1524
+ storage_options.pop("password", None)
1525
+
1526
+ return dict(
1527
+ cls=f"{cls.__module__}:{cls.__name__}",
1528
+ protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
1529
+ args=json_encoder.make_serializable(self.storage_args),
1530
+ **json_encoder.make_serializable(storage_options),
1531
+ )
1532
+
1533
    @staticmethod
    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from dictionary representation.

        See ``.to_dict()`` for the expected structure of the input.

        Parameters
        ----------
        dct: Dict[str, Any]

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        json_decoder = FilesystemJSONDecoder()

        dct = dict(dct)  # Defensive copy

        # Resolve (and import) the target class from the "cls" key.
        cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
        if cls is None:
            raise ValueError("Not a serialized AbstractFileSystem")

        # Remaining keys after popping metadata are constructor kwargs.
        dct.pop("cls", None)
        dct.pop("protocol", None)

        return cls(
            *json_decoder.unmake_serializable(dct.pop("args", ())),
            **json_decoder.unmake_serializable(dct),
        )
1571
+
1572
    def _get_pyarrow_filesystem(self):
        """
        Make a version of the FS instance which will be acceptable to pyarrow
        """
        # all instances already also derive from pyarrow
        return self
1578
+
1579
    def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
        """Create key/value store based on this file-system

        Makes a MutableMapping interface to the FS at the given root path.
        See ``fsspec.mapping.FSMap`` for further details.

        Parameters
        ----------
        root: str
            Path prefix under which keys live.
        check: bool
            Whether to verify the root is usable at construction time.
        create: bool
            Whether to create the root location if missing.
        missing_exceptions: tuple of Exception, optional
            Exception types to interpret as "key missing".
        """
        from .mapping import FSMap

        return FSMap(
            root,
            self,
            check=check,
            create=create,
            missing_exceptions=missing_exceptions,
        )
1594
+
1595
    @classmethod
    def clear_instance_cache(cls):
        """
        Clear the cache of filesystem instances.

        Notes
        -----
        Unless overridden by setting the ``cachable`` class attribute to False,
        the filesystem class stores a reference to newly created instances. This
        prevents Python's normal rules around garbage collection from working,
        since the instances refcount will not drop to zero until
        ``clear_instance_cache`` is called.
        """
        cls._cache.clear()
1609
+
1610
    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        # Optional backend capability; base class provides no implementation.
        raise NotImplementedError
1613
+
1614
    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        # Optional backend capability; base class provides no implementation.
        raise NotImplementedError
1617
+
1618
    def tree(
        self,
        path: str = "/",
        recursion_limit: int = 2,
        max_display: int = 25,
        display_size: bool = False,
        prefix: str = "",
        is_last: bool = True,
        first: bool = True,
        indent_size: int = 4,
    ) -> str:
        """
        Return a tree-like structure of the filesystem starting from the given path as a string.

        Parameters
        ----------
        path: Root path to start traversal from
        recursion_limit: Maximum depth of directory traversal
        max_display: Maximum number of items to display per directory
        display_size: Whether to display file sizes
        prefix: Current line prefix for visual tree structure
        is_last: Whether current item is last in its level
        first: Whether this is the first call (displays root path)
        indent_size: Number of spaces by indent

        Returns
        -------
        str: A string representing the tree structure.

        Example
        -------
        >>> from fsspec import filesystem

        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
        >>> print(tree)
        """

        def format_bytes(n: int) -> str:
            """Format bytes as text."""
            # NOTE(review): the loop variable ``prefix`` shadows the enclosing
            # ``prefix`` parameter; harmless here since the helper does not use
            # the outer name, but a rename would be clearer.
            for prefix, k in (
                ("P", 2**50),
                ("T", 2**40),
                ("G", 2**30),
                ("M", 2**20),
                ("k", 2**10),
            ):
                if n >= 0.9 * k:
                    return f"{n / k:.2f} {prefix}b"
            return f"{n}B"

        result = []

        if first:
            result.append(path)

        if recursion_limit:
            indent = " " * indent_size
            contents = self.ls(path, detail=True)
            # Directories first, then alphabetical within each group.
            contents.sort(
                key=lambda x: (x.get("type") != "directory", x.get("name", ""))
            )

            if max_display is not None and len(contents) > max_display:
                displayed_contents = contents[:max_display]
                remaining_count = len(contents) - max_display
            else:
                displayed_contents = contents
                remaining_count = 0

            for i, item in enumerate(displayed_contents):
                # The truncation message, if any, counts as a trailing entry.
                is_last_item = (i == len(displayed_contents) - 1) and (
                    remaining_count == 0
                )

                branch = (
                    "└" + ("─" * (indent_size - 2))
                    if is_last_item
                    else "├" + ("─" * (indent_size - 2))
                )
                branch += " "
                new_prefix = prefix + (
                    indent if is_last_item else "│" + " " * (indent_size - 1)
                )

                name = os.path.basename(item.get("name", ""))

                if display_size and item.get("type") == "directory":
                    # For directories, report child counts instead of bytes.
                    sub_contents = self.ls(item.get("name", ""), detail=True)
                    num_files = sum(
                        1 for sub_item in sub_contents if sub_item.get("type") == "file"
                    )
                    num_folders = sum(
                        1
                        for sub_item in sub_contents
                        if sub_item.get("type") == "directory"
                    )

                    if num_files == 0 and num_folders == 0:
                        size = " (empty folder)"
                    elif num_files == 0:
                        size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
                    elif num_folders == 0:
                        size = f" ({num_files} file{'s' if num_files > 1 else ''})"
                    else:
                        size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
                elif display_size and item.get("type") == "file":
                    size = f" ({format_bytes(item.get('size', 0))})"
                else:
                    size = ""

                result.append(f"{prefix}{branch}{name}{size}")

                if item.get("type") == "directory" and recursion_limit > 0:
                    # Recurse one level deeper; child output is appended flat
                    # and joined at the end.
                    result.append(
                        self.tree(
                            path=item.get("name", ""),
                            recursion_limit=recursion_limit - 1,
                            max_display=max_display,
                            display_size=display_size,
                            prefix=new_prefix,
                            is_last=is_last_item,
                            first=False,
                            indent_size=indent_size,
                        )
                    )

            if remaining_count > 0:
                more_message = f"{remaining_count} more item(s) not displayed."
                result.append(
                    f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
                )

        return "\n".join(_ for _ in result if _)
1752
+
1753
    # ------------------------------------------------------------------------
    # Aliases
    # Thin delegating wrappers: each forwards directly to the primary method
    # named in its docstring and adds no behavior of its own.

    def read_bytes(self, path, start=None, end=None, **kwargs):
        """Alias of `AbstractFileSystem.cat_file`."""
        return self.cat_file(path, start=start, end=end, **kwargs)

    def write_bytes(self, path, value, **kwargs):
        """Alias of `AbstractFileSystem.pipe_file`."""
        self.pipe_file(path, value, **kwargs)

    def makedir(self, path, create_parents=True, **kwargs):
        """Alias of `AbstractFileSystem.mkdir`."""
        return self.mkdir(path, create_parents=create_parents, **kwargs)

    def mkdirs(self, path, exist_ok=False):
        """Alias of `AbstractFileSystem.makedirs`."""
        return self.makedirs(path, exist_ok=exist_ok)

    def listdir(self, path, detail=True, **kwargs):
        """Alias of `AbstractFileSystem.ls`."""
        return self.ls(path, detail=detail, **kwargs)

    def cp(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.copy`."""
        return self.copy(path1, path2, **kwargs)

    def move(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def stat(self, path, **kwargs):
        """Alias of `AbstractFileSystem.info`."""
        return self.info(path, **kwargs)

    def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
        """Alias of `AbstractFileSystem.du`."""
        return self.du(path, total=total, maxdepth=maxdepth, **kwargs)

    def rename(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def delete(self, path, recursive=False, maxdepth=None):
        """Alias of `AbstractFileSystem.rm`."""
        return self.rm(path, recursive=recursive, maxdepth=maxdepth)

    def upload(self, lpath, rpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.put`."""
        return self.put(lpath, rpath, recursive=recursive, **kwargs)

    def download(self, rpath, lpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.get`."""
        return self.get(rpath, lpath, recursive=recursive, **kwargs)
1807
+
1808
    def sign(self, path, expiration=100, **kwargs):
        """Create a signed URL representing the given path

        Some implementations allow temporary URLs to be generated, as a
        way of delegating credentials.

        Parameters
        ----------
        path : str
            The path on the filesystem
        expiration : int
            Number of seconds to enable the URL for (if supported)

        Returns
        -------
        URL : str
            The signed URL

        Raises
        ------
        NotImplementedError : if method is not implemented for a filesystem
        """
        raise NotImplementedError("Sign is not implemented for this filesystem")
1831
+
1832
    def _isfilestore(self):
        # Originally inherited from pyarrow DaskFileSystem. Keeping this
        # here for backwards compatibility as long as pyarrow uses its
        # legacy fsspec-compatible filesystems and thus accepts fsspec
        # filesystems as well
        # Returns False: generic filesystems are not local file stores.
        return False
1838
+
1839
+
1840
+ class AbstractBufferedFile(io.IOBase):
1841
+ """Convenient class to derive from to provide buffering
1842
+
1843
+ In the case that the backend does not provide a pythonic file-like object
1844
+ already, this class contains much of the logic to build one. The only
1845
+ methods that need to be overridden are ``_upload_chunk``,
1846
+ ``_initiate_upload`` and ``_fetch_range``.
1847
+ """
1848
+
1849
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
1850
+ _details = None
1851
+
1852
    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """
        Template for files with buffered reading and writing

        Parameters
        ----------
        fs: instance of FileSystem
        path: str
            location in file-system
        mode: str
            Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
            systems may be read-only, and some may not support append.
        block_size: int
            Buffer size for reading or writing, 'default' for class default
        autocommit: bool
            Whether to write to final destination; may only impact what
            happens when file is being closed.
        cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
            Caching policy in read mode. See the definitions in ``core``.
        cache_options : dict
            Additional options passed to the constructor for the cache specified
            by `cache_type`.
        size: int
            If given and in read mode, suppressed having to look up the file size
        kwargs:
            Gets stored as self.kwargs
        """
        from .core import caches

        self.path = path
        self.fs = fs
        self.mode = mode
        self.blocksize = (
            self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
        )
        self.loc = 0  # current file offset
        self.autocommit = autocommit
        self.end = None
        self.start = None
        self.closed = False

        if cache_options is None:
            cache_options = {}

        if "trim" in kwargs:
            warnings.warn(
                "Passing 'trim' to control the cache behavior has been deprecated. "
                "Specify it within the 'cache_options' argument instead.",
                FutureWarning,
            )
            cache_options["trim"] = kwargs.pop("trim")

        self.kwargs = kwargs

        if mode not in {"ab", "rb", "wb", "xb"}:
            raise NotImplementedError("File mode not supported")
        if mode == "rb":
            # Read mode: determine the size (from arg or backend info) and set
            # up the block cache that services reads.
            if size is not None:
                self.size = size
            else:
                self.size = self.details["size"]
            self.cache = caches[cache_type](
                self.blocksize, self._fetch_range, self.size, **cache_options
            )
        else:
            # Write/append mode: accumulate into an in-memory buffer until a
            # chunk upload is triggered.
            self.buffer = io.BytesIO()
            self.offset = None
            self.forced = False
            self.location = None
1932
+
1933
    @property
    def details(self):
        """File metadata dict, fetched lazily from ``fs.info(path)`` on first access."""
        if self._details is None:
            self._details = self.fs.info(self.path)
        return self._details

    @details.setter
    def details(self, value):
        # Setting the details dict also refreshes the cached file size.
        self._details = value
        self.size = value["size"]
1943
+
1944
    @property
    def full_name(self):
        """Path including the filesystem's protocol prefix."""
        return _unstrip_protocol(self.path, self.fs)
1947
+
1948
    @property
    def closed(self):
        """Whether the file has been closed; defaults to True if the attribute is missing."""
        # get around this attr being read-only in IOBase
        # use getattr here, since this can be called during del
        return getattr(self, "_closed", True)

    @closed.setter
    def closed(self, c):
        self._closed = c
1957
+
1958
+ def __hash__(self):
1959
+ if "w" in self.mode:
1960
+ return id(self)
1961
+ else:
1962
+ return int(tokenize(self.details), 16)
1963
+
1964
+ def __eq__(self, other):
1965
+ """Files are equal if they have the same checksum, only in read mode"""
1966
+ if self is other:
1967
+ return True
1968
+ return (
1969
+ isinstance(other, type(self))
1970
+ and self.mode == "rb"
1971
+ and other.mode == "rb"
1972
+ and hash(self) == hash(other)
1973
+ )
1974
+
1975
    def commit(self):
        """Move from temp to final destination"""
        # Default is a no-op; subclasses with transactional writes override this.
1977
+
1978
    def discard(self):
        """Throw away temporary file"""
        # Default is a no-op; subclasses with transactional writes override this.
1980
+
1981
+ def info(self):
1982
+ """File information about this path"""
1983
+ if self.readable():
1984
+ return self.details
1985
+ else:
1986
+ raise ValueError("Info not available while writing")
1987
+
1988
    def tell(self):
        """Current file location"""
        return self.loc
1991
+
1992
+ def seek(self, loc, whence=0):
1993
+ """Set current file location
1994
+
1995
+ Parameters
1996
+ ----------
1997
+ loc: int
1998
+ byte location
1999
+ whence: {0, 1, 2}
2000
+ from start of file, current location or end of file, resp.
2001
+ """
2002
+ loc = int(loc)
2003
+ if not self.mode == "rb":
2004
+ raise OSError(ESPIPE, "Seek only available in read mode")
2005
+ if whence == 0:
2006
+ nloc = loc
2007
+ elif whence == 1:
2008
+ nloc = self.loc + loc
2009
+ elif whence == 2:
2010
+ nloc = self.size + loc
2011
+ else:
2012
+ raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
2013
+ if nloc < 0:
2014
+ raise ValueError("Seek before start of file")
2015
+ self.loc = nloc
2016
+ return self.loc
2017
+
2018
    def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.

        Returns
        -------
        int
            Number of bytes added to the buffer.
        """
        if not self.writable():
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            # flush(force=True) has already finalized the upload
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        # A full block triggers an upload of the buffered data
        if self.buffer.tell() >= self.blocksize:
            self.flush()
        return out
2041
+
2042
    def flush(self, force=False):
        """
        Write buffered data to backend store.

        Writes the current buffer, if it is larger than the block-size, or if
        the file is being closed.

        Parameters
        ----------
        force: bool
            When closing, write the last block even if it is smaller than
            blocks are allowed to be. Disallows further writing to this file.
        """

        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.readable():
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload; offset tracks bytes already sent
            self.offset = 0
            try:
                self._initiate_upload()
            except:
                # A failed initiation leaves the file unusable, so mark closed
                self.closed = True
                raise

        # _upload_chunk may return False to signal "keep buffering" (e.g. a
        # backend needing larger parts); only then is the buffer retained.
        if self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()
2083
+
2084
    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ==========
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.

        Returns
        -------
        Subclasses may return False to indicate the chunk was not sent and the
        buffer should be retained (see ``flush``); any other value (including
        the default None) means the buffer may be reset.
        """
        # may not yet have been initialized, may need to call _initialize_upload
2094
+
2095
    def _initiate_upload(self):
        """Create remote file/upload"""
        # Hook for subclasses; called once from flush() before the first chunk.
        pass
2098
+
2099
    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        # Used as the fetcher callback by the read-mode cache instance.
        return self.fs.cat_file(self.path, start=start, end=end)
2102
+
2103
    def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.

        Returns
        -------
        bytes
            Up to ``length`` bytes; shorter only at end of file.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            # negative means read everything up to the known file size
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = self.cache._fetch(self.loc, self.loc + length)

        logger.debug(
            "%s read: %i - %i %s",
            self,
            self.loc,
            self.loc + length,
            self.cache._log_stats(),
        )
        # advance by what was actually returned (may be short at EOF)
        self.loc += len(out)
        return out
2133
+
2134
+ def readinto(self, b):
2135
+ """mirrors builtin file's readinto method
2136
+
2137
+ https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
2138
+ """
2139
+ out = memoryview(b).cast("B")
2140
+ data = self.read(out.nbytes)
2141
+ out[: len(data)] = data
2142
+ return len(data)
2143
+
2144
    def readuntil(self, char=b"\n", blocks=None):
        """Return data between current position and first occurrence of char

        char is included in the output, except if the end of the tile is
        encountered first.

        Parameters
        ----------
        char: bytes
            Thing to find
        blocks: None or int
            How much to read in each go. Defaults to file blocksize - which may
            mean a new read on every call.
        """
        out = []
        while True:
            start = self.tell()
            part = self.read(blocks or self.blocksize)
            if len(part) == 0:
                # end of file reached without finding the delimiter
                break
            found = part.find(char)
            if found > -1:
                # keep everything up to and including the delimiter, then
                # rewind so the next read starts just after it
                out.append(part[: found + len(char)])
                self.seek(start + found + len(char))
                break
            out.append(part)
        return b"".join(out)
2171
+
2172
    def readline(self):
        """Read until and including the first occurrence of newline character

        Note that, because of character encoding, this is not necessarily a
        true line ending.
        """
        return self.readuntil(b"\n")
2179
+
2180
+ def __next__(self):
2181
+ out = self.readline()
2182
+ if out:
2183
+ return out
2184
+ raise StopIteration
2185
+
2186
    def __iter__(self):
        # The file is its own line iterator (see __next__).
        return self
2188
+
2189
+ def readlines(self):
2190
+ """Return all data, split by the newline character, including the newline character"""
2191
+ data = self.read()
2192
+ lines = data.split(b"\n")
2193
+ out = [l + b"\n" for l in lines[:-1]]
2194
+ if data.endswith(b"\n"):
2195
+ return out
2196
+ else:
2197
+ return out + [lines[-1]]
2198
+ # return list(self) ???
2199
+
2200
    def readinto1(self, b):
        # Same as readinto: a single underlying read suffices here.
        return self.readinto(b)
2202
+
2203
    def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            # some files (e.g. shared handles) opt out of closing entirely
            return
        if self.closed:
            # idempotent: closing twice is a no-op
            return
        try:
            if self.mode == "rb":
                # drop the read cache and its buffered data
                self.cache = None
            else:
                if not self.forced:
                    # push any remaining buffered bytes and finalize the upload
                    self.flush(force=True)

                if self.fs is not None:
                    # listings for the file and its parent are now stale
                    self.fs.invalidate_cache(self.path)
                    self.fs.invalidate_cache(self.fs._parent(self.path))
        finally:
            # mark closed even if flush/invalidation raised
            self.closed = True
2224
+
2225
+ def readable(self):
2226
+ """Whether opened for reading"""
2227
+ return "r" in self.mode and not self.closed
2228
+
2229
    def seekable(self):
        """Whether is seekable (only in read mode)"""
        return self.readable()
2232
+
2233
+ def writable(self):
2234
+ """Whether opened for writing"""
2235
+ return self.mode in {"wb", "ab", "xb"} and not self.closed
2236
+
2237
    def __reduce__(self):
        """Support pickling of read-mode files via the module-level ``reopen`` helper."""
        if self.mode != "rb":
            # a partially-uploaded write buffer cannot be reconstructed
            raise RuntimeError("Pickling a writeable file is not supported")

        # reopen() recreates the file on unpickle and seeks back to self.loc
        return reopen, (
            self.fs,
            self.path,
            self.mode,
            self.blocksize,
            self.loc,
            self.size,
            self.autocommit,
            self.cache.name if self.cache else "none",
            self.kwargs,
        )
2252
+
2253
    def __del__(self):
        # Best-effort close on garbage collection (flushes write buffers).
        if not self.closed:
            self.close()
2256
+
2257
    def __str__(self):
        """Human-readable description naming the filesystem class and path."""
        return f"<File-like object {type(self.fs).__name__}, {self.path}>"

    __repr__ = __str__
2261
+
2262
    def __enter__(self):
        # Context-manager entry: no extra setup, just hand back the file.
        return self
2264
+
2265
    def __exit__(self, *args):
        # Always close on context exit, regardless of exceptions.
        self.close()
2267
+
2268
+
2269
def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
    """Recreate a buffered file on *fs* and restore its read position.

    Used as the unpickling entry point produced by ``__reduce__``; opens the
    path with the original open-time options and seeks back to ``loc``.
    """
    new_file = fs.open(
        path,
        mode=mode,
        block_size=blocksize,
        autocommit=autocommit,
        cache_type=cache_type,
        size=size,
        **kwargs,
    )
    if loc > 0:
        new_file.seek(loc)
    return new_file
env/lib/python3.13/site-packages/fsspec/transaction.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import deque
2
+
3
+
4
class Transaction:
    """Filesystem transaction write context

    Collects files opened for writing so that their commit or discard can be
    deferred and applied together, semi-atomically. An instance lives as the
    ``.transaction`` attribute of the owning filesystem.
    """

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        if self.fs:
            self.fs._intrans = False
            self.fs._transaction = None
            self.fs = None

    def start(self):
        """Start a transaction on this FileSystem"""
        # drop any leftovers from a previously failed completion
        self.files = deque()
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        while self.files:
            pending = self.files.popleft()
            (pending.commit if commit else pending.discard)()
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None
50
+
51
+
52
class FileActor:
    """Holder of pending transaction files.

    Plain in-process container; DaskTransaction instantiates it remotely as a
    distributed actor so every worker appends to the same list.
    """

    def __init__(self):
        self.files = []

    def commit(self):
        """Commit every pending file, then forget them all."""
        for pending in self.files:
            pending.commit()
        self.files.clear()

    def discard(self):
        """Discard every pending file, then forget them all."""
        for pending in self.files:
            pending.discard()
        self.files.clear()

    def append(self, f):
        """Register one more pending file."""
        self.files.append(f)
68
+
69
+
70
class DaskTransaction(Transaction):
    """Transaction whose pending-file list lives in a distributed actor.

    Requires the ``distributed`` (Dask) package. The file collection is a
    remote ``FileActor``, so workers anywhere in the cluster can append to it.
    """

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        # The actor lives on the cluster; its method calls return futures.
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
        self.fs = None
env/lib/python3.13/site-packages/fsspec/utils.py ADDED
@@ -0,0 +1,738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import math
6
+ import os
7
+ import re
8
+ import sys
9
+ import tempfile
10
+ from collections.abc import Callable, Iterable, Iterator, Sequence
11
+ from functools import partial
12
+ from hashlib import md5
13
+ from importlib.metadata import version
14
+ from typing import IO, TYPE_CHECKING, Any, TypeVar
15
+ from urllib.parse import urlsplit
16
+
17
+ if TYPE_CHECKING:
18
+ import pathlib
19
+ from typing import TypeGuard
20
+
21
+ from fsspec.spec import AbstractFileSystem
22
+
23
+
24
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
25
+
26
+ T = TypeVar("T")
27
+
28
+
29
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Raises
    ------
    KeyError
        If an inherited option collides with an inferred one at a different
        value (via ``update_storage_options``).

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case
    # (and anything without an explicit protocol, which defaults to "file")
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        # re-join the fragment: "#" is a legal character in many object-store keys
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            drive, path = windows_path.groups()
            path = f"{drive}:{path}"

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            # bucket-style stores fold the "host" (bucket) into the path
            options["path"] = options["host"] + options["path"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options
112
+
113
+
114
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge *inherited* into *options* in place, rejecting conflicts.

    Raises
    ------
    KeyError
        If a key appears in both dicts with differing values.
    """
    inherited = inherited or {}
    for key in set(options) & set(inherited):
        if options.get(key) != inherited.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage option:\n{key}"
            )
    options.update(inherited)
128
+
129
+
130
# Registry mapping lowercase filename extension -> compression name;
# populated via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    Looks the (lower-cased) filename extension up in the registry of
    compressions populated by ``fsspec.compression.register_compression``
    (builtin gz/bz2/zip plus optional ones); returns None when unknown.
    """
    ext = os.path.splitext(filename)[-1].strip(".").lower()
    return compressions.get(ext)
145
+
146
+
147
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # nudge upward so max_int == 0 or an exact power of 10 pads correctly
    width = int(math.ceil(math.log10(max_int + 1e-8)))

    def name_function(i: int) -> str:
        return str(i).zfill(width)

    return name_function
174
+
175
+
176
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        # prepend the tail of the previous read so a delimiter straddling
        # two blocks is still detected
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                # position the file just past the first delimiter occurrence
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            # tolerate type mismatches (e.g. text-mode files) and keep scanning
            pass
        # keep just enough of this block to detect a straddling delimiter
        last = full[-len(delimiter) :]
223
+
224
+
225
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        # snap the start of the read forward to the next delimiter boundary
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # shrink the requested length by however far the start moved
        length -= start - offset

        # snap the end of the read forward to the next delimiter boundary too
        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter if seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b
297
+
298
+
299
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        args = args + (kwargs,)
    payload = str(args).encode()
    try:
        digest = md5(payload)
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        digest = md5(payload, usedforsecurity=False)
    return digest.hexdigest()
318
+
319
+
320
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced according to its
    __fspath__ method.

    For backwards compatibility with older Python version, pathlib.Path
    objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    fspath = getattr(filepath, "__fspath__", None)
    if fspath is not None:
        return fspath()
    if hasattr(filepath, "path"):
        # some file-like/entry objects carry their location as `.path`
        return filepath.path
    return filepath  # type: ignore[return-value]
350
+
351
+
352
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Instantiate *cls* with the given args and trigger its worker detection.

    NOTE(review): presumably a deserialization helper for filesystems on
    remote (dask) workers — confirm against callers; requires *cls* instances
    to define ``_determine_worker``.
    """
    inst = cls(*args, **kwargs)
    inst._determine_worker()  # type: ignore[attr-defined]
    return inst
358
+
359
+
360
def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the shortest prefix common to all

    The prefix is measured in whole "/"-separated components, not characters.
    """
    split = [p.split("/") for p in paths]
    depth = min(len(s) for s in split)
    # count how many leading components every path shares
    matched = 0
    while matched < depth and all(s[matched] == split[0][matched] for s in split):
        matched += 1
    return "/".join(split[0][:matched])
371
+
372
+
373
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, it is already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            # keep only each file's basename under the destination root
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                # destination dir exists: keep the last source component so
                # files land inside it rather than replacing it
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                # no shared prefix and relative sources: just nest under root
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                # swap the common prefix for the destination root
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        assert len(paths) == len(path2)
    return path2
416
+
417
+
418
def is_exception(obj: Any) -> bool:
    """True when *obj* is an exception instance (any BaseException subclass)."""
    return isinstance(obj, BaseException)
420
+
421
+
422
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    """True when *f* exposes the minimal file interface (read/close/tell)."""
    for attr in ("read", "close", "tell"):
        if not hasattr(f, attr):
            return False
    return True
424
+
425
+
426
def get_protocol(url: str) -> str:
    """Return the protocol prefix of *url*, or "file" when none is present.

    Splits on the first "::" (chained filesystems) or "://" marker.
    """
    url = stringify_path(url)
    pieces = re.split(r"(\:\:|\://)", url, maxsplit=1)
    return pieces[0] if len(pieces) > 1 else "file"
432
+
433
+
434
def get_file_extension(url: str) -> str:
    """Return the text after the final "." of *url*, or "" when it has no dot."""
    url = stringify_path(url)
    _, dot, ext = url.rpartition(".")
    return ext if dot else ""
440
+
441
+
442
def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        # filesystems advertise local accessibility via the `local_file` flag
        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
451
+
452
+
453
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    # already imported: cheapest path
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        # read installed metadata without importing the package
        return version(name)
    except:  # noqa: E722
        pass
    try:
        # last resort: actually import and inspect __version__
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None
477
+
478
+
479
def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    """Attach a stream handler with fsspec's standard format to a logger.

    Provide either a ``logger`` instance or a ``logger_name`` to look one up;
    ``clear=True`` removes any handlers already attached first.
    """
    if logger is None:
        if logger_name is None:
            raise ValueError("Provide either logger object or logger name")
        logger = logging.getLogger(logger_name)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
        )
    )
    if clear:
        logger.handlers.clear()
    logger.addHandler(handler)
    logger.setLevel(level)
    return logger
498
+
499
+
500
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    """Re-attach *fs*'s protocol prefix to the bare path *name*."""
    return fs.unstrip_protocol(name)
502
+
503
+
504
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Class decorator: expose each name in *methods* as a read-only property
    that forwards to the *origin_name* attribute of the instance."""

    def _forwarder(name: str) -> property:
        # bind `name` per property; a read-only property avoids accidental override
        def getter(self: Any) -> Any:
            return getattr(getattr(self, origin_name), name)

        return property(getter)

    def wrapper(cls: type[T]) -> type[T]:
        for name in methods:
            setattr(cls, name, _forwarder(name))
        return cls

    return wrapper
522
+
523
+
524
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    """Context manager that simply yields *obj* with no setup or teardown."""
    yield obj
527
+
528
+
529
def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        # scalar start applies to every path
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        # scalar end applies to every path
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    # normalize None/falsy starts to 0 so arithmetic below is safe
    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                # previous range already reads to end-of-file for this path
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends
597
+
598
+
599
def file_size(filelike: IO[bytes]) -> int:
    """Return the total byte length of an open, seekable file-like object.

    The stream position is restored before returning, even on error.
    """
    saved_position = filelike.tell()
    try:
        # seek(0, 2) moves to end-of-file and returns the new absolute
        # offset, which is exactly the file's size in bytes.
        return filelike.seek(0, 2)
    finally:
        filelike.seek(saved_position)
606
+
607
+
608
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    Update ``path`` atomically: the caller writes to a temporary file created
    next to ``path``, which replaces ``path`` only if the block completes
    without raising. On failure the temporary file is removed and ``path`` is
    left untouched.
    """
    fd, tmp_name = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    completed = False
    try:
        with open(fd, mode) as handle:
            yield handle
        completed = True
    finally:
        if completed:
            # One-step replacement of the destination with the finished file.
            os.replace(tmp_name, path)
        else:
            # Remove the partial temp file; tolerate it already being gone.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(tmp_name)
627
+
628
+
629
def _translate(pat, STAR, QUESTION_MARK):
    # Copied from: https://github.com/python/cpython/pull/106703.
    """Translate one glob segment ``pat`` into a list of regex fragments.

    ``STAR`` and ``QUESTION_MARK`` are the regex fragments substituted for
    ``*`` and ``?`` respectively; ``[...]`` character classes are converted
    in place and every other character is regex-escaped.
    """
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            # Scan ahead for the closing "]"; a leading "!" (negation) or an
            # immediate "]" is part of the class content, not a terminator.
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # Unterminated class: treat the "[" as a literal character.
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    # Split the class content on range-forming hyphens so the
                    # pieces can be escaped individually below.
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Trailing hyphen belongs to the last chunk literally.
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res
701
+
702
+
703
def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    # Both the primary and (if present) alternate OS path separators count
    # as segment boundaries.
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    # Regex fragments for: one trailing segment, one mid-path segment,
    # any number of segments (for "**"), and any trailing remainder.
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        # "*" matches exactly one path segment; "**" matches any number.
        if part == "*":
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            # Ordinary segment: translate its wildcards via _translate().
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    # (?s:...) makes "." match newlines too; \Z anchors at end of string.
    return rf"(?s:{res})\Z"
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/METADATA ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: git-lfs
3
+ Version: 1.6
4
+ Summary: A lightweight Git Large File Storage fetcher
5
+ Home-page: https://github.com/liberapay/git-lfs-fetch.py
6
+ Author: Changaco
7
+ Author-email: changaco@changaco.oy.lc
8
+ License: CC0
9
+ Keywords: git lfs
10
+ Platform: UNKNOWN
11
+ Description-Content-Type: text/x-rst
12
+
13
+ A lightweight Git Large File Storage fetcher written in python.
14
+
15
+ This module cannot fully replace the official git-lfs client, it only knows how
16
+ to download the files, cache them (the same way the official client does), and
17
+ place them in a checkout directory. Uploading files is not implemented at all.
18
+
19
+ Installation
20
+ ============
21
+
22
+ pip install git-lfs
23
+
24
+ python-git-lfs is compatible with python 2 and 3.
25
+
26
+ Usage
27
+ =====
28
+
29
+ Basic: simply run ``python -m git_lfs`` in a normal Git repository.
30
+
31
+ Advanced::
32
+
33
+ python -m git_lfs [-h] [-v] [git_repo] [checkout_dir]
34
+
35
+ positional arguments:
36
+ git_repo if it's bare you need to provide a checkout_dir
37
+ checkout_dir
38
+
39
+ optional arguments:
40
+ -h, --help show this help message and exit
41
+ -v, --verbose
42
+
43
+ License
44
+ =======
45
+
46
+ `CC0 Public Domain Dedication <http://creativecommons.org/publicdomain/zero/1.0/>`_
47
+
48
+
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git_lfs-1.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ git_lfs-1.6.dist-info/METADATA,sha256=R8JeXdM_tbNNzy23Sw992NKPOV3NXjkrUzCcGMqVT6Y,1174
3
+ git_lfs-1.6.dist-info/RECORD,,
4
+ git_lfs-1.6.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ git_lfs-1.6.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
6
+ git_lfs-1.6.dist-info/top_level.txt,sha256=sfhnq_PjKyFts61t6Ts6ssAYa2uj0KaCnjWWpCc2ElI,8
7
+ git_lfs/__init__.py,sha256=wxf1h0HCxlTYd0Ui_ieYgjVFLVCrl9aVpi-cYqta6kw,8688
8
+ git_lfs/__main__.py,sha256=Imn6ZITlrbp9EyuR_zkN8BN8ZMWDHU6cusJ1_RG5k-0,437
9
+ git_lfs/__pycache__/__init__.cpython-313.pyc,,
10
+ git_lfs/__pycache__/__main__.cpython-313.pyc,,
11
+ git_lfs/__pycache__/utils.cpython-313.pyc,,
12
+ git_lfs/utils.py,sha256=4b11S10mPHITmvKpAriLVHvcZyqYZk4oYORpvKawmM0,1216
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/REQUESTED ADDED
File without changes
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.34.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any
6
+
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git_lfs
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/METADATA ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: h11
3
+ Version: 0.16.0
4
+ Summary: A pure-Python, bring-your-own-I/O implementation of HTTP/1.1
5
+ Home-page: https://github.com/python-hyper/h11
6
+ Author: Nathaniel J. Smith
7
+ Author-email: njs@pobox.com
8
+ License: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: Implementation :: CPython
13
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Internet :: WWW/HTTP
22
+ Classifier: Topic :: System :: Networking
23
+ Requires-Python: >=3.8
24
+ License-File: LICENSE.txt
25
+ Dynamic: author
26
+ Dynamic: author-email
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: home-page
30
+ Dynamic: license
31
+ Dynamic: license-file
32
+ Dynamic: requires-python
33
+ Dynamic: summary
34
+
35
+ h11
36
+ ===
37
+
38
+ .. image:: https://travis-ci.org/python-hyper/h11.svg?branch=master
39
+ :target: https://travis-ci.org/python-hyper/h11
40
+ :alt: Automated test status
41
+
42
+ .. image:: https://codecov.io/gh/python-hyper/h11/branch/master/graph/badge.svg
43
+ :target: https://codecov.io/gh/python-hyper/h11
44
+ :alt: Test coverage
45
+
46
+ .. image:: https://readthedocs.org/projects/h11/badge/?version=latest
47
+ :target: http://h11.readthedocs.io/en/latest/?badge=latest
48
+ :alt: Documentation Status
49
+
50
+ This is a little HTTP/1.1 library written from scratch in Python,
51
+ heavily inspired by `hyper-h2 <https://hyper-h2.readthedocs.io/>`_.
52
+
53
+ It's a "bring-your-own-I/O" library; h11 contains no IO code
54
+ whatsoever. This means you can hook h11 up to your favorite network
55
+ API, and that could be anything you want: synchronous, threaded,
56
+ asynchronous, or your own implementation of `RFC 6214
57
+ <https://tools.ietf.org/html/rfc6214>`_ -- h11 won't judge you.
58
+ (Compare this to the current state of the art, where every time a `new
59
+ network API <https://trio.readthedocs.io/>`_ comes along then someone
60
+ gets to start over reimplementing the entire HTTP protocol from
61
+ scratch.) Cory Benfield made an `excellent blog post describing the
62
+ benefits of this approach
63
+ <https://lukasa.co.uk/2015/10/The_New_Hyper/>`_, or if you like video
64
+ then here's his `PyCon 2016 talk on the same theme
65
+ <https://www.youtube.com/watch?v=7cC3_jGwl_U>`_.
66
+
67
+ This also means that h11 is not immediately useful out of the box:
68
+ it's a toolkit for building programs that speak HTTP, not something
69
+ that could directly replace ``requests`` or ``twisted.web`` or
70
+ whatever. But h11 makes it much easier to implement something like
71
+ ``requests`` or ``twisted.web``.
72
+
73
+ At a high level, working with h11 goes like this:
74
+
75
+ 1) First, create an ``h11.Connection`` object to track the state of a
76
+ single HTTP/1.1 connection.
77
+
78
+ 2) When you read data off the network, pass it to
79
+ ``conn.receive_data(...)``; you'll get back a list of objects
80
+ representing high-level HTTP "events".
81
+
82
+ 3) When you want to send a high-level HTTP event, create the
83
+ corresponding "event" object and pass it to ``conn.send(...)``;
84
+ this will give you back some bytes that you can then push out
85
+ through the network.
86
+
87
+ For example, a client might instantiate and then send a
88
+ ``h11.Request`` object, then zero or more ``h11.Data`` objects for the
89
+ request body (e.g., if this is a POST), and then a
90
+ ``h11.EndOfMessage`` to indicate the end of the message. Then the
91
+ server would then send back a ``h11.Response``, some ``h11.Data``, and
92
+ its own ``h11.EndOfMessage``. If either side violates the protocol,
93
+ you'll get a ``h11.ProtocolError`` exception.
94
+
95
+ h11 is suitable for implementing both servers and clients, and has a
96
+ pleasantly symmetric API: the events you send as a client are exactly
97
+ the ones that you receive as a server and vice-versa.
98
+
99
+ `Here's an example of a tiny HTTP client
100
+ <https://github.com/python-hyper/h11/blob/master/examples/basic-client.py>`_
101
+
102
+ It also has `a fine manual <https://h11.readthedocs.io/>`_.
103
+
104
+ FAQ
105
+ ---
106
+
107
+ *Whyyyyy?*
108
+
109
+ I wanted to play with HTTP in `Curio
110
+ <https://curio.readthedocs.io/en/latest/tutorial.html>`__ and `Trio
111
+ <https://trio.readthedocs.io>`__, which at the time didn't have any
112
+ HTTP libraries. So I thought, no big deal, Python has, like, a dozen
113
+ different implementations of HTTP, surely I can find one that's
114
+ reusable. I didn't find one, but I did find Cory's call-to-arms
115
+ blog-post. So I figured, well, fine, if I have to implement HTTP from
116
+ scratch, at least I can make sure no-one *else* has to ever again.
117
+
118
+ *Should I use it?*
119
+
120
+ Maybe. You should be aware that it's a very young project. But, it's
121
+ feature complete and has an exhaustive test-suite and complete docs,
122
+ so the next step is for people to try using it and see how it goes
123
+ :-). If you do then please let us know -- if nothing else we'll want
124
+ to talk to you before making any incompatible changes!
125
+
126
+ *What are the features/limitations?*
127
+
128
+ Roughly speaking, it's trying to be a robust, complete, and non-hacky
129
+ implementation of the first "chapter" of the HTTP/1.1 spec: `RFC 7230:
130
+ HTTP/1.1 Message Syntax and Routing
131
+ <https://tools.ietf.org/html/rfc7230>`_. That is, it mostly focuses on
132
+ implementing HTTP at the level of taking bytes on and off the wire,
133
+ and the headers related to that, and tries to be anal about spec
134
+ conformance. It doesn't know about higher-level concerns like URL
135
+ routing, conditional GETs, cross-origin cookie policies, or content
136
+ negotiation. But it does know how to take care of framing,
137
+ cross-version differences in keep-alive handling, and the "obsolete
138
+ line folding" rule, so you can focus your energies on the hard /
139
+ interesting parts for your application, and it tries to support the
140
+ full specification in the sense that any useful HTTP/1.1 conformant
141
+ application should be able to use h11.
142
+
143
+ It's pure Python, and has no dependencies outside of the standard
144
+ library.
145
+
146
+ It has a test suite with 100.0% coverage for both statements and
147
+ branches.
148
+
149
+ Currently it supports Python 3 (testing on 3.8-3.12) and PyPy 3.
150
+ The last Python 2-compatible version was h11 0.11.x.
151
+ (Originally it had a Cython wrapper for `http-parser
152
+ <https://github.com/nodejs/http-parser>`_ and a beautiful nested state
153
+ machine implemented with ``yield from`` to postprocess the output. But
154
+ I had to take these out -- the new *parser* needs fewer lines-of-code
155
+ than the old *parser wrapper*, is written in pure Python, uses no
156
+ exotic language syntax, and has more features. It's sad, really; that
157
+ old state machine was really slick. I just need a few sentences here
158
+ to mourn that.)
159
+
160
+ I don't know how fast it is. I haven't benchmarked or profiled it yet,
161
+ so it's probably got a few pointless hot spots, and I've been trying
162
+ to err on the side of simplicity and robustness instead of
163
+ micro-optimization. But at the architectural level I tried hard to
164
+ avoid fundamentally bad decisions, e.g., I believe that all the
165
+ parsing algorithms remain linear-time even in the face of pathological
166
+ input like slowloris, and there are no byte-by-byte loops. (I also
167
+ believe that it maintains bounded memory usage in the face of
168
+ arbitrary/pathological input.)
169
+
170
+ The whole library is ~800 lines-of-code. You can read and understand
171
+ the whole thing in less than an hour. Most of the energy invested in
172
+ this so far has been spent on trying to keep things simple by
173
+ minimizing special-cases and ad hoc state manipulation; even though it
174
+ is now quite small and simple, I'm still annoyed that I haven't
175
+ figured out how to make it even smaller and simpler. (Unfortunately,
176
+ HTTP does not lend itself to simplicity.)
177
+
178
+ The API is ~feature complete and I don't expect the general outlines
179
+ to change much, but you can't judge an API's ergonomics until you
180
+ actually document and use it, so I'd expect some changes in the
181
+ details.
182
+
183
+ *How do I try it?*
184
+
185
+ .. code-block:: sh
186
+
187
+ $ pip install h11
188
+ $ git clone git@github.com:python-hyper/h11
189
+ $ cd h11/examples
190
+ $ python basic-client.py
191
+
192
+ and go from there.
193
+
194
+ *License?*
195
+
196
+ MIT
197
+
198
+ *Code of conduct?*
199
+
200
+ Contributors are requested to follow our `code of conduct
201
+ <https://github.com/python-hyper/h11/blob/master/CODE_OF_CONDUCT.md>`_ in
202
+ all project spaces.
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/RECORD ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h11-0.16.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ h11-0.16.0.dist-info/METADATA,sha256=KPMmCYrAn8unm48YD5YIfIQf4kViFct7hyqcfVzRnWQ,8348
3
+ h11-0.16.0.dist-info/RECORD,,
4
+ h11-0.16.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
5
+ h11-0.16.0.dist-info/licenses/LICENSE.txt,sha256=N9tbuFkm2yikJ6JYZ_ELEjIAOuob5pzLhRE4rbjm82E,1124
6
+ h11-0.16.0.dist-info/top_level.txt,sha256=F7dC4jl3zeh8TGHEPaWJrMbeuoWbS379Gwdi-Yvdcis,4
7
+ h11/__init__.py,sha256=iO1KzkSO42yZ6ffg-VMgbx_ZVTWGUY00nRYEWn-s3kY,1507
8
+ h11/__pycache__/__init__.cpython-313.pyc,,
9
+ h11/__pycache__/_abnf.cpython-313.pyc,,
10
+ h11/__pycache__/_connection.cpython-313.pyc,,
11
+ h11/__pycache__/_events.cpython-313.pyc,,
12
+ h11/__pycache__/_headers.cpython-313.pyc,,
13
+ h11/__pycache__/_readers.cpython-313.pyc,,
14
+ h11/__pycache__/_receivebuffer.cpython-313.pyc,,
15
+ h11/__pycache__/_state.cpython-313.pyc,,
16
+ h11/__pycache__/_util.cpython-313.pyc,,
17
+ h11/__pycache__/_version.cpython-313.pyc,,
18
+ h11/__pycache__/_writers.cpython-313.pyc,,
19
+ h11/_abnf.py,sha256=ybixr0xsupnkA6GFAyMubuXF6Tc1lb_hF890NgCsfNc,4815
20
+ h11/_connection.py,sha256=k9YRVf6koZqbttBW36xSWaJpWdZwa-xQVU9AHEo9DuI,26863
21
+ h11/_events.py,sha256=I97aXoal1Wu7dkL548BANBUCkOIbe-x5CioYA9IBY14,11792
22
+ h11/_headers.py,sha256=P7D-lBNxHwdLZPLimmYwrPG-9ZkjElvvJZJdZAgSP-4,10412
23
+ h11/_readers.py,sha256=a4RypORUCC3d0q_kxPuBIM7jTD8iLt5X91TH0FsduN4,8590
24
+ h11/_receivebuffer.py,sha256=xrspsdsNgWFxRfQcTXxR8RrdjRXXTK0Io5cQYWpJ1Ws,5252
25
+ h11/_state.py,sha256=_5LG_BGR8FCcFQeBPH-TMHgm_-B-EUcWCnQof_9XjFE,13231
26
+ h11/_util.py,sha256=LWkkjXyJaFlAy6Lt39w73UStklFT5ovcvo0TkY7RYuk,4888
27
+ h11/_version.py,sha256=GVSsbPSPDcOuF6ptfIiXnVJoaEm3ygXbMnqlr_Giahw,686
28
+ h11/_writers.py,sha256=oFKm6PtjeHfbj4RLX7VB7KDc1gIY53gXG3_HR9ltmTA,5081
29
+ h11/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (78.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ h11
env/lib/python3.13/site-packages/httpcore/__init__.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._api import request, stream
2
+ from ._async import (
3
+ AsyncConnectionInterface,
4
+ AsyncConnectionPool,
5
+ AsyncHTTP2Connection,
6
+ AsyncHTTP11Connection,
7
+ AsyncHTTPConnection,
8
+ AsyncHTTPProxy,
9
+ AsyncSOCKSProxy,
10
+ )
11
+ from ._backends.base import (
12
+ SOCKET_OPTION,
13
+ AsyncNetworkBackend,
14
+ AsyncNetworkStream,
15
+ NetworkBackend,
16
+ NetworkStream,
17
+ )
18
+ from ._backends.mock import AsyncMockBackend, AsyncMockStream, MockBackend, MockStream
19
+ from ._backends.sync import SyncBackend
20
+ from ._exceptions import (
21
+ ConnectError,
22
+ ConnectionNotAvailable,
23
+ ConnectTimeout,
24
+ LocalProtocolError,
25
+ NetworkError,
26
+ PoolTimeout,
27
+ ProtocolError,
28
+ ProxyError,
29
+ ReadError,
30
+ ReadTimeout,
31
+ RemoteProtocolError,
32
+ TimeoutException,
33
+ UnsupportedProtocol,
34
+ WriteError,
35
+ WriteTimeout,
36
+ )
37
+ from ._models import URL, Origin, Proxy, Request, Response
38
+ from ._ssl import default_ssl_context
39
+ from ._sync import (
40
+ ConnectionInterface,
41
+ ConnectionPool,
42
+ HTTP2Connection,
43
+ HTTP11Connection,
44
+ HTTPConnection,
45
+ HTTPProxy,
46
+ SOCKSProxy,
47
+ )
48
+
49
+ # The 'httpcore.AnyIOBackend' class is conditional on 'anyio' being installed.
50
+ try:
51
+ from ._backends.anyio import AnyIOBackend
52
+ except ImportError: # pragma: nocover
53
+
54
+ class AnyIOBackend: # type: ignore
55
+ def __init__(self, *args, **kwargs): # type: ignore
56
+ msg = (
57
+ "Attempted to use 'httpcore.AnyIOBackend' but 'anyio' is not installed."
58
+ )
59
+ raise RuntimeError(msg)
60
+
61
+
62
+ # The 'httpcore.TrioBackend' class is conditional on 'trio' being installed.
63
+ try:
64
+ from ._backends.trio import TrioBackend
65
+ except ImportError: # pragma: nocover
66
+
67
+ class TrioBackend: # type: ignore
68
+ def __init__(self, *args, **kwargs): # type: ignore
69
+ msg = "Attempted to use 'httpcore.TrioBackend' but 'trio' is not installed."
70
+ raise RuntimeError(msg)
71
+
72
+
73
+ __all__ = [
74
+ # top-level requests
75
+ "request",
76
+ "stream",
77
+ # models
78
+ "Origin",
79
+ "URL",
80
+ "Request",
81
+ "Response",
82
+ "Proxy",
83
+ # async
84
+ "AsyncHTTPConnection",
85
+ "AsyncConnectionPool",
86
+ "AsyncHTTPProxy",
87
+ "AsyncHTTP11Connection",
88
+ "AsyncHTTP2Connection",
89
+ "AsyncConnectionInterface",
90
+ "AsyncSOCKSProxy",
91
+ # sync
92
+ "HTTPConnection",
93
+ "ConnectionPool",
94
+ "HTTPProxy",
95
+ "HTTP11Connection",
96
+ "HTTP2Connection",
97
+ "ConnectionInterface",
98
+ "SOCKSProxy",
99
+ # network backends, implementations
100
+ "SyncBackend",
101
+ "AnyIOBackend",
102
+ "TrioBackend",
103
+ # network backends, mock implementations
104
+ "AsyncMockBackend",
105
+ "AsyncMockStream",
106
+ "MockBackend",
107
+ "MockStream",
108
+ # network backends, interface
109
+ "AsyncNetworkStream",
110
+ "AsyncNetworkBackend",
111
+ "NetworkStream",
112
+ "NetworkBackend",
113
+ # util
114
+ "default_ssl_context",
115
+ "SOCKET_OPTION",
116
+ # exceptions
117
+ "ConnectionNotAvailable",
118
+ "ProxyError",
119
+ "ProtocolError",
120
+ "LocalProtocolError",
121
+ "RemoteProtocolError",
122
+ "UnsupportedProtocol",
123
+ "TimeoutException",
124
+ "PoolTimeout",
125
+ "ConnectTimeout",
126
+ "ReadTimeout",
127
+ "WriteTimeout",
128
+ "NetworkError",
129
+ "ConnectError",
130
+ "ReadError",
131
+ "WriteError",
132
+ ]
133
+
134
+ __version__ = "1.0.9"
135
+
136
+
137
+ __locals = locals()
138
+ for __name in __all__:
139
+ # Exclude SOCKET_OPTION, it causes AttributeError on Python 3.14
140
+ if not __name.startswith(("__", "SOCKET_OPTION")):
141
+ setattr(__locals[__name], "__module__", "httpcore") # noqa
env/lib/python3.13/site-packages/httpcore/_api.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import typing
5
+
6
+ from ._models import URL, Extensions, HeaderTypes, Response
7
+ from ._sync.connection_pool import ConnectionPool
8
+
9
+
10
def request(
    method: bytes | str,
    url: URL | bytes | str,
    *,
    headers: HeaderTypes = None,
    content: bytes | typing.Iterator[bytes] | None = None,
    extensions: Extensions | None = None,
) -> Response:
    """
    Send a single HTTP request and return the response.

    ```
    response = httpcore.request("GET", "https://www.example.com/")
    ```

    Arguments:
        method: The HTTP method, e.g. `"GET"`, `"OPTIONS"`, `"HEAD"`,
            `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`.
        url: The request URL, as an `httpcore.URL` instance or str/bytes.
        headers: The request headers, as a str/bytes mapping or a list of
            str/bytes two-tuples.
        content: The request body, as bytes or a bytes iterator.
        extensions: Optional extra per-request information, e.g. `"timeout"`.

    Returns:
        An `httpcore.Response` instance.
    """
    # A throwaway connection pool is created per call and closed on exit.
    pool = ConnectionPool()
    with pool:
        response = pool.request(
            method=method,
            url=url,
            headers=headers,
            content=content,
            extensions=extensions,
        )
    return response
48
+
49
+
50
@contextlib.contextmanager
def stream(
    method: bytes | str,
    url: URL | bytes | str,
    *,
    headers: HeaderTypes = None,
    content: bytes | typing.Iterator[bytes] | None = None,
    extensions: Extensions | None = None,
) -> typing.Iterator[Response]:
    """
    Send an HTTP request, yielding the response within a context manager.

    ```
    with httpcore.stream("GET", "https://www.example.com/") as response:
        ...
    ```

    Unlike `request()`, the response body is not read automatically; access
    it with `response.read()` or `for chunk in response.iter_content()`.

    Arguments:
        method: The HTTP method, e.g. `"GET"`, `"OPTIONS"`, `"HEAD"`,
            `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`.
        url: The request URL, as an `httpcore.URL` instance or str/bytes.
        headers: The request headers, as a str/bytes mapping or a list of
            str/bytes two-tuples.
        content: The request body, as bytes or a bytes iterator.
        extensions: Optional extra per-request information, e.g. `"timeout"`.

    Returns:
        An `httpcore.Response` instance.
    """
    # Both the throwaway pool and the streamed response share one `with`;
    # they are torn down (innermost first) when the caller's block exits.
    with ConnectionPool() as pool, pool.stream(
        method=method,
        url=url,
        headers=headers,
        content=content,
        extensions=extensions,
    ) as response:
        yield response
env/lib/python3.13/site-packages/httpcore/_exceptions.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextlib
2
+ import typing
3
+
4
+ ExceptionMapping = typing.Mapping[typing.Type[Exception], typing.Type[Exception]]
5
+
6
+
7
+ @contextlib.contextmanager
8
+ def map_exceptions(map: ExceptionMapping) -> typing.Iterator[None]:
9
+ try:
10
+ yield
11
+ except Exception as exc: # noqa: PIE786
12
+ for from_exc, to_exc in map.items():
13
+ if isinstance(exc, from_exc):
14
+ raise to_exc(exc) from exc
15
+ raise # pragma: nocover
16
+
17
+
18
+ class ConnectionNotAvailable(Exception):
19
+ pass
20
+
21
+
22
+ class ProxyError(Exception):
23
+ pass
24
+
25
+
26
+ class UnsupportedProtocol(Exception):
27
+ pass
28
+
29
+
30
+ class ProtocolError(Exception):
31
+ pass
32
+
33
+
34
+ class RemoteProtocolError(ProtocolError):
35
+ pass
36
+
37
+
38
+ class LocalProtocolError(ProtocolError):
39
+ pass
40
+
41
+
42
+ # Timeout errors
43
+
44
+
45
+ class TimeoutException(Exception):
46
+ pass
47
+
48
+
49
+ class PoolTimeout(TimeoutException):
50
+ pass
51
+
52
+
53
+ class ConnectTimeout(TimeoutException):
54
+ pass
55
+
56
+
57
+ class ReadTimeout(TimeoutException):
58
+ pass
59
+
60
+
61
+ class WriteTimeout(TimeoutException):
62
+ pass
63
+
64
+
65
+ # Network errors
66
+
67
+
68
+ class NetworkError(Exception):
69
+ pass
70
+
71
+
72
+ class ConnectError(NetworkError):
73
+ pass
74
+
75
+
76
+ class ReadError(NetworkError):
77
+ pass
78
+
79
+
80
+ class WriteError(NetworkError):
81
+ pass
env/lib/python3.13/site-packages/httpcore/_models.py ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import ssl
5
+ import typing
6
+ import urllib.parse
7
+
8
+ # Functions for typechecking...
9
+
10
+
11
# Type aliases for the request/response models below. Headers may be given
# as a mapping or as a sequence of (name, value) two-tuples, with each item
# either str or bytes; ``None`` is also accepted for "no headers".
ByteOrStr = typing.Union[bytes, str]
HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]]
HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr]
HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None]

# Mutable bag of optional extra per-request/response information
# (e.g. "timeout").
Extensions = typing.MutableMapping[str, typing.Any]
17
+
18
+
19
def enforce_bytes(value: bytes | str, *, name: str) -> bytes:
    """
    Normalize an argument to bytes.

    Arguments that are ultimately represented as bytes may be passed either
    as bytes or as str. String arguments must contain only plain ASCII
    characters, chr(0)...chr(127); for anything outside that range, pass a
    byte-wise argument instead.

    Raises `TypeError` for non-ASCII strings and for any other type.
    """
    if isinstance(value, bytes):
        return value
    if isinstance(value, str):
        try:
            return value.encode("ascii")
        except UnicodeEncodeError:
            raise TypeError(f"{name} strings may not include unicode characters.")
    seen_type = type(value).__name__
    raise TypeError(f"{name} must be bytes or str, but got {seen_type}.")
38
+
39
+
40
def enforce_url(value: URL | bytes | str, *, name: str) -> URL:
    """
    Normalize a URL parameter to a `URL` instance.

    str/bytes inputs are wrapped in `URL(...)`; existing `URL` instances are
    returned as-is. Anything else raises `TypeError`.
    """
    if isinstance(value, (bytes, str)):
        return URL(value)
    if isinstance(value, URL):
        return value
    seen_type = type(value).__name__
    raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.")
51
+
52
+
53
def enforce_headers(
    value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str
) -> list[tuple[bytes, bytes]]:
    """
    Convenience function that ensures all items in request or response
    headers are either bytes or strings in the plain ASCII range, returning
    them as a list of `(name, value)` byte-pairs. `None` yields an empty
    list; any other type raises `TypeError`.
    """
    if value is None:
        return []
    if isinstance(value, typing.Mapping):
        pairs = value.items()
    elif isinstance(value, typing.Sequence):
        pairs = value
    else:
        seen_type = type(value).__name__
        raise TypeError(
            f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}."
        )
    return [
        (
            enforce_bytes(k, name="header name"),
            enforce_bytes(v, name="header value"),
        )
        for k, v in pairs
    ]
83
+
84
+
85
def enforce_stream(
    value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None,
    *,
    name: str,
) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]:
    """
    Normalize request/response content into a byte stream.

    `None` becomes an empty `ByteStream`, raw bytes are wrapped in a
    `ByteStream`, and existing (async) iterables are passed through as-is.
    """
    if isinstance(value, bytes):
        return ByteStream(value)
    if value is None:
        return ByteStream(b"")
    return value
95
+
96
+
97
# Default ports per scheme, used by `include_request_headers()` to omit a
# redundant port from the auto-populated `Host` header.
#
# * https://tools.ietf.org/html/rfc3986#section-3.2.3
# * https://url.spec.whatwg.org/#url-miscellaneous
# * https://url.spec.whatwg.org/#scheme-state
DEFAULT_PORTS = {
    b"ftp": 21,
    b"http": 80,
    b"https": 443,
    b"ws": 80,
    b"wss": 443,
}
107
+
108
+
109
def include_request_headers(
    headers: list[tuple[bytes, bytes]],
    *,
    url: "URL",
    content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes],
) -> list[tuple[bytes, bytes]]:
    """
    Return the request headers, with `Host` and framing headers filled in.

    A `Host` header is prepended unless one is already present. If `content`
    is provided and neither `Content-Length` nor `Transfer-Encoding` is set,
    a `Content-Length` header is appended for raw bytes content, or
    `Transfer-Encoding: chunked` for streamed content.

    The input `headers` list is never mutated; a new list is returned
    whenever a header needs to be added.
    """
    headers_set = {key.lower() for key, _ in headers}

    if b"host" not in headers_set:
        default_port = DEFAULT_PORTS.get(url.scheme)
        if url.port is None or url.port == default_port:
            # Omit the port from `Host` when it matches the scheme's default.
            header_value = url.host
        else:
            header_value = b"%b:%d" % (url.host, url.port)
        headers = [(b"Host", header_value)] + headers

    if (
        content is not None
        and b"content-length" not in headers_set
        and b"transfer-encoding" not in headers_set
    ):
        if isinstance(content, bytes):
            content_length = str(len(content)).encode("ascii")
            # Fix: build a new list rather than `headers += [...]`, which
            # mutated the *caller's* list in place whenever the `Host`
            # branch above had not already replaced `headers`.
            headers = headers + [(b"Content-Length", content_length)]
        else:
            headers = headers + [(b"Transfer-Encoding", b"chunked")]  # pragma: nocover

    return headers
137
+
138
+
139
+ # Interfaces for byte streams...
140
+
141
+
142
class ByteStream:
    """
    A container for non-streaming content, supporting both synchronous and
    asynchronous iteration over a single fixed chunk of bytes.
    """

    def __init__(self, content: bytes) -> None:
        self._content = content

    def __iter__(self) -> typing.Iterator[bytes]:
        # The entire body is produced as one chunk.
        yield self._content

    async def __aiter__(self) -> typing.AsyncIterator[bytes]:
        yield self._content

    def __repr__(self) -> str:
        return "<{} [{} bytes]>".format(type(self).__name__, len(self._content))
159
+
160
+
161
class Origin:
    """
    A (scheme, host, port) triple identifying a connection origin.
    """

    def __init__(self, scheme: bytes, host: bytes, port: int) -> None:
        self.scheme = scheme
        self.host = host
        self.port = port

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, Origin):
            return False
        return (self.scheme, self.host, self.port) == (
            other.scheme,
            other.host,
            other.port,
        )

    def __str__(self) -> str:
        return "{}://{}:{}".format(
            self.scheme.decode("ascii"),
            self.host.decode("ascii"),
            self.port,
        )
180
+
181
+
182
class URL:
    """
    Represents the URL against which an HTTP request may be made.

    The URL may either be specified as a plain string, for convenience:

    ```python
    url = httpcore.URL("https://www.example.com/")
    ```

    Or be constructed with explicitly pre-parsed components:

    ```python
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/')
    ```

    Using this second more explicit style allows integrations that are using
    `httpcore` to pass through URLs that have already been parsed in order to use
    libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures
    that URL parsing is treated identically at both the networking level and at any
    higher layers of abstraction.

    The four components are important here, as they allow the URL to be precisely
    specified in a pre-parsed format. They also allow certain types of request to
    be created that could not otherwise be expressed.

    For example, an HTTP request to `http://www.example.com/` forwarded via a proxy
    at `http://localhost:8080`...

    ```python
    # Constructs an HTTP request with a complete URL as the target:
    # GET https://www.example.com/ HTTP/1.1
    url = httpcore.URL(
        scheme=b'http',
        host=b'localhost',
        port=8080,
        target=b'https://www.example.com/'
    )
    request = httpcore.Request(
        method="GET",
        url=url
    )
    ```

    Another example is constructing an `OPTIONS *` request...

    ```python
    # Constructs an 'OPTIONS *' HTTP request:
    # OPTIONS * HTTP/1.1
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*')
    request = httpcore.Request(method="OPTIONS", url=url)
    ```

    This kind of request is not possible to formulate with a URL string,
    because the `/` delimiter is always used to demark the target from the
    host/port portion of the URL.

    For convenience, string-like arguments may be specified either as strings or
    as bytes. However, once a request is being issued over-the-wire, the URL
    components are always ultimately required to be a bytewise representation.

    In order to avoid any ambiguity over character encodings, when strings are used
    as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`.
    If you require a bytewise representation that is outside this range you must
    handle the character encoding directly, and pass a bytes instance.
    """

    def __init__(
        self,
        url: bytes | str = "",
        *,
        scheme: bytes | str = b"",
        host: bytes | str = b"",
        port: int | None = None,
        target: bytes | str = b"",
    ) -> None:
        """
        Parameters:
            url: The complete URL as a string or bytes.
            scheme: The URL scheme as a string or bytes.
                Typically either `"http"` or `"https"`.
            host: The URL host as a string or bytes. Such as `"www.example.com"`.
            port: The port to connect to. Either an integer or `None`.
            target: The target of the HTTP request. Such as `"/items?search=red"`.
        """
        if url:
            # A non-empty `url` string takes precedence: the individual
            # `scheme`/`host`/`port`/`target` arguments are ignored here.
            parsed = urllib.parse.urlparse(enforce_bytes(url, name="url"))
            self.scheme = parsed.scheme
            self.host = parsed.hostname or b""
            self.port = parsed.port
            # The request target is the path plus any query string.
            self.target = (parsed.path or b"/") + (
                b"?" + parsed.query if parsed.query else b""
            )
        else:
            self.scheme = enforce_bytes(scheme, name="scheme")
            self.host = enforce_bytes(host, name="host")
            self.port = port
            self.target = enforce_bytes(target, name="target")

    @property
    def origin(self) -> Origin:
        # Raises `KeyError` for schemes outside this supported set.
        default_port = {
            b"http": 80,
            b"https": 443,
            b"ws": 80,
            b"wss": 443,
            b"socks5": 1080,
            b"socks5h": 1080,
        }[self.scheme]
        return Origin(
            scheme=self.scheme, host=self.host, port=self.port or default_port
        )

    def __eq__(self, other: typing.Any) -> bool:
        return (
            isinstance(other, URL)
            and other.scheme == self.scheme
            and other.host == self.host
            and other.port == self.port
            and other.target == self.target
        )

    def __bytes__(self) -> bytes:
        if self.port is None:
            return b"%b://%b%b" % (self.scheme, self.host, self.target)
        return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(scheme={self.scheme!r}, "
            f"host={self.host!r}, port={self.port!r}, target={self.target!r})"
        )
314
+
315
+
316
class Request:
    """
    An HTTP request.
    """

    def __init__(
        self,
        method: bytes | str,
        url: URL | bytes | str,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            method: The HTTP request method, either as a string or bytes.
                For example: `GET`.
            url: The request URL, either as a `URL` instance, or as a string or bytes.
                For example: `"https://www.example.com".`
            headers: The HTTP request headers.
            content: The content of the request body.
            extensions: A dictionary of optional extra information included on
                the request. Possible keys include `"timeout"`, and `"trace"`.
        """
        # Every user-supplied component is normalized into its byte-wise
        # on-the-wire representation up front.
        self.method: bytes = enforce_bytes(method, name="method")
        self.url: URL = enforce_url(url, name="url")
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )
        self.extensions = {} if extensions is None else extensions

        # The "target" extension overrides the request target derived from
        # the URL, while keeping the scheme/host/port used for connecting.
        if "target" in self.extensions:
            self.url = URL(
                scheme=self.url.scheme,
                host=self.url.host,
                port=self.url.port,
                target=self.extensions["target"],
            )

    def __repr__(self) -> str:
        return f"<{type(self).__name__} [{self.method!r}]>"
364
+
365
+
366
class Response:
    """
    An HTTP response.
    """

    def __init__(
        self,
        status: int,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            status: The HTTP status code of the response. For example `200`.
            headers: The HTTP response headers.
            content: The content of the response body.
            extensions: A dictionary of optional extra information included on
                the response. Possible keys include `"http_version"`,
                `"reason_phrase"`, and `"network_stream"`.
        """
        self.status: int = status
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        # Raw bytes / None are wrapped into a `ByteStream`; iterables are
        # passed through as-is and may only be consumed once.
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )
        self.extensions = {} if extensions is None else extensions

        # Guards against iterating the body stream more than once.
        self._stream_consumed = False

    @property
    def content(self) -> bytes:
        # `_content` is populated by `read()` / `aread()`; accessing this
        # property before then on a streaming response is an error.
        if not hasattr(self, "_content"):
            if isinstance(self.stream, typing.Iterable):
                raise RuntimeError(
                    "Attempted to access 'response.content' on a streaming response. "
                    "Call 'response.read()' first."
                )
            else:
                raise RuntimeError(
                    "Attempted to access 'response.content' on a streaming response. "
                    "Call 'await response.aread()' first."
                )
        return self._content

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} [{self.status}]>"

    # Sync interface...

    def read(self) -> bytes:
        """Consume the entire body and cache it; returns the body bytes."""
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an asynchronous response using 'response.read()'. "
                "You should use 'await response.aread()' instead."
            )
        if not hasattr(self, "_content"):
            self._content = b"".join([part for part in self.iter_stream()])
        return self._content

    def iter_stream(self) -> typing.Iterator[bytes]:
        """Iterate the body chunks; may only be called once per response."""
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an asynchronous response using 'for ... in "
                "response.iter_stream()'. "
                "You should use 'async for ... in response.aiter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'for ... in response.iter_stream()' more than once."
            )
        self._stream_consumed = True
        for chunk in self.stream:
            yield chunk

    def close(self) -> None:
        """Close the underlying stream, if it supports closing."""
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close an asynchronous response using 'response.close()'. "
                "You should use 'await response.aclose()' instead."
            )
        if hasattr(self.stream, "close"):
            self.stream.close()

    # Async interface...

    async def aread(self) -> bytes:
        """Async variant of `read()`."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an synchronous response using "
                "'await response.aread()'. "
                "You should use 'response.read()' instead."
            )
        if not hasattr(self, "_content"):
            self._content = b"".join([part async for part in self.aiter_stream()])
        return self._content

    async def aiter_stream(self) -> typing.AsyncIterator[bytes]:
        """Async variant of `iter_stream()`; may only be called once."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an synchronous response using 'async for ... in "
                "response.aiter_stream()'. "
                "You should use 'for ... in response.iter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'async for ... in response.aiter_stream()' "
                "more than once."
            )
        self._stream_consumed = True
        async for chunk in self.stream:
            yield chunk

    async def aclose(self) -> None:
        """Async variant of `close()`."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close a synchronous response using "
                "'await response.aclose()'. "
                "You should use 'response.close()' instead."
            )
        if hasattr(self.stream, "aclose"):
            await self.stream.aclose()
494
+
495
+
496
class Proxy:
    """
    Proxy configuration: target URL, optional Basic auth credentials,
    additional headers, and an optional SSL context for the proxy link.
    """

    def __init__(
        self,
        url: URL | bytes | str,
        auth: tuple[bytes | str, bytes | str] | None = None,
        headers: HeadersAsMapping | HeadersAsSequence | None = None,
        ssl_context: ssl.SSLContext | None = None,
    ):
        self.url = enforce_url(url, name="url")
        self.headers = enforce_headers(headers, name="headers")
        self.ssl_context = ssl_context

        if auth is None:
            self.auth: tuple[bytes, bytes] | None = None
            return

        username = enforce_bytes(auth[0], name="auth")
        password = enforce_bytes(auth[1], name="auth")
        self.auth = (username, password)
        # Pre-compute the `Proxy-Authorization` header from the credentials.
        userpass = username + b":" + password
        authorization = b"Basic " + base64.b64encode(userpass)
        self.headers = [(b"Proxy-Authorization", authorization)] + self.headers
env/lib/python3.13/site-packages/httpcore/_ssl.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import ssl
2
+
3
+ import certifi
4
+
5
+
6
def default_ssl_context() -> ssl.SSLContext:
    """
    Build the default client-side SSL context, with certificate
    verification backed by the `certifi` CA bundle.
    """
    context = ssl.create_default_context()
    context.load_verify_locations(cafile=certifi.where())
    return context
env/lib/python3.13/site-packages/httpcore/_synchronization.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ import types
5
+
6
+ from ._exceptions import ExceptionMapping, PoolTimeout, map_exceptions
7
+
8
# Our async synchronization primitives use either 'anyio' or 'trio' depending
# on if they're running under asyncio or trio.

# Both backends are optional extras; a missing one is recorded as `None`
# and reported by `current_async_library()` if it turns out to be needed.
try:
    import trio
except (ImportError, NotImplementedError):  # pragma: nocover
    trio = None  # type: ignore

try:
    import anyio
except ImportError:  # pragma: nocover
    anyio = None  # type: ignore
20
+
21
+
22
def current_async_library() -> str:
    """
    Return the async environment we're running under: "asyncio" or "trio".

    Raises `RuntimeError` for unsupported environments, or when the
    matching optional dependency (anyio / trio) is not installed.
    """
    # Determine if we're running under trio or asyncio.
    # See https://sniffio.readthedocs.io/en/latest/
    try:
        import sniffio
    except ImportError:  # pragma: nocover
        environment = "asyncio"
    else:
        environment = sniffio.current_async_library()

    if environment not in ("asyncio", "trio"):  # pragma: nocover
        raise RuntimeError("Running under an unsupported async environment.")

    # Each environment requires its backend module to have imported cleanly.
    backend_checks = {
        "asyncio": (
            anyio,
            "Running with asyncio requires installation of 'httpcore[asyncio]'.",
        ),
        "trio": (
            trio,
            "Running with trio requires installation of 'httpcore[trio]'.",
        ),
    }
    module, message = backend_checks[environment]
    if module is None:  # pragma: nocover
        raise RuntimeError(message)

    return environment
46
+
47
+
48
class AsyncLock:
    """
    This is a standard lock.

    In the sync case `Lock` provides thread locking.
    In the async case `AsyncLock` provides async locking.
    """

    def __init__(self) -> None:
        # The backend is detected lazily, so instances may be created
        # outside of an async context.
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a lock with the correct implementation.
        """
        self._backend = current_async_library()
        if self._backend == "trio":
            self._inner_lock = trio.Lock()
        elif self._backend == "asyncio":
            self._inner_lock = anyio.Lock()

    async def __aenter__(self) -> "AsyncLock":
        if not self._backend:
            self.setup()
        await self._inner_lock.acquire()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        # No-op if the lock was never entered (backend still unset).
        if self._backend:
            self._inner_lock.release()
91
+
92
+
93
+ class AsyncThreadLock:
94
+ """
95
+ This is a threading-only lock for no-I/O contexts.
96
+
97
+ In the sync case `ThreadLock` provides thread locking.
98
+ In the async case `AsyncThreadLock` is a no-op.
99
+ """
100
+
101
+ def __enter__(self) -> AsyncThreadLock:
102
+ return self
103
+
104
+ def __exit__(
105
+ self,
106
+ exc_type: type[BaseException] | None = None,
107
+ exc_value: BaseException | None = None,
108
+ traceback: types.TracebackType | None = None,
109
+ ) -> None:
110
+ pass
111
+
112
+
113
class AsyncEvent:
    """
    A one-shot event flag with async waiting, backed by either trio or
    anyio depending on the running environment (detected lazily).
    """

    def __init__(self) -> None:
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        an event with the correct implementation.
        """
        self._backend = current_async_library()
        if self._backend == "trio":
            self._trio_event = trio.Event()
        elif self._backend == "asyncio":
            self._anyio_event = anyio.Event()

    def set(self) -> None:
        if not self._backend:
            self.setup()
        if self._backend == "trio":
            self._trio_event.set()
        elif self._backend == "asyncio":
            self._anyio_event.set()

    async def wait(self, timeout: float | None = None) -> None:
        if not self._backend:
            self.setup()

        if self._backend == "trio":
            # trio's fail_after() requires a concrete deadline, so map
            # `None` onto infinity, and translate its timeout exception.
            exc_map: ExceptionMapping = {trio.TooSlowError: PoolTimeout}
            with map_exceptions(exc_map):
                with trio.fail_after(float("inf") if timeout is None else timeout):
                    await self._trio_event.wait()
        elif self._backend == "asyncio":
            exc_map = {TimeoutError: PoolTimeout}
            with map_exceptions(exc_map):
                with anyio.fail_after(timeout):
                    await self._anyio_event.wait()
152
+
153
+
154
class AsyncSemaphore:
    """
    A bounded semaphore backed by either trio or anyio, detected lazily
    on first acquisition.
    """

    def __init__(self, bound: int) -> None:
        self._bound = bound
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a semaphore with the correct implementation.
        """
        self._backend = current_async_library()
        impl = trio.Semaphore if self._backend == "trio" else anyio.Semaphore
        self._semaphore = impl(initial_value=self._bound, max_value=self._bound)

    async def acquire(self) -> None:
        if not self._backend:
            self.setup()
        await self._semaphore.acquire()

    async def release(self) -> None:
        # No-op if nothing was ever acquired (backend still unset).
        if self._backend:
            self._semaphore.release()
188
+
189
+
190
class AsyncShieldCancellation:
    # For certain portions of our codebase where we're dealing with
    # closing connections during exception handling we want to shield
    # the operation from being cancelled.
    #
    # with AsyncShieldCancellation():
    #     ...  # clean-up operations, shielded from cancellation.

    def __init__(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a shielded scope with the correct implementation.
        """
        # `current_async_library()` only ever returns "trio" or "asyncio".
        self._backend = current_async_library()
        if self._backend == "trio":
            self._shield = trio.CancelScope(shield=True)
        else:
            self._shield = anyio.CancelScope(shield=True)

    def __enter__(self) -> "AsyncShieldCancellation":
        self._shield.__enter__()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        self._shield.__exit__(exc_type, exc_value, traceback)
227
+
228
+
229
+ # Our thread-based synchronization primitives...
230
+
231
+
232
+ class Lock:
233
+ """
234
+ This is a standard lock.
235
+
236
+ In the sync case `Lock` provides thread locking.
237
+ In the async case `AsyncLock` provides async locking.
238
+ """
239
+
240
+ def __init__(self) -> None:
241
+ self._lock = threading.Lock()
242
+
243
+ def __enter__(self) -> Lock:
244
+ self._lock.acquire()
245
+ return self
246
+
247
+ def __exit__(
248
+ self,
249
+ exc_type: type[BaseException] | None = None,
250
+ exc_value: BaseException | None = None,
251
+ traceback: types.TracebackType | None = None,
252
+ ) -> None:
253
+ self._lock.release()
254
+
255
+
256
+ class ThreadLock:
257
+ """
258
+ This is a threading-only lock for no-I/O contexts.
259
+
260
+ In the sync case `ThreadLock` provides thread locking.
261
+ In the async case `AsyncThreadLock` is a no-op.
262
+ """
263
+
264
+ def __init__(self) -> None:
265
+ self._lock = threading.Lock()
266
+
267
+ def __enter__(self) -> ThreadLock:
268
+ self._lock.acquire()
269
+ return self
270
+
271
+ def __exit__(
272
+ self,
273
+ exc_type: type[BaseException] | None = None,
274
+ exc_value: BaseException | None = None,
275
+ traceback: types.TracebackType | None = None,
276
+ ) -> None:
277
+ self._lock.release()
278
+
279
+
280
+ class Event:
281
+ def __init__(self) -> None:
282
+ self._event = threading.Event()
283
+
284
+ def set(self) -> None:
285
+ self._event.set()
286
+
287
+ def wait(self, timeout: float | None = None) -> None:
288
+ if timeout == float("inf"): # pragma: no cover
289
+ timeout = None
290
+ if not self._event.wait(timeout=timeout):
291
+ raise PoolTimeout() # pragma: nocover
292
+
293
+
294
class Semaphore:
    """
    A thin wrapper over `threading.Semaphore` with a fixed initial bound.
    """

    def __init__(self, bound: int) -> None:
        self._semaphore = threading.Semaphore(value=bound)

    def acquire(self) -> None:
        self._semaphore.acquire()

    def release(self) -> None:
        self._semaphore.release()
303
+
304
+
305
+ class ShieldCancellation:
306
+ # Thread-synchronous codebases don't support cancellation semantics.
307
+ # We have this class because we need to mirror the async and sync
308
+ # cases within our package, but it's just a no-op.
309
+ def __enter__(self) -> ShieldCancellation:
310
+ return self
311
+
312
+ def __exit__(
313
+ self,
314
+ exc_type: type[BaseException] | None = None,
315
+ exc_value: BaseException | None = None,
316
+ traceback: types.TracebackType | None = None,
317
+ ) -> None:
318
+ pass
env/lib/python3.13/site-packages/httpcore/_trace.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import types
6
+ import typing
7
+
8
+ from ._models import Request
9
+
10
+
11
class Trace:
    """
    Context manager that reports request trace events.

    Events are delivered via two channels: the optional `"trace"` extension
    callback attached to the request, and DEBUG-level records on the supplied
    logger. Both sync (`with` + `trace()`) and async (`async with` +
    `atrace()`) usage are supported; the trace callback must match the
    interface being used (plain function vs. coroutine function).
    """

    def __init__(
        self,
        name: str,
        logger: logging.Logger,
        request: Request | None = None,
        kwargs: dict[str, typing.Any] | None = None,
    ) -> None:
        self.name = name
        self.logger = logger
        # The per-request trace callback, if the caller supplied one.
        self.trace_extension = (
            None if request is None else request.extensions.get("trace")
        )
        self.debug = self.logger.isEnabledFor(logging.DEBUG)
        self.kwargs = kwargs or {}
        self.return_value: typing.Any = None
        # Tracing is active if either reporting channel is in use.
        self.should_trace = self.debug or self.trace_extension is not None
        # Event names are prefixed with the last component of the logger
        # name, e.g. "http11.<event>".
        self.prefix = self.logger.name.split(".")[-1]

    def trace(self, name: str, info: dict[str, typing.Any]) -> None:
        """Report a single trace event through the synchronous interface."""
        if self.trace_extension is not None:
            prefix_and_name = f"{self.prefix}.{name}"
            ret = self.trace_extension(prefix_and_name, info)
            if inspect.iscoroutine(ret):  # pragma: no cover
                raise TypeError(
                    "If you are using a synchronous interface, "
                    "the callback of the `trace` extension should "
                    "be a normal function instead of an asynchronous function."
                )

        if self.debug:
            # Omit the info args when there's nothing meaningful to show.
            if not info or "return_value" in info and info["return_value"] is None:
                message = name
            else:
                args = " ".join([f"{key}={value!r}" for key, value in info.items()])
                message = f"{name} {args}"
            self.logger.debug(message)

    def __enter__(self) -> Trace:
        if self.should_trace:
            info = self.kwargs
            self.trace(f"{self.name}.started", info)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        if self.should_trace:
            # Report completion or failure depending on whether the body raised.
            if exc_value is None:
                info = {"return_value": self.return_value}
                self.trace(f"{self.name}.complete", info)
            else:
                info = {"exception": exc_value}
                self.trace(f"{self.name}.failed", info)

    async def atrace(self, name: str, info: dict[str, typing.Any]) -> None:
        """Report a single trace event through the asynchronous interface."""
        if self.trace_extension is not None:
            prefix_and_name = f"{self.prefix}.{name}"
            coro = self.trace_extension(prefix_and_name, info)
            if not inspect.iscoroutine(coro):  # pragma: no cover
                raise TypeError(
                    "If you're using an asynchronous interface, "
                    "the callback of the `trace` extension should "
                    "be an asynchronous function rather than a normal function."
                )
            await coro

        if self.debug:
            # Omit the info args when there's nothing meaningful to show.
            if not info or "return_value" in info and info["return_value"] is None:
                message = name
            else:
                args = " ".join([f"{key}={value!r}" for key, value in info.items()])
                message = f"{name} {args}"
            self.logger.debug(message)

    async def __aenter__(self) -> Trace:
        if self.should_trace:
            info = self.kwargs
            await self.atrace(f"{self.name}.started", info)
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        if self.should_trace:
            if exc_value is None:
                info = {"return_value": self.return_value}
                await self.atrace(f"{self.name}.complete", info)
            else:
                info = {"exception": exc_value}
                await self.atrace(f"{self.name}.failed", info)
env/lib/python3.13/site-packages/httpcore/_utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import select
4
+ import socket
5
+ import sys
6
+
7
+
8
+ def is_socket_readable(sock: socket.socket | None) -> bool:
9
+ """
10
+ Return whether a socket, as identifed by its file descriptor, is readable.
11
+ "A socket is readable" means that the read buffer isn't empty, i.e. that calling
12
+ .recv() on it would immediately return some data.
13
+ """
14
+ # NOTE: we want check for readability without actually attempting to read, because
15
+ # we don't want to block forever if it's not readable.
16
+
17
+ # In the case that the socket no longer exists, or cannot return a file
18
+ # descriptor, we treat it as being readable, as if it the next read operation
19
+ # on it is ready to return the terminating `b""`.
20
+ sock_fd = None if sock is None else sock.fileno()
21
+ if sock_fd is None or sock_fd < 0: # pragma: nocover
22
+ return True
23
+
24
+ # The implementation below was stolen from:
25
+ # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478
26
+ # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316
27
+
28
+ # Use select.select on Windows, and when poll is unavailable and select.poll
29
+ # everywhere else. (E.g. When eventlet is in use. See #327)
30
+ if (
31
+ sys.platform == "win32" or getattr(select, "poll", None) is None
32
+ ): # pragma: nocover
33
+ rready, _, _ = select.select([sock_fd], [], [], 0)
34
+ return bool(rready)
35
+ p = select.poll()
36
+ p.register(sock_fd, select.POLLIN)
37
+ return bool(p.poll(0))
env/lib/python3.13/site-packages/httpcore/py.typed ADDED
File without changes
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/METADATA ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.3
2
+ Name: httpx
3
+ Version: 0.28.1
4
+ Summary: The next generation HTTP client.
5
+ Project-URL: Changelog, https://github.com/encode/httpx/blob/master/CHANGELOG.md
6
+ Project-URL: Documentation, https://www.python-httpx.org
7
+ Project-URL: Homepage, https://github.com/encode/httpx
8
+ Project-URL: Source, https://github.com/encode/httpx
9
+ Author-email: Tom Christie <tom@tomchristie.com>
10
+ License: BSD-3-Clause
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Web Environment
13
+ Classifier: Framework :: AsyncIO
14
+ Classifier: Framework :: Trio
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: BSD License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Programming Language :: Python :: 3.8
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Topic :: Internet :: WWW/HTTP
26
+ Requires-Python: >=3.8
27
+ Requires-Dist: anyio
28
+ Requires-Dist: certifi
29
+ Requires-Dist: httpcore==1.*
30
+ Requires-Dist: idna
31
+ Provides-Extra: brotli
32
+ Requires-Dist: brotli; (platform_python_implementation == 'CPython') and extra == 'brotli'
33
+ Requires-Dist: brotlicffi; (platform_python_implementation != 'CPython') and extra == 'brotli'
34
+ Provides-Extra: cli
35
+ Requires-Dist: click==8.*; extra == 'cli'
36
+ Requires-Dist: pygments==2.*; extra == 'cli'
37
+ Requires-Dist: rich<14,>=10; extra == 'cli'
38
+ Provides-Extra: http2
39
+ Requires-Dist: h2<5,>=3; extra == 'http2'
40
+ Provides-Extra: socks
41
+ Requires-Dist: socksio==1.*; extra == 'socks'
42
+ Provides-Extra: zstd
43
+ Requires-Dist: zstandard>=0.18.0; extra == 'zstd'
44
+ Description-Content-Type: text/markdown
45
+
46
+ <p align="center">
47
+ <a href="https://www.python-httpx.org/"><img width="350" height="208" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/butterfly.png" alt='HTTPX'></a>
48
+ </p>
49
+
50
+ <p align="center"><strong>HTTPX</strong> <em>- A next-generation HTTP client for Python.</em></p>
51
+
52
+ <p align="center">
53
+ <a href="https://github.com/encode/httpx/actions">
54
+ <img src="https://github.com/encode/httpx/workflows/Test%20Suite/badge.svg" alt="Test Suite">
55
+ </a>
56
+ <a href="https://pypi.org/project/httpx/">
57
+ <img src="https://badge.fury.io/py/httpx.svg" alt="Package version">
58
+ </a>
59
+ </p>
60
+
61
+ HTTPX is a fully featured HTTP client library for Python 3. It includes **an integrated command line client**, has support for both **HTTP/1.1 and HTTP/2**, and provides both **sync and async APIs**.
62
+
63
+ ---
64
+
65
+ Install HTTPX using pip:
66
+
67
+ ```shell
68
+ $ pip install httpx
69
+ ```
70
+
71
+ Now, let's get started:
72
+
73
+ ```pycon
74
+ >>> import httpx
75
+ >>> r = httpx.get('https://www.example.org/')
76
+ >>> r
77
+ <Response [200 OK]>
78
+ >>> r.status_code
79
+ 200
80
+ >>> r.headers['content-type']
81
+ 'text/html; charset=UTF-8'
82
+ >>> r.text
83
+ '<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>...'
84
+ ```
85
+
86
+ Or, using the command-line client.
87
+
88
+ ```shell
89
+ $ pip install 'httpx[cli]' # The command line client is an optional dependency.
90
+ ```
91
+
92
+ Which now allows us to use HTTPX directly from the command-line...
93
+
94
+ <p align="center">
95
+ <img width="700" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/httpx-help.png" alt='httpx --help'>
96
+ </p>
97
+
98
+ Sending a request...
99
+
100
+ <p align="center">
101
+ <img width="700" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/httpx-request.png" alt='httpx http://httpbin.org/json'>
102
+ </p>
103
+
104
+ ## Features
105
+
106
+ HTTPX builds on the well-established usability of `requests`, and gives you:
107
+
108
+ * A broadly [requests-compatible API](https://www.python-httpx.org/compatibility/).
109
+ * An integrated command-line client.
110
+ * HTTP/1.1 [and HTTP/2 support](https://www.python-httpx.org/http2/).
111
+ * Standard synchronous interface, but with [async support if you need it](https://www.python-httpx.org/async/).
112
+ * Ability to make requests directly to [WSGI applications](https://www.python-httpx.org/advanced/transports/#wsgi-transport) or [ASGI applications](https://www.python-httpx.org/advanced/transports/#asgi-transport).
113
+ * Strict timeouts everywhere.
114
+ * Fully type annotated.
115
+ * 100% test coverage.
116
+
117
+ Plus all the standard features of `requests`...
118
+
119
+ * International Domains and URLs
120
+ * Keep-Alive & Connection Pooling
121
+ * Sessions with Cookie Persistence
122
+ * Browser-style SSL Verification
123
+ * Basic/Digest Authentication
124
+ * Elegant Key/Value Cookies
125
+ * Automatic Decompression
126
+ * Automatic Content Decoding
127
+ * Unicode Response Bodies
128
+ * Multipart File Uploads
129
+ * HTTP(S) Proxy Support
130
+ * Connection Timeouts
131
+ * Streaming Downloads
132
+ * .netrc Support
133
+ * Chunked Requests
134
+
135
+ ## Installation
136
+
137
+ Install with pip:
138
+
139
+ ```shell
140
+ $ pip install httpx
141
+ ```
142
+
143
+ Or, to include the optional HTTP/2 support, use:
144
+
145
+ ```shell
146
+ $ pip install httpx[http2]
147
+ ```
148
+
149
+ HTTPX requires Python 3.8+.
150
+
151
+ ## Documentation
152
+
153
+ Project documentation is available at [https://www.python-httpx.org/](https://www.python-httpx.org/).
154
+
155
+ For a run-through of all the basics, head over to the [QuickStart](https://www.python-httpx.org/quickstart/).
156
+
157
+ For more advanced topics, see the [Advanced Usage](https://www.python-httpx.org/advanced/) section, the [async support](https://www.python-httpx.org/async/) section, or the [HTTP/2](https://www.python-httpx.org/http2/) section.
158
+
159
+ The [Developer Interface](https://www.python-httpx.org/api/) provides a comprehensive API reference.
160
+
161
+ To find out about tools that integrate with HTTPX, see [Third Party Packages](https://www.python-httpx.org/third_party_packages/).
162
+
163
+ ## Contribute
164
+
165
+ If you want to contribute with HTTPX check out the [Contributing Guide](https://www.python-httpx.org/contributing/) to learn how to start.
166
+
167
+ ## Dependencies
168
+
169
+ The HTTPX project relies on these excellent libraries:
170
+
171
+ * `httpcore` - The underlying transport implementation for `httpx`.
172
+ * `h11` - HTTP/1.1 support.
173
+ * `certifi` - SSL certificates.
174
+ * `idna` - Internationalized domain name support.
175
+ * `sniffio` - Async library autodetection.
176
+
177
+ As well as these optional installs:
178
+
179
+ * `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
180
+ * `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)*
181
+ * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
182
+ * `click` - Command line client support. *(Optional, with `httpx[cli]`)*
183
+ * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
184
+ * `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
185
+
186
+ A huge amount of credit is due to `requests` for the API layout that
187
+ much of this work follows, as well as to `urllib3` for plenty of design
188
+ inspiration around the lower-level networking details.
189
+
190
+ ---
191
+
192
+ <p align="center"><i>HTTPX is <a href="https://github.com/encode/httpx/blob/master/LICENSE.md">BSD licensed</a> code.<br/>Designed & crafted with care.</i><br/>&mdash; 🦋 &mdash;</p>
193
+
194
+ ## Release Information
195
+
196
+ ### Fixed
197
+
198
+ * Reintroduced supposedly-private `URLTypes` shortcut. (#2673)
199
+
200
+
201
+ ---
202
+
203
+ [Full changelog](https://github.com/encode/httpx/blob/master/CHANGELOG.md)
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/RECORD ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ../../../bin/httpx,sha256=Rdk5LW602TkuZHquotWRLLc4TnaaXhrPLmhrYBsr8js,253
2
+ httpx-0.28.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3
+ httpx-0.28.1.dist-info/METADATA,sha256=_rubD48-gNV8gZnDBPNcQzboWB0dGNeYPJJ2a4J5OyU,7052
4
+ httpx-0.28.1.dist-info/RECORD,,
5
+ httpx-0.28.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
6
+ httpx-0.28.1.dist-info/entry_points.txt,sha256=2lVkdQmxLA1pNMgSN2eV89o90HCZezhmNwsy6ryKDSA,37
7
+ httpx-0.28.1.dist-info/licenses/LICENSE.md,sha256=TsWdVE8StfU5o6cW_TIaxYzNgDC0ZSIfLIgCAM3yjY0,1508
8
+ httpx/__init__.py,sha256=CsaZe6yZj0rHg6322AWKWHGTMVr9txgEfD5P3_Rrz60,2171
9
+ httpx/__pycache__/__init__.cpython-313.pyc,,
10
+ httpx/__pycache__/__version__.cpython-313.pyc,,
11
+ httpx/__pycache__/_api.cpython-313.pyc,,
12
+ httpx/__pycache__/_auth.cpython-313.pyc,,
13
+ httpx/__pycache__/_client.cpython-313.pyc,,
14
+ httpx/__pycache__/_config.cpython-313.pyc,,
15
+ httpx/__pycache__/_content.cpython-313.pyc,,
16
+ httpx/__pycache__/_decoders.cpython-313.pyc,,
17
+ httpx/__pycache__/_exceptions.cpython-313.pyc,,
18
+ httpx/__pycache__/_main.cpython-313.pyc,,
19
+ httpx/__pycache__/_models.cpython-313.pyc,,
20
+ httpx/__pycache__/_multipart.cpython-313.pyc,,
21
+ httpx/__pycache__/_status_codes.cpython-313.pyc,,
22
+ httpx/__pycache__/_types.cpython-313.pyc,,
23
+ httpx/__pycache__/_urlparse.cpython-313.pyc,,
24
+ httpx/__pycache__/_urls.cpython-313.pyc,,
25
+ httpx/__pycache__/_utils.cpython-313.pyc,,
26
+ httpx/__version__.py,sha256=LoUyYeOXTieGzuP_64UL0wxdtxjuu_QbOvE7NOg-IqU,108
27
+ httpx/_api.py,sha256=r_Zgs4jIpcPJLqK5dbbSayqo_iVMKFaxZCd-oOHxLEs,11743
28
+ httpx/_auth.py,sha256=Yr3QwaUSK17rGYx-7j-FdicFIzz4Y9FFV-1F4-7RXX4,11891
29
+ httpx/_client.py,sha256=xD-UG67-WMkeltAAOeGGj-cZ2RRTAm19sWRxlFY7_40,65714
30
+ httpx/_config.py,sha256=pPp2U-wicfcKsF-KYRE1LYdt3e6ERGeIoXZ8Gjo3LWc,8547
31
+ httpx/_content.py,sha256=LGGzrJTR3OvN4Mb1GVVNLXkXJH-6oKlwAttO9p5w_yg,8161
32
+ httpx/_decoders.py,sha256=p0dX8I0NEHexs3UGp4SsZutiMhsXrrWl6-GnqVb0iKM,12041
33
+ httpx/_exceptions.py,sha256=bxW7fxzgVMAdNTbwT0Vnq04gJDW1_gI_GFiQPuMyjL0,8527
34
+ httpx/_main.py,sha256=Cg9GMabiTT_swaDfUgIRitSwxLRMSwUDOm7LdSGqlA4,15626
35
+ httpx/_models.py,sha256=4__Guyv1gLxuZChwim8kfQNiIOcJ9acreFOSurvZfms,44700
36
+ httpx/_multipart.py,sha256=KOHEZZl6oohg9mPaKyyu345qq1rJLg35TUG3YAzXB3Y,9843
37
+ httpx/_status_codes.py,sha256=DYn-2ufBgMeXy5s8x3_TB7wjAuAAMewTakPrm5rXEsc,5639
38
+ httpx/_transports/__init__.py,sha256=GbUoBSAOp7z-l-9j5YhMhR3DMIcn6FVLhj072O3Nnno,275
39
+ httpx/_transports/__pycache__/__init__.cpython-313.pyc,,
40
+ httpx/_transports/__pycache__/asgi.cpython-313.pyc,,
41
+ httpx/_transports/__pycache__/base.cpython-313.pyc,,
42
+ httpx/_transports/__pycache__/default.cpython-313.pyc,,
43
+ httpx/_transports/__pycache__/mock.cpython-313.pyc,,
44
+ httpx/_transports/__pycache__/wsgi.cpython-313.pyc,,
45
+ httpx/_transports/asgi.py,sha256=HRfiDYMPt4wQH2gFgHZg4c-i3sblo6bL5GTqcET-xz8,5501
46
+ httpx/_transports/base.py,sha256=kZS_VMbViYfF570pogUCJ1bulz-ybfL51Pqs9yktebU,2523
47
+ httpx/_transports/default.py,sha256=AzeaRUyVwCccTyyNJexDf0n1dFfzzydpdIQgvw7PLnk,13983
48
+ httpx/_transports/mock.py,sha256=PTo0d567RITXxGrki6kN7_67wwAxfwiMDcuXJiZCjEo,1232
49
+ httpx/_transports/wsgi.py,sha256=NcPX3Xap_EwCFZWO_OaSyQNuInCYx1QMNbO8GAei6jY,4825
50
+ httpx/_types.py,sha256=Jyh41GQq7AOev8IOWKDAg7zCbvHAfufmW5g_PiTtErY,2965
51
+ httpx/_urlparse.py,sha256=ZAmH47ONfkxrrj-PPYhGeiHjb6AjKCS-ANWIN4OL_KY,18546
52
+ httpx/_urls.py,sha256=dX99VR1DSOHpgo9Aq7PzYO4FKdxqKjwyNp8grf8dHN0,21550
53
+ httpx/_utils.py,sha256=_TVeqAKvxJkKHdz7dFeb4s0LZqQXgeFkXSgfiHBK_1o,8285
54
+ httpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.26.3
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any