Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/INSTALLER +1 -0
- env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/RECORD +92 -0
- env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/WHEEL +5 -0
- env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/entry_points.txt +2 -0
- env/lib/python3.13/site-packages/fsspec/__init__.py +71 -0
- env/lib/python3.13/site-packages/fsspec/_version.py +34 -0
- env/lib/python3.13/site-packages/fsspec/archive.py +75 -0
- env/lib/python3.13/site-packages/fsspec/asyn.py +1103 -0
- env/lib/python3.13/site-packages/fsspec/caching.py +1011 -0
- env/lib/python3.13/site-packages/fsspec/callbacks.py +324 -0
- env/lib/python3.13/site-packages/fsspec/compression.py +182 -0
- env/lib/python3.13/site-packages/fsspec/config.py +131 -0
- env/lib/python3.13/site-packages/fsspec/conftest.py +125 -0
- env/lib/python3.13/site-packages/fsspec/core.py +760 -0
- env/lib/python3.13/site-packages/fsspec/dircache.py +98 -0
- env/lib/python3.13/site-packages/fsspec/exceptions.py +18 -0
- env/lib/python3.13/site-packages/fsspec/fuse.py +324 -0
- env/lib/python3.13/site-packages/fsspec/generic.py +396 -0
- env/lib/python3.13/site-packages/fsspec/gui.py +417 -0
- env/lib/python3.13/site-packages/fsspec/json.py +112 -0
- env/lib/python3.13/site-packages/fsspec/mapping.py +251 -0
- env/lib/python3.13/site-packages/fsspec/parquet.py +580 -0
- env/lib/python3.13/site-packages/fsspec/registry.py +333 -0
- env/lib/python3.13/site-packages/fsspec/spec.py +2281 -0
- env/lib/python3.13/site-packages/fsspec/transaction.py +90 -0
- env/lib/python3.13/site-packages/fsspec/utils.py +738 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/INSTALLER +1 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/METADATA +48 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/RECORD +12 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/REQUESTED +0 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/WHEEL +6 -0
- env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/top_level.txt +1 -0
- env/lib/python3.13/site-packages/h11-0.16.0.dist-info/INSTALLER +1 -0
- env/lib/python3.13/site-packages/h11-0.16.0.dist-info/METADATA +202 -0
- env/lib/python3.13/site-packages/h11-0.16.0.dist-info/RECORD +29 -0
- env/lib/python3.13/site-packages/h11-0.16.0.dist-info/WHEEL +5 -0
- env/lib/python3.13/site-packages/h11-0.16.0.dist-info/top_level.txt +1 -0
- env/lib/python3.13/site-packages/httpcore/__init__.py +141 -0
- env/lib/python3.13/site-packages/httpcore/_api.py +94 -0
- env/lib/python3.13/site-packages/httpcore/_exceptions.py +81 -0
- env/lib/python3.13/site-packages/httpcore/_models.py +516 -0
- env/lib/python3.13/site-packages/httpcore/_ssl.py +9 -0
- env/lib/python3.13/site-packages/httpcore/_synchronization.py +318 -0
- env/lib/python3.13/site-packages/httpcore/_trace.py +107 -0
- env/lib/python3.13/site-packages/httpcore/_utils.py +37 -0
- env/lib/python3.13/site-packages/httpcore/py.typed +0 -0
- env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/INSTALLER +1 -0
- env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/METADATA +203 -0
- env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/RECORD +54 -0
- env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/WHEEL +4 -0
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
anyio-4.12.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
anyio-4.12.0.dist-info/METADATA,sha256=rte2_C2hYKP9_iVMFYogSzBxdHBzwY45S1TrLiBsxdk,4277
|
| 3 |
+
anyio-4.12.0.dist-info/RECORD,,
|
| 4 |
+
anyio-4.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
| 5 |
+
anyio-4.12.0.dist-info/entry_points.txt,sha256=_d6Yu6uiaZmNe0CydowirE9Cmg7zUL2g08tQpoS3Qvc,39
|
| 6 |
+
anyio-4.12.0.dist-info/licenses/LICENSE,sha256=U2GsncWPLvX9LpsJxoKXwX8ElQkJu8gCO9uC6s8iwrA,1081
|
| 7 |
+
anyio-4.12.0.dist-info/top_level.txt,sha256=QglSMiWX8_5dpoVAEIHdEYzvqFMdSYWmCj6tYw2ITkQ,6
|
| 8 |
+
anyio/__init__.py,sha256=7iDVqMUprUuKNY91FuoKqayAhR-OY136YDPI6P78HHk,6170
|
| 9 |
+
anyio/__pycache__/__init__.cpython-313.pyc,,
|
| 10 |
+
anyio/__pycache__/from_thread.cpython-313.pyc,,
|
| 11 |
+
anyio/__pycache__/functools.cpython-313.pyc,,
|
| 12 |
+
anyio/__pycache__/lowlevel.cpython-313.pyc,,
|
| 13 |
+
anyio/__pycache__/pytest_plugin.cpython-313.pyc,,
|
| 14 |
+
anyio/__pycache__/to_interpreter.cpython-313.pyc,,
|
| 15 |
+
anyio/__pycache__/to_process.cpython-313.pyc,,
|
| 16 |
+
anyio/__pycache__/to_thread.cpython-313.pyc,,
|
| 17 |
+
anyio/_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 18 |
+
anyio/_backends/__pycache__/__init__.cpython-313.pyc,,
|
| 19 |
+
anyio/_backends/__pycache__/_asyncio.cpython-313.pyc,,
|
| 20 |
+
anyio/_backends/__pycache__/_trio.cpython-313.pyc,,
|
| 21 |
+
anyio/_backends/_asyncio.py,sha256=w6gCSMs_2D1doKVtzi32bOloBl1df-IHubl8-Vks908,99656
|
| 22 |
+
anyio/_backends/_trio.py,sha256=ScNVMQB0iiuJMAon1epQCVOVbIbf-Lxnfb5OxujzMok,42398
|
| 23 |
+
anyio/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 24 |
+
anyio/_core/__pycache__/__init__.cpython-313.pyc,,
|
| 25 |
+
anyio/_core/__pycache__/_asyncio_selector_thread.cpython-313.pyc,,
|
| 26 |
+
anyio/_core/__pycache__/_contextmanagers.cpython-313.pyc,,
|
| 27 |
+
anyio/_core/__pycache__/_eventloop.cpython-313.pyc,,
|
| 28 |
+
anyio/_core/__pycache__/_exceptions.cpython-313.pyc,,
|
| 29 |
+
anyio/_core/__pycache__/_fileio.cpython-313.pyc,,
|
| 30 |
+
anyio/_core/__pycache__/_resources.cpython-313.pyc,,
|
| 31 |
+
anyio/_core/__pycache__/_signals.cpython-313.pyc,,
|
| 32 |
+
anyio/_core/__pycache__/_sockets.cpython-313.pyc,,
|
| 33 |
+
anyio/_core/__pycache__/_streams.cpython-313.pyc,,
|
| 34 |
+
anyio/_core/__pycache__/_subprocesses.cpython-313.pyc,,
|
| 35 |
+
anyio/_core/__pycache__/_synchronization.cpython-313.pyc,,
|
| 36 |
+
anyio/_core/__pycache__/_tasks.cpython-313.pyc,,
|
| 37 |
+
anyio/_core/__pycache__/_tempfile.cpython-313.pyc,,
|
| 38 |
+
anyio/_core/__pycache__/_testing.cpython-313.pyc,,
|
| 39 |
+
anyio/_core/__pycache__/_typedattr.cpython-313.pyc,,
|
| 40 |
+
anyio/_core/_asyncio_selector_thread.py,sha256=2PdxFM3cs02Kp6BSppbvmRT7q7asreTW5FgBxEsflBo,5626
|
| 41 |
+
anyio/_core/_contextmanagers.py,sha256=YInBCabiEeS-UaP_Jdxa1CaFC71ETPW8HZTHIM8Rsc8,7215
|
| 42 |
+
anyio/_core/_eventloop.py,sha256=xsoYgHIddNYusTqAFDVmcvpjHKJFmdgtDcAjpN3JEWQ,6261
|
| 43 |
+
anyio/_core/_exceptions.py,sha256=fR2SvRUBYVHvolNKbzWSLt8FC_5NFB2OAzGD738fD8Q,4257
|
| 44 |
+
anyio/_core/_fileio.py,sha256=uc7t10Vb-If7GbdWM_zFf-ajUe6uek63fSt7IBLlZW0,25731
|
| 45 |
+
anyio/_core/_resources.py,sha256=NbmU5O5UX3xEyACnkmYX28Fmwdl-f-ny0tHym26e0w0,435
|
| 46 |
+
anyio/_core/_signals.py,sha256=vulT1M1xdLYtAR-eY5TamIgaf1WTlOwOrMGwswlTTr8,905
|
| 47 |
+
anyio/_core/_sockets.py,sha256=aTbgMr0qPmBPfrapxLykyajsmS7IAerhW9_Qk5r5E18,34311
|
| 48 |
+
anyio/_core/_streams.py,sha256=FczFwIgDpnkK0bODWJXMpsUJYdvAD04kaUaGzJU8DK0,1806
|
| 49 |
+
anyio/_core/_subprocesses.py,sha256=EXm5igL7dj55iYkPlbYVAqtbqxJxjU-6OndSTIx9SRg,8047
|
| 50 |
+
anyio/_core/_synchronization.py,sha256=SY3nsr1ZZyDrjamsOVoYcvj-x6d_AR13Cu5lZecG0gY,20894
|
| 51 |
+
anyio/_core/_tasks.py,sha256=km6hVE1fsuIenya3MDud8KP6-J_bNzlgYC10wUxI7iA,4880
|
| 52 |
+
anyio/_core/_tempfile.py,sha256=lHb7CW4FyIlpkf5ADAf4VmLHCKwEHF9nxqNyBCFFUiA,19697
|
| 53 |
+
anyio/_core/_testing.py,sha256=YUGwA5cgFFbUTv4WFd7cv_BSVr4ryTtPp8owQA3JdWE,2118
|
| 54 |
+
anyio/_core/_typedattr.py,sha256=P4ozZikn3-DbpoYcvyghS_FOYAgbmUxeoU8-L_07pZM,2508
|
| 55 |
+
anyio/abc/__init__.py,sha256=6mWhcl_pGXhrgZVHP_TCfMvIXIOp9mroEFM90fYCU_U,2869
|
| 56 |
+
anyio/abc/__pycache__/__init__.cpython-313.pyc,,
|
| 57 |
+
anyio/abc/__pycache__/_eventloop.cpython-313.pyc,,
|
| 58 |
+
anyio/abc/__pycache__/_resources.cpython-313.pyc,,
|
| 59 |
+
anyio/abc/__pycache__/_sockets.cpython-313.pyc,,
|
| 60 |
+
anyio/abc/__pycache__/_streams.cpython-313.pyc,,
|
| 61 |
+
anyio/abc/__pycache__/_subprocesses.cpython-313.pyc,,
|
| 62 |
+
anyio/abc/__pycache__/_tasks.cpython-313.pyc,,
|
| 63 |
+
anyio/abc/__pycache__/_testing.cpython-313.pyc,,
|
| 64 |
+
anyio/abc/_eventloop.py,sha256=GTZbdItBHcj_b-8K2XylET2-bBYLZ3XjW4snY7vK7LE,10900
|
| 65 |
+
anyio/abc/_resources.py,sha256=DrYvkNN1hH6Uvv5_5uKySvDsnknGVDe8FCKfko0VtN8,783
|
| 66 |
+
anyio/abc/_sockets.py,sha256=ECTY0jLEF18gryANHR3vFzXzGdZ-xPwELq1QdgOb0Jo,13258
|
| 67 |
+
anyio/abc/_streams.py,sha256=005GKSCXGprxnhucILboSqc2JFovECZk9m3p-qqxXVc,7640
|
| 68 |
+
anyio/abc/_subprocesses.py,sha256=cumAPJTktOQtw63IqG0lDpyZqu_l1EElvQHMiwJgL08,2067
|
| 69 |
+
anyio/abc/_tasks.py,sha256=KC7wrciE48AINOI-AhPutnFhe1ewfP7QnamFlDzqesQ,3721
|
| 70 |
+
anyio/abc/_testing.py,sha256=tBJUzkSfOXJw23fe8qSJ03kJlShOYjjaEyFB6k6MYT8,1821
|
| 71 |
+
anyio/from_thread.py,sha256=-YZOTpu9WVHtAsMxQGIOaHMjaDRNeKQilx6Nn2qDU-o,19017
|
| 72 |
+
anyio/functools.py,sha256=tIWQ90cuLMxfJIpdBfFY3W3CC1zqFCRAyR3DxKc0Xlo,10061
|
| 73 |
+
anyio/lowlevel.py,sha256=NnPYQ6tWDzLRwpalX2CvsbkXkTeasbJcL52gPopWdYg,5048
|
| 74 |
+
anyio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 75 |
+
anyio/pytest_plugin.py,sha256=3jAFQn0jv_pyoWE2GBBlHaj9sqXj4e8vob0_hgrsXE8,10244
|
| 76 |
+
anyio/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 77 |
+
anyio/streams/__pycache__/__init__.cpython-313.pyc,,
|
| 78 |
+
anyio/streams/__pycache__/buffered.cpython-313.pyc,,
|
| 79 |
+
anyio/streams/__pycache__/file.cpython-313.pyc,,
|
| 80 |
+
anyio/streams/__pycache__/memory.cpython-313.pyc,,
|
| 81 |
+
anyio/streams/__pycache__/stapled.cpython-313.pyc,,
|
| 82 |
+
anyio/streams/__pycache__/text.cpython-313.pyc,,
|
| 83 |
+
anyio/streams/__pycache__/tls.cpython-313.pyc,,
|
| 84 |
+
anyio/streams/buffered.py,sha256=2R3PeJhe4EXrdYqz44Y6-Eg9R6DrmlsYrP36Ir43-po,6263
|
| 85 |
+
anyio/streams/file.py,sha256=4WZ7XGz5WNu39FQHvqbe__TQ0HDP9OOhgO1mk9iVpVU,4470
|
| 86 |
+
anyio/streams/memory.py,sha256=F0zwzvFJKAhX_LRZGoKzzqDC2oMM-f-yyTBrEYEGOaU,10740
|
| 87 |
+
anyio/streams/stapled.py,sha256=T8Xqwf8K6EgURPxbt1N4i7A8BAk-gScv-GRhjLXIf_o,4390
|
| 88 |
+
anyio/streams/text.py,sha256=BcVAGJw1VRvtIqnv-o0Rb0pwH7p8vwlvl21xHq522ag,5765
|
| 89 |
+
anyio/streams/tls.py,sha256=Jpxy0Mfbcp1BxHCwE-YjSSFaLnIBbnnwur-excYThs4,15368
|
| 90 |
+
anyio/to_interpreter.py,sha256=_mLngrMy97TMR6VbW4Y6YzDUk9ZuPcQMPlkuyRh3C9k,7100
|
| 91 |
+
anyio/to_process.py,sha256=cEyYUgb8LJVRJCfs6rK3aEM_T3k2gEmhl0nBjEvflOk,9687
|
| 92 |
+
anyio/to_thread.py,sha256=tXQPvHohvQ2Vrw2pBtdzkRPNV7u3H2_UDbvwL2u_R7k,2465
|
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: setuptools (80.9.0)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
| 5 |
+
|
env/lib/python3.13/site-packages/anyio-4.12.0.dist-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[pytest11]
|
| 2 |
+
anyio = anyio.pytest_plugin
|
env/lib/python3.13/site-packages/fsspec/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from . import caching
|
| 2 |
+
from ._version import __version__ # noqa: F401
|
| 3 |
+
from .callbacks import Callback
|
| 4 |
+
from .compression import available_compressions
|
| 5 |
+
from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
|
| 6 |
+
from .exceptions import FSTimeoutError
|
| 7 |
+
from .mapping import FSMap, get_mapper
|
| 8 |
+
from .registry import (
|
| 9 |
+
available_protocols,
|
| 10 |
+
filesystem,
|
| 11 |
+
get_filesystem_class,
|
| 12 |
+
register_implementation,
|
| 13 |
+
registry,
|
| 14 |
+
)
|
| 15 |
+
from .spec import AbstractFileSystem
|
| 16 |
+
|
| 17 |
+
__all__ = [
|
| 18 |
+
"AbstractFileSystem",
|
| 19 |
+
"FSTimeoutError",
|
| 20 |
+
"FSMap",
|
| 21 |
+
"filesystem",
|
| 22 |
+
"register_implementation",
|
| 23 |
+
"get_filesystem_class",
|
| 24 |
+
"get_fs_token_paths",
|
| 25 |
+
"get_mapper",
|
| 26 |
+
"open",
|
| 27 |
+
"open_files",
|
| 28 |
+
"open_local",
|
| 29 |
+
"registry",
|
| 30 |
+
"caching",
|
| 31 |
+
"Callback",
|
| 32 |
+
"available_protocols",
|
| 33 |
+
"available_compressions",
|
| 34 |
+
"url_to_fs",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def process_entries():
|
| 39 |
+
try:
|
| 40 |
+
from importlib.metadata import entry_points
|
| 41 |
+
except ImportError:
|
| 42 |
+
return
|
| 43 |
+
if entry_points is not None:
|
| 44 |
+
try:
|
| 45 |
+
eps = entry_points()
|
| 46 |
+
except TypeError:
|
| 47 |
+
pass # importlib-metadata < 0.8
|
| 48 |
+
else:
|
| 49 |
+
if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0
|
| 50 |
+
specs = eps.select(group="fsspec.specs")
|
| 51 |
+
else:
|
| 52 |
+
specs = eps.get("fsspec.specs", [])
|
| 53 |
+
registered_names = {}
|
| 54 |
+
for spec in specs:
|
| 55 |
+
err_msg = f"Unable to load filesystem from {spec}"
|
| 56 |
+
name = spec.name
|
| 57 |
+
if name in registered_names:
|
| 58 |
+
continue
|
| 59 |
+
registered_names[name] = True
|
| 60 |
+
register_implementation(
|
| 61 |
+
name,
|
| 62 |
+
spec.value.replace(":", "."),
|
| 63 |
+
errtxt=err_msg,
|
| 64 |
+
# We take our implementations as the ones to overload with if
|
| 65 |
+
# for some reason we encounter some, may be the same, already
|
| 66 |
+
# registered
|
| 67 |
+
clobber=True,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
process_entries()
|
env/lib/python3.13/site-packages/fsspec/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# file generated by setuptools-scm
|
| 2 |
+
# don't change, don't track in version control
|
| 3 |
+
|
| 4 |
+
__all__ = [
|
| 5 |
+
"__version__",
|
| 6 |
+
"__version_tuple__",
|
| 7 |
+
"version",
|
| 8 |
+
"version_tuple",
|
| 9 |
+
"__commit_id__",
|
| 10 |
+
"commit_id",
|
| 11 |
+
]
|
| 12 |
+
|
| 13 |
+
TYPE_CHECKING = False
|
| 14 |
+
if TYPE_CHECKING:
|
| 15 |
+
from typing import Tuple
|
| 16 |
+
from typing import Union
|
| 17 |
+
|
| 18 |
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
| 19 |
+
COMMIT_ID = Union[str, None]
|
| 20 |
+
else:
|
| 21 |
+
VERSION_TUPLE = object
|
| 22 |
+
COMMIT_ID = object
|
| 23 |
+
|
| 24 |
+
version: str
|
| 25 |
+
__version__: str
|
| 26 |
+
__version_tuple__: VERSION_TUPLE
|
| 27 |
+
version_tuple: VERSION_TUPLE
|
| 28 |
+
commit_id: COMMIT_ID
|
| 29 |
+
__commit_id__: COMMIT_ID
|
| 30 |
+
|
| 31 |
+
__version__ = version = '2025.12.0'
|
| 32 |
+
__version_tuple__ = version_tuple = (2025, 12, 0)
|
| 33 |
+
|
| 34 |
+
__commit_id__ = commit_id = None
|
env/lib/python3.13/site-packages/fsspec/archive.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import operator
|
| 2 |
+
|
| 3 |
+
from fsspec import AbstractFileSystem
|
| 4 |
+
from fsspec.utils import tokenize
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class AbstractArchiveFileSystem(AbstractFileSystem):
|
| 8 |
+
"""
|
| 9 |
+
A generic superclass for implementing Archive-based filesystems.
|
| 10 |
+
|
| 11 |
+
Currently, it is shared amongst
|
| 12 |
+
:class:`~fsspec.implementations.zip.ZipFileSystem`,
|
| 13 |
+
:class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
|
| 14 |
+
:class:`~fsspec.implementations.tar.TarFileSystem`.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
def __str__(self):
|
| 18 |
+
return f"<Archive-like object {type(self).__name__} at {id(self)}>"
|
| 19 |
+
|
| 20 |
+
__repr__ = __str__
|
| 21 |
+
|
| 22 |
+
def ukey(self, path):
|
| 23 |
+
return tokenize(path, self.fo, self.protocol)
|
| 24 |
+
|
| 25 |
+
def _all_dirnames(self, paths):
|
| 26 |
+
"""Returns *all* directory names for each path in paths, including intermediate
|
| 27 |
+
ones.
|
| 28 |
+
|
| 29 |
+
Parameters
|
| 30 |
+
----------
|
| 31 |
+
paths: Iterable of path strings
|
| 32 |
+
"""
|
| 33 |
+
if len(paths) == 0:
|
| 34 |
+
return set()
|
| 35 |
+
|
| 36 |
+
dirnames = {self._parent(path) for path in paths} - {self.root_marker}
|
| 37 |
+
return dirnames | self._all_dirnames(dirnames)
|
| 38 |
+
|
| 39 |
+
def info(self, path, **kwargs):
|
| 40 |
+
self._get_dirs()
|
| 41 |
+
path = self._strip_protocol(path)
|
| 42 |
+
if path in {"", "/"} and self.dir_cache:
|
| 43 |
+
return {"name": "", "type": "directory", "size": 0}
|
| 44 |
+
if path in self.dir_cache:
|
| 45 |
+
return self.dir_cache[path]
|
| 46 |
+
elif path + "/" in self.dir_cache:
|
| 47 |
+
return self.dir_cache[path + "/"]
|
| 48 |
+
else:
|
| 49 |
+
raise FileNotFoundError(path)
|
| 50 |
+
|
| 51 |
+
def ls(self, path, detail=True, **kwargs):
|
| 52 |
+
self._get_dirs()
|
| 53 |
+
paths = {}
|
| 54 |
+
for p, f in self.dir_cache.items():
|
| 55 |
+
p = p.rstrip("/")
|
| 56 |
+
if "/" in p:
|
| 57 |
+
root = p.rsplit("/", 1)[0]
|
| 58 |
+
else:
|
| 59 |
+
root = ""
|
| 60 |
+
if root == path.rstrip("/"):
|
| 61 |
+
paths[p] = f
|
| 62 |
+
elif all(
|
| 63 |
+
(a == b)
|
| 64 |
+
for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
|
| 65 |
+
):
|
| 66 |
+
# root directory entry
|
| 67 |
+
ppath = p.rstrip("/").split("/", 1)[0]
|
| 68 |
+
if ppath not in paths:
|
| 69 |
+
out = {"name": ppath, "size": 0, "type": "directory"}
|
| 70 |
+
paths[ppath] = out
|
| 71 |
+
if detail:
|
| 72 |
+
out = sorted(paths.values(), key=operator.itemgetter("name"))
|
| 73 |
+
return out
|
| 74 |
+
else:
|
| 75 |
+
return sorted(paths)
|
env/lib/python3.13/site-packages/fsspec/asyn.py
ADDED
|
@@ -0,0 +1,1103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import asyncio.events
|
| 3 |
+
import functools
|
| 4 |
+
import inspect
|
| 5 |
+
import io
|
| 6 |
+
import numbers
|
| 7 |
+
import os
|
| 8 |
+
import re
|
| 9 |
+
import threading
|
| 10 |
+
from collections.abc import Iterable
|
| 11 |
+
from glob import has_magic
|
| 12 |
+
from typing import TYPE_CHECKING
|
| 13 |
+
|
| 14 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 15 |
+
from .exceptions import FSTimeoutError
|
| 16 |
+
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
| 17 |
+
from .spec import AbstractBufferedFile, AbstractFileSystem
|
| 18 |
+
from .utils import glob_translate, is_exception, other_paths
|
| 19 |
+
|
| 20 |
+
private = re.compile("_[^_]")
|
| 21 |
+
iothread = [None] # dedicated fsspec IO thread
|
| 22 |
+
loop = [None] # global event loop for any non-async instance
|
| 23 |
+
_lock = None # global lock placeholder
|
| 24 |
+
get_running_loop = asyncio.get_running_loop
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_lock():
|
| 28 |
+
"""Allocate or return a threading lock.
|
| 29 |
+
|
| 30 |
+
The lock is allocated on first use to allow setting one lock per forked process.
|
| 31 |
+
"""
|
| 32 |
+
global _lock
|
| 33 |
+
if not _lock:
|
| 34 |
+
_lock = threading.Lock()
|
| 35 |
+
return _lock
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def reset_lock():
|
| 39 |
+
"""Reset the global lock.
|
| 40 |
+
|
| 41 |
+
This should be called only on the init of a forked process to reset the lock to
|
| 42 |
+
None, enabling the new forked process to get a new lock.
|
| 43 |
+
"""
|
| 44 |
+
global _lock
|
| 45 |
+
|
| 46 |
+
iothread[0] = None
|
| 47 |
+
loop[0] = None
|
| 48 |
+
_lock = None
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def _runner(event, coro, result, timeout=None):
|
| 52 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
| 53 |
+
if timeout is not None:
|
| 54 |
+
coro = asyncio.wait_for(coro, timeout=timeout)
|
| 55 |
+
try:
|
| 56 |
+
result[0] = await coro
|
| 57 |
+
except Exception as ex:
|
| 58 |
+
result[0] = ex
|
| 59 |
+
finally:
|
| 60 |
+
event.set()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def sync(loop, func, *args, timeout=None, **kwargs):
|
| 64 |
+
"""
|
| 65 |
+
Make loop run coroutine until it returns. Runs in other thread
|
| 66 |
+
|
| 67 |
+
Examples
|
| 68 |
+
--------
|
| 69 |
+
>>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
|
| 70 |
+
timeout=timeout, **kwargs)
|
| 71 |
+
"""
|
| 72 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
| 73 |
+
# NB: if the loop is not running *yet*, it is OK to submit work
|
| 74 |
+
# and we will wait for it
|
| 75 |
+
if loop is None or loop.is_closed():
|
| 76 |
+
raise RuntimeError("Loop is not running")
|
| 77 |
+
try:
|
| 78 |
+
loop0 = asyncio.events.get_running_loop()
|
| 79 |
+
if loop0 is loop:
|
| 80 |
+
raise NotImplementedError("Calling sync() from within a running loop")
|
| 81 |
+
except NotImplementedError:
|
| 82 |
+
raise
|
| 83 |
+
except RuntimeError:
|
| 84 |
+
pass
|
| 85 |
+
coro = func(*args, **kwargs)
|
| 86 |
+
result = [None]
|
| 87 |
+
event = threading.Event()
|
| 88 |
+
asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
|
| 89 |
+
while True:
|
| 90 |
+
# this loops allows thread to get interrupted
|
| 91 |
+
if event.wait(1):
|
| 92 |
+
break
|
| 93 |
+
if timeout is not None:
|
| 94 |
+
timeout -= 1
|
| 95 |
+
if timeout < 0:
|
| 96 |
+
raise FSTimeoutError
|
| 97 |
+
|
| 98 |
+
return_result = result[0]
|
| 99 |
+
if isinstance(return_result, asyncio.TimeoutError):
|
| 100 |
+
# suppress asyncio.TimeoutError, raise FSTimeoutError
|
| 101 |
+
raise FSTimeoutError from return_result
|
| 102 |
+
elif isinstance(return_result, BaseException):
|
| 103 |
+
raise return_result
|
| 104 |
+
else:
|
| 105 |
+
return return_result
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def sync_wrapper(func, obj=None):
|
| 109 |
+
"""Given a function, make so can be called in blocking contexts
|
| 110 |
+
|
| 111 |
+
Leave obj=None if defining within a class. Pass the instance if attaching
|
| 112 |
+
as an attribute of the instance.
|
| 113 |
+
"""
|
| 114 |
+
|
| 115 |
+
@functools.wraps(func)
|
| 116 |
+
def wrapper(*args, **kwargs):
|
| 117 |
+
self = obj or args[0]
|
| 118 |
+
return sync(self.loop, func, *args, **kwargs)
|
| 119 |
+
|
| 120 |
+
return wrapper
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def get_loop():
|
| 124 |
+
"""Create or return the default fsspec IO loop
|
| 125 |
+
|
| 126 |
+
The loop will be running on a separate thread.
|
| 127 |
+
"""
|
| 128 |
+
if loop[0] is None:
|
| 129 |
+
with get_lock():
|
| 130 |
+
# repeat the check just in case the loop got filled between the
|
| 131 |
+
# previous two calls from another thread
|
| 132 |
+
if loop[0] is None:
|
| 133 |
+
loop[0] = asyncio.new_event_loop()
|
| 134 |
+
th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
|
| 135 |
+
th.daemon = True
|
| 136 |
+
th.start()
|
| 137 |
+
iothread[0] = th
|
| 138 |
+
return loop[0]
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def reset_after_fork():
|
| 142 |
+
global lock
|
| 143 |
+
loop[0] = None
|
| 144 |
+
iothread[0] = None
|
| 145 |
+
lock = None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
if hasattr(os, "register_at_fork"):
|
| 149 |
+
# should be posix; this will do nothing for spawn or forkserver subprocesses
|
| 150 |
+
os.register_at_fork(after_in_child=reset_after_fork)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
if TYPE_CHECKING:
|
| 154 |
+
import resource
|
| 155 |
+
|
| 156 |
+
ResourceError = resource.error
|
| 157 |
+
else:
|
| 158 |
+
try:
|
| 159 |
+
import resource
|
| 160 |
+
except ImportError:
|
| 161 |
+
resource = None
|
| 162 |
+
ResourceError = OSError
|
| 163 |
+
else:
|
| 164 |
+
ResourceError = getattr(resource, "error", OSError)
|
| 165 |
+
|
| 166 |
+
_DEFAULT_BATCH_SIZE = 128
|
| 167 |
+
_NOFILES_DEFAULT_BATCH_SIZE = 1280
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _get_batch_size(nofiles=False):
    """Pick the default number of coroutines to run concurrently.

    Resolution order:

    1. the ``fsspec.config.conf`` entry ``"nofiles_gather_batch_size"`` (when
       ``nofiles`` is true) or ``"gather_batch_size"`` (otherwise), if present;
    2. ``_NOFILES_DEFAULT_BATCH_SIZE`` when ``nofiles`` is true;
    3. ``_DEFAULT_BATCH_SIZE`` when ``resource`` is unavailable or the limit
       cannot be read;
    4. -1 when the soft ``RLIMIT_NOFILE`` is unlimited, else one eighth of it.
    """
    from fsspec.config import conf

    conf_key = "nofiles_gather_batch_size" if nofiles else "gather_batch_size"
    if conf_key in conf:
        return conf[conf_key]

    if nofiles:
        return _NOFILES_DEFAULT_BATCH_SIZE
    if resource is None:
        return _DEFAULT_BATCH_SIZE

    try:
        soft_limit, _hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
    except (ImportError, ValueError, ResourceError):
        return _DEFAULT_BATCH_SIZE

    return -1 if soft_limit == resource.RLIM_INFINITY else soft_limit // 8
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def running_async() -> bool:
    """Report whether the caller is executing inside a running event loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # raised by asyncio when no loop is running in this thread
        return False
    else:
        return True
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, there will be no throttling. If None, it is inferred from
        _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        If inferring the batch_size, does this operation involve local files?
        If yes, you normally expect smaller batches.

    Returns
    -------
    list with one entry per input coroutine, in input order. An empty input
    yields an empty list.
    """
    n = len(coros)
    if n == 0:
        # Nothing to schedule. Returning here also keeps ``batch_size=-1``
        # (which resolves to ``len(coros)``) from tripping the check below.
        return []

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        batch_size = n

    assert batch_size > 0

    async def _run_coro(coro, i):
        # Pair each outcome with its input index so results can be stored in
        # input order even though tasks finish in arbitrary order.
        try:
            return await asyncio.wait_for(coro, timeout=timeout), i
        except Exception as e:
            if not return_exceptions:
                raise
            return e, i
        finally:
            callback.relative_update(1)

    i = 0
    results = [None] * n
    pending = set()

    while pending or i < n:
        # top up the in-flight set to at most ``batch_size`` tasks
        while len(pending) < batch_size and i < n:
            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
            i += 1

        if not pending:
            break

        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        while done:
            # awaiting a finished task re-raises its exception, if any
            result, k = await done.pop()
            results[k] = result

    return results
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# These methods should be implemented as async by any async-able backend.
# ``mirror_sync_methods`` iterates over this list (together with
# ``dir(AsyncFileSystem)``) when creating blocking counterparts.
async_methods = [
    "_ls",
    "_cat_file",
    "_get_file",
    "_put_file",
    "_rm_file",
    "_cp_file",
    "_pipe_file",
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    "_walk",
    "_glob",
    "_find",
    "_du",
    "_size",
    "_mkdir",
    "_makedirs",
]
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
class AsyncFileSystem(AbstractFileSystem):
    """Async file operations, default implementations

    Passes bulk operations to asyncio.gather for concurrent operation.

    Implementations that have concurrent batch operations and/or async methods
    should inherit from this class instead of AbstractFileSystem. Docstrings are
    copied from the un-underscored method in AbstractFileSystem, if not given.
    """

    # note that methods do not have docstring here; they will be copied
    # for _* methods and inferred for overridden methods.
    # (Explanatory notes below are therefore written as comments, so that
    # ``__doc__`` stays empty where it was empty.)

    async_impl = True
    mirror_sync_methods = True
    disable_throttling = False

    def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
        self.asynchronous = asynchronous
        # remember the creating process so ``loop`` can detect use after fork
        self._pid = os.getpid()
        if not asynchronous:
            self._loop = loop or get_loop()
        else:
            self._loop = None
        self.batch_size = batch_size
        super().__init__(*args, **kwargs)

    @property
    def loop(self):
        # refuse to hand out the loop from a process other than the creator
        if self._pid != os.getpid():
            raise RuntimeError("This class is not fork-safe")
        return self._loop

    async def _rm_file(self, path, **kwargs):
        # Delegate to a subclass-provided coroutine ``_rm``; the default
        # ``_rm`` below calls back into ``_rm_file``, so it is excluded here.
        if (
            inspect.iscoroutinefunction(self._rm)
            and type(self)._rm is not AsyncFileSystem._rm
        ):
            return await self._rm(path, recursive=False, batch_size=1, **kwargs)
        raise NotImplementedError

    async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
        # TODO: implement on_error
        batch_size = batch_size or self.batch_size
        path = await self._expand_path(path, recursive=recursive)
        # expanded paths are sorted; remove them in reverse order
        return await _run_coros_in_chunks(
            [self._rm_file(p, **kwargs) for p in reversed(path)],
            batch_size=batch_size,
            nofiles=True,
        )

    async def _cp_file(self, path1, path2, **kwargs):
        raise NotImplementedError

    async def _mv_file(self, path1, path2):
        # move is implemented as copy followed by removal of the source
        await self._cp_file(path1, path2)
        await self._rm_file(path1)

    async def _copy(
        self,
        path1,
        path2,
        recursive=False,
        on_error=None,
        maxdepth=None,
        batch_size=None,
        **kwargs,
    ):
        # default error policy: "ignore" for recursive copies, else "raise"
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            source_is_str = isinstance(path1, str)
            paths1 = await self._expand_path(
                path1, maxdepth=maxdepth, recursive=recursive
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [
                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or await self._isdir(path2)
            )

            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        batch_size = batch_size or self.batch_size
        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
        result = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
        )

        # re-raise the first collected exception that the policy does not excuse
        for ex in filter(is_exception, result):
            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
                continue
            raise ex

    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        raise NotImplementedError

    async def _pipe(self, path, value=None, batch_size=None, **kwargs):
        # a single path is normalised to a one-entry {path: value} mapping
        if isinstance(path, str):
            path = {path: value}
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
            batch_size=batch_size,
            nofiles=True,
        )

    async def _process_limits(self, url, start, end):
        """Helper for "Range"-based _cat_file"""
        size = None
        suff = False
        if start is not None and start < 0:
            # if start is negative and end None, end is the "suffix length"
            if end is None:
                end = -start
                start = ""
                suff = True
            else:
                # negative start counted from the end: needs the total size
                size = size or (await self._info(url))["size"]
                start = size + start
        elif start is None:
            start = 0
        if not suff:
            if end is not None and end < 0:
                if start is not None:
                    size = size or (await self._info(url))["size"]
                    end = size + end
            elif end is None:
                end = ""
            if isinstance(end, numbers.Integral):
                end -= 1  # bytes range is inclusive
        return f"bytes={start}-{end}"

    async def _cat_file(self, path, start=None, end=None, **kwargs):
        raise NotImplementedError

    async def _cat(
        self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
    ):
        paths = await self._expand_path(path, recursive=recursive)
        coros = [self._cat_file(path, **kwargs) for path in paths]
        batch_size = batch_size or self.batch_size
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
        )
        if on_error == "raise":
            ex = next(filter(is_exception, out), False)
            if ex:
                raise ex
        # return a mapping unless exactly the one requested path was read
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            return {
                k: v
                for k, v in zip(paths, out)
                if on_error != "omit" or not is_exception(v)
            }
        else:
            return out[0]

    async def _cat_ranges(
        self,
        paths,
        starts,
        ends,
        max_gap=None,
        batch_size=None,
        on_error="return",
        **kwargs,
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystem
        starts, ends: int or list
            Bytes limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        # TODO: on_error
        if max_gap is not None:
            # use utils.merge_offset_ranges
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError("paths must be a list")
        # scalar limits are broadcast to every path
        if not isinstance(starts, Iterable):
            starts = [starts] * len(paths)
        if not isinstance(ends, Iterable):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError("starts and ends must each match the length of paths")
        coros = [
            self._cat_file(p, start=s, end=e, **kwargs)
            for p, s, e in zip(paths, starts, ends)
        ]
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
        )

    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
        raise NotImplementedError

    async def _put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        batch_size=None,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        The put_file method will be called concurrently on a batch of files. The
        batch_size option can configure the amount of futures that can be executed
        at the same time. If it is -1, then all the files will be uploaded concurrently.
        The default can be set for this instance by passing "batch_size" in the
        constructor, or for all instances by setting the "gather_batch_size" key
        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or await self._isdir(rpath)
            )

            rpath = self._strip_protocol(rpath)
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        # local directories become remote makedirs calls; the rest are uploads
        is_dir = {l: os.path.isdir(l) for l in lpaths}
        rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
        file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]

        await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
        batch_size = batch_size or self.batch_size

        coros = []
        callback.set_size(len(file_pairs))
        for lfile, rfile in file_pairs:
            put_file = callback.branch_coro(self._put_file)
            coros.append(put_file(lfile, rfile, **kwargs))

        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, callback=callback
        )

    async def _get_file(self, rpath, lpath, **kwargs):
        raise NotImplementedError

    async def _get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        The get_file method will be called concurrently on a batch of files. The
        batch_size option can configure the amount of futures that can be executed
        at the same time. If it is -1, then all the files will be downloaded concurrently.
        The default can be set for this instance by passing "batch_size" in the
        constructor, or for all instances by setting the "gather_batch_size" key
        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            source_is_str = isinstance(rpath, str)
            # First check for rpath trailing slash as _strip_protocol removes it.
            source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
            rpath = self._strip_protocol(rpath)
            rpaths = await self._expand_path(
                rpath, recursive=recursive, maxdepth=maxdepth
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [
                    p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not rpaths:
                    return

            lpath = make_path_posix(lpath)
            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        # make sure every local parent directory exists before downloading
        for local_target in lpaths:
            os.makedirs(os.path.dirname(local_target), exist_ok=True)
        batch_size = kwargs.pop("batch_size", self.batch_size)

        coros = []
        callback.set_size(len(lpaths))
        for local_target, remote_source in zip(lpaths, rpaths):
            get_file = callback.branch_coro(self._get_file)
            coros.append(get_file(remote_source, local_target, **kwargs))
        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, callback=callback
        )

    async def _isfile(self, path):
        try:
            return (await self._info(path))["type"] == "file"
        except Exception:
            # Any ordinary failure means "not a file". BaseException
            # subclasses (task cancellation, KeyboardInterrupt) must propagate.
            return False

    async def _isdir(self, path):
        try:
            return (await self._info(path))["type"] == "directory"
        except OSError:
            return False

    async def _size(self, path):
        return (await self._info(path)).get("size", None)

    async def _sizes(self, paths, batch_size=None):
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            [self._size(p) for p in paths], batch_size=batch_size
        )

    async def _exists(self, path, **kwargs):
        try:
            await self._info(path, **kwargs)
            return True
        except FileNotFoundError:
            return False

    async def _info(self, path, **kwargs):
        raise NotImplementedError

    async def _ls(self, path, detail=True, **kwargs):
        raise NotImplementedError

    async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = await self._ls(path, detail=True, **kwargs)
        except OSError as e:  # includes FileNotFoundError
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            # an unlistable directory is reported as empty
            if detail:
                yield path, {}, {}
            else:
                yield path, [], []
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in dirs:
            # pass on_error down so the caller's policy applies at every depth
            async for _ in self._walk(
                full_dirs[d],
                maxdepth=maxdepth,
                on_error=on_error,
                detail=detail,
                **kwargs,
            ):
                yield _

    async def _glob(self, path, maxdepth=None, **kwargs):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        # position of the first magic character of each kind (or end of string)
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)
        withdirs = kwargs.pop("withdirs", True)

        if not has_magic(path):
            if await self._exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: await self._info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            # search root is the directory part before the first magic character
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = await self._find(
            root, maxdepth=depth, withdirs=withdirs, detail=True, **kwargs
        )

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)

    async def _du(self, path, total=True, maxdepth=None, **kwargs):
        sizes = {}
        # async for?
        for f in await self._find(path, maxdepth=maxdepth, **kwargs):
            info = await self._info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
        path = self._strip_protocol(path)
        out = {}
        detail = kwargs.pop("detail", False)

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and await self._isdir(path):
            out[path] = await self._info(path)

        # async for?
        async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            # key results by the full name reported in each info dict
            out.update({info["name"]: info for info in files.values()})
        if not out and (await self._isfile(path)):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

    async def _expand_path(self, path, recursive=False, maxdepth=None):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, str):
            out = await self._expand_path([path], recursive, maxdepth)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:  # can gather here
                if has_magic(p):
                    bit = set(await self._glob(p, maxdepth=maxdepth))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            await self._expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
                    out |= rec
                if p not in out and (recursive is False or (await self._exists(p))):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)

    async def _mkdir(self, path, create_parents=True, **kwargs):
        pass  # not necessary to implement, may not have directories

    async def _makedirs(self, path, exist_ok=False):
        pass  # not necessary to implement, may not have directories

    async def open_async(self, path, mode="rb", **kwargs):
        # the base class rejects text mode and compression, then leaves the
        # actual opening to subclasses
        if "b" not in mode or kwargs.get("compression"):
            raise ValueError
        raise NotImplementedError
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def mirror_sync_methods(obj):
    """Attach blocking versions of obj's coroutine methods to obj.

    Candidate names come from ``async_methods`` plus ``dir(AsyncFileSystem)``.
    For every candidate ``_name`` that starts with an underscore and matches the
    ``private`` pattern, a ``sync_wrapper`` around ``obj._name`` is set as
    ``obj.name`` when both of the following hold:

    - ``obj._name`` is a coroutine function, and
    - ``obj.name`` is still the implementation found on AbstractFileSystem
      (i.e. it has not been overridden).

    If the wrapper ends up without a docstring, the docstring of
    ``AbstractFileSystem.name`` is used.
    """
    from fsspec import AbstractFileSystem

    for async_name in async_methods + dir(AsyncFileSystem):
        if not (async_name.startswith("_") and private.match(async_name)):
            continue

        sync_name = async_name[1:]
        is_coroutine = inspect.iscoroutinefunction(getattr(obj, async_name, None))
        current_sync = getattr(getattr(obj, sync_name, False), "__func__", None)
        base_sync = getattr(AbstractFileSystem, sync_name, "")
        if not is_coroutine or current_sync is not base_sync:
            continue

        blocking = sync_wrapper(getattr(obj, async_name), obj=obj)
        setattr(obj, sync_name, blocking)
        if not blocking.__doc__:
            base_method = getattr(AbstractFileSystem, sync_name, None)
            blocking.__doc__ = getattr(base_method, "__doc__", "")
|
| 950 |
+
|
| 951 |
+
|
| 952 |
+
class FSSpecCoroutineCancel(Exception):
    """Default exception that ``_dump_running_tasks`` throws into tasks it cancels."""

    pass
|
| 954 |
+
|
| 955 |
+
|
| 956 |
+
def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    """Describe, and optionally cancel, the unfinished tasks on the fsspec loop.

    Parameters
    ----------
    printout: bool
        If True, call ``print_stack()`` on every unfinished task.
    cancel: bool
        If True, cancel every unfinished task and throw ``exc`` into its
        coroutine.
    exc: exception class
        Exception set on the task and thrown into the coroutine when cancelling.
    with_task: bool
        If True, include the task object itself under the "task" key.

    Returns
    -------
    list of dicts, one per unfinished task, with the keys "locals", "file",
    "firstline", "linelo", "stack" and "task".
    """
    import traceback

    # only tasks of the module-level loop that have not finished yet
    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        [task.print_stack() for task in tasks]
    # NOTE(review): reads the private ``_coro`` attribute of each task
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "linelo": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            # grab the callbacks before cancelling so they can be invoked below
            cbs = t._callbacks
            t.cancel()
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            [cb[0](t) for cb in cbs]  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out
|
| 987 |
+
|
| 988 |
+
|
| 989 |
+
class AbstractAsyncStreamedFile(AbstractBufferedFile):
|
| 990 |
+
# no read buffering, and always auto-commit
|
| 991 |
+
# TODO: readahead might still be useful here, but needs async version
|
| 992 |
+
|
| 993 |
+
async def read(self, length=-1):
    """
    Read up to ``length`` bytes starting at the current position

    The data is obtained with a single ``_fetch_range`` call and the
    position is advanced by the number of bytes actually returned.

    Parameters
    ----------
    length: int (-1)
        Number of bytes to read; if <0 (or None), all remaining bytes.
    """
    nbytes = int(length) if length is not None else -1
    if self.mode != "rb":
        raise ValueError("File not in read mode")
    if nbytes < 0:
        # everything between the current position and the end of the file
        nbytes = self.size - self.loc
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    if nbytes == 0:
        # nothing to read, so skip the fetch entirely
        return b""
    begin = self.loc
    chunk = await self._fetch_range(begin, begin + nbytes)
    self.loc += len(chunk)
    return chunk
|
| 1015 |
+
|
| 1016 |
+
async def write(self, data):
|
| 1017 |
+
"""
|
| 1018 |
+
Write data to buffer.
|
| 1019 |
+
|
| 1020 |
+
Buffer only sent on flush() or if buffer is greater than
|
| 1021 |
+
or equal to blocksize.
|
| 1022 |
+
|
| 1023 |
+
Parameters
|
| 1024 |
+
----------
|
| 1025 |
+
data: bytes
|
| 1026 |
+
Set of bytes to be written.
|
| 1027 |
+
"""
|
| 1028 |
+
if self.mode not in {"wb", "ab"}:
|
| 1029 |
+
raise ValueError("File not in write mode")
|
| 1030 |
+
if self.closed:
|
| 1031 |
+
raise ValueError("I/O operation on closed file.")
|
| 1032 |
+
if self.forced:
|
| 1033 |
+
raise ValueError("This file has been force-flushed, can only close")
|
| 1034 |
+
out = self.buffer.write(data)
|
| 1035 |
+
self.loc += out
|
| 1036 |
+
if self.buffer.tell() >= self.blocksize:
|
| 1037 |
+
await self.flush()
|
| 1038 |
+
return out
|
| 1039 |
+
|
| 1040 |
+
async def close(self):
|
| 1041 |
+
"""Close file
|
| 1042 |
+
|
| 1043 |
+
Finalizes writes, discards cache
|
| 1044 |
+
"""
|
| 1045 |
+
if getattr(self, "_unclosable", False):
|
| 1046 |
+
return
|
| 1047 |
+
if self.closed:
|
| 1048 |
+
return
|
| 1049 |
+
if self.mode == "rb":
|
| 1050 |
+
self.cache = None
|
| 1051 |
+
else:
|
| 1052 |
+
if not self.forced:
|
| 1053 |
+
await self.flush(force=True)
|
| 1054 |
+
|
| 1055 |
+
if self.fs is not None:
|
| 1056 |
+
self.fs.invalidate_cache(self.path)
|
| 1057 |
+
self.fs.invalidate_cache(self.fs._parent(self.path))
|
| 1058 |
+
|
| 1059 |
+
self.closed = True
|
| 1060 |
+
|
| 1061 |
+
async def flush(self, force=False):
|
| 1062 |
+
if self.closed:
|
| 1063 |
+
raise ValueError("Flush on closed file")
|
| 1064 |
+
if force and self.forced:
|
| 1065 |
+
raise ValueError("Force flush cannot be called more than once")
|
| 1066 |
+
if force:
|
| 1067 |
+
self.forced = True
|
| 1068 |
+
|
| 1069 |
+
if self.mode not in {"wb", "ab"}:
|
| 1070 |
+
# no-op to flush on read-mode
|
| 1071 |
+
return
|
| 1072 |
+
|
| 1073 |
+
if not force and self.buffer.tell() < self.blocksize:
|
| 1074 |
+
# Defer write on small block
|
| 1075 |
+
return
|
| 1076 |
+
|
| 1077 |
+
if self.offset is None:
|
| 1078 |
+
# Initialize a multipart upload
|
| 1079 |
+
self.offset = 0
|
| 1080 |
+
try:
|
| 1081 |
+
await self._initiate_upload()
|
| 1082 |
+
except:
|
| 1083 |
+
self.closed = True
|
| 1084 |
+
raise
|
| 1085 |
+
|
| 1086 |
+
if await self._upload_chunk(final=force) is not False:
|
| 1087 |
+
self.offset += self.buffer.seek(0, 2)
|
| 1088 |
+
self.buffer = io.BytesIO()
|
| 1089 |
+
|
| 1090 |
+
async def __aenter__(self):
|
| 1091 |
+
return self
|
| 1092 |
+
|
| 1093 |
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
| 1094 |
+
await self.close()
|
| 1095 |
+
|
| 1096 |
+
async def _fetch_range(self, start, end):
|
| 1097 |
+
raise NotImplementedError
|
| 1098 |
+
|
| 1099 |
+
async def _initiate_upload(self):
|
| 1100 |
+
pass
|
| 1101 |
+
|
| 1102 |
+
async def _upload_chunk(self, final=False):
|
| 1103 |
+
raise NotImplementedError
|
env/lib/python3.13/site-packages/fsspec/caching.py
ADDED
|
@@ -0,0 +1,1011 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import collections
|
| 4 |
+
import functools
|
| 5 |
+
import logging
|
| 6 |
+
import math
|
| 7 |
+
import os
|
| 8 |
+
import threading
|
| 9 |
+
from collections import OrderedDict
|
| 10 |
+
from collections.abc import Callable
|
| 11 |
+
from concurrent.futures import Future, ThreadPoolExecutor
|
| 12 |
+
from itertools import groupby
|
| 13 |
+
from operator import itemgetter
|
| 14 |
+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, TypeVar
|
| 15 |
+
|
| 16 |
+
if TYPE_CHECKING:
|
| 17 |
+
import mmap
|
| 18 |
+
|
| 19 |
+
from typing_extensions import ParamSpec
|
| 20 |
+
|
| 21 |
+
P = ParamSpec("P")
|
| 22 |
+
else:
|
| 23 |
+
P = TypeVar("P")
|
| 24 |
+
|
| 25 |
+
T = TypeVar("T")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger("fsspec")
|
| 29 |
+
|
| 30 |
+
Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
|
| 31 |
+
MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class BaseCache:
    """Pass-through cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.nblocks = 0
        self.fetcher = fetcher
        self.size = size
        self.hit_count = 0
        self.miss_count = 0
        # the bytes that we actually requested
        self.total_requested_bytes = 0

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """Return bytes ``[start, stop)`` straight from the fetcher.

        ``None`` bounds default to the start and end of the file. An empty
        or out-of-file range returns ``b""`` without calling the fetcher.
        """
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

    def _reset_stats(self) -> None:
        """Reset hit and miss counts for a more granular report e.g. by file."""
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _log_stats(self) -> str:
        """Return a formatted string of the cache statistics."""
        if self.hit_count == 0 and self.miss_count == 0:
            # a cache that does nothing, this is for logs only
            return ""
        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"

    def __repr__(self) -> str:
        # TODO: use rich for better formatting
        # One field per line, framed by a leading and a trailing newline.
        return (
            "\n"
            f"<{self.__class__.__name__}:\n"
            f"block size : {self.blocksize}\n"
            f"block count : {self.nblocks}\n"
            f"file size : {self.size}\n"
            f"cache hits : {self.hit_count}\n"
            f"cache misses: {self.miss_count}\n"
            f"total requested bytes: {self.total_requested_bytes}>\n"
        )
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: Fetcher
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    location: str
        Where to create the temporary file. If None, a temporary file is
        created using tempfile.TemporaryFile().
    blocks: set[int]
        Set of block numbers that have already been fetched. If None, an empty
        set is created.
    multi_fetcher: MultiFetcher
        Function of the form f([(start, end)]) which gets bytes from remote
        as specified. This function is used to fetch multiple blocks at once.
        If not specified, the fetcher function is used instead.
    """

    name = "mmap"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        location: str | None = None,
        blocks: set[int] | None = None,
        multi_fetcher: MultiFetcher | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.multi_fetcher = multi_fetcher
        self.cache = self._makefile()

    def _makefile(self) -> mmap.mmap | bytearray:
        """Create (or reopen) the backing file and return a mapping of it.

        A zero-size file cannot be mapped, so an empty ``bytearray`` is
        returned instead.
        """
        import mmap
        import tempfile

        if self.size == 0:
            return bytearray()

        # posix version
        if self.location is None or not os.path.exists(self.location):
            if self.location is None:
                fd = tempfile.TemporaryFile()
                # a brand-new anonymous file cannot hold any cached blocks
                self.blocks = set()
            else:
                fd = open(self.location, "wb+")
            try:
                # extend the file to full length by writing its last byte
                fd.seek(self.size - 1)
                fd.write(b"1")
                fd.flush()
            except BaseException:
                fd.close()
                raise
        else:
            fd = open(self.location, "r+b")

        try:
            return mmap.mmap(fd.fileno(), self.size)
        finally:
            # mmap keeps its own duplicate of the descriptor, so the file
            # object is released here instead of being left to the GC
            fd.close()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes ``[start, end)``, fetching any blocks not yet cached."""
        logger.debug(f"MMap cache fetching {start}-{end}")
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        start_block = start // self.blocksize
        end_block = end // self.blocksize
        block_range = range(start_block, end_block + 1)
        # Determine which blocks need to be fetched. This sequence is sorted by construction.
        need = [i for i in block_range if i not in self.blocks]
        # Count the number of blocks already cached
        self.hit_count += len(block_range) - len(need)

        ranges = []

        # Consolidate needed blocks.
        # Algorithm adapted from Python 2.x itertools documentation.
        # We are grouping an enumerated sequence of blocks. By comparing when the difference
        # between an ascending range (provided by enumerate) and the needed block numbers
        # we can detect when the block number skips values. The key computes this difference.
        # Whenever the difference changes, we know that we have previously cached block(s),
        # and a new group is started. In other words, this algorithm neatly groups
        # runs of consecutive block numbers so they can be fetched together.
        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
            # Extract the blocks from the enumerated sequence
            _blocks = tuple(map(itemgetter(1), _blocks))
            # Compute start of first block
            sstart = _blocks[0] * self.blocksize
            # Compute the end of the last block. Last block may not be full size.
            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)

            # Fetch bytes (could be multiple consecutive blocks)
            self.total_requested_bytes += send - sstart
            logger.debug(
                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
            )
            ranges.append((sstart, send))

            # Update set of cached blocks
            self.blocks.update(_blocks)
            # Update cache statistics with number of blocks we had to cache
            self.miss_count += len(_blocks)

        if not ranges:
            return self.cache[start:end]

        if self.multi_fetcher:
            logger.debug(f"MMap get blocks {ranges}")
            for idx, r in enumerate(self.multi_fetcher(ranges)):
                (sstart, send) = ranges[idx]
                logger.debug(f"MMap copy block ({sstart}-{send})")
                self.cache[sstart:send] = r
        else:
            for sstart, send in ranges:
                logger.debug(f"MMap get block ({sstart}-{send})")
                self.cache[sstart:send] = self.fetcher(sstart, send)

        return self.cache[start:end]

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the unpicklable entries.
        del state["cache"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Restore instance attributes
        self.__dict__.update(state)
        self.cache = self._makefile()
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        # file offsets covered by ``self.cache``: [start, end)
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes ``[start, end)``, reading ahead by ``blocksize``.

        ``end`` is clamped to the file size. On anything but a full hit the
        held buffer is replaced by the newly fetched range.
        """
        if start is None:
            start = 0
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        # bytes still to be taken from the new fetch
        remaining = end - start
        if start >= self.start and end <= self.end:
            # cache hit
            self.hit_count += 1
            return self.cache[start - self.start : end - self.start]
        elif self.start <= start < self.end:
            # partial hit
            self.miss_count += 1
            part = self.cache[start - self.start :]
            remaining -= len(part)
            start = self.end
        else:
            # miss
            self.miss_count += 1
            part = b""
        end = min(self.size, end + self.blocksize)
        self.total_requested_bytes += end - start
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return part + self.cache[:remaining]
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        if blocksize > size:
            # this will buffer the whole thing
            blocksize = size
        super().__init__(blocksize, fetcher, size)
        # holds the first ``blocksize`` bytes once they have been read
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes ``[start, end)``, serving the first block from memory.

        ``start=None`` means the beginning of the file and ``end=None`` the
        end of the file; ``end`` is clamped to the file size.
        """
        start = start or 0
        if start > self.size:
            logger.debug("FirstChunkCache: requested start > file size")
            return b""

        # None is an allowed bound; min(None, int) would raise TypeError
        end = self.size if end is None else min(end, self.size)

        if start < self.blocksize:
            if self.cache is None:
                self.miss_count += 1
                if end > self.blocksize:
                    # one request covers both the header block and the tail
                    self.total_requested_bytes += end
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
                self.total_requested_bytes += self.blocksize
            part = self.cache[start:end]
            if end > self.blocksize:
                # the part beyond the first block is never cached
                self.total_requested_bytes += end - self.blocksize
                part += self.fetcher(self.blocksize, end)
            self.hit_count += 1
            return part
        else:
            self.miss_count += 1
            self.total_requested_bytes += end - start
            return self.fetcher(start, end)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # Work on a copy: deleting from the live ``__dict__`` would strip
        # ``_fetch_block_cached`` from this instance and break later reads.
        state = self.__dict__.copy()
        # the LRU wrapper is dropped here and rebuilt in __setstate__
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes ``[start, end)`` assembled from cached blocks."""
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        # ``block_number == nblocks`` is allowed on purpose: _fetch asks for
        # it when ``end`` sits on a block boundary, and it comes back empty.
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        self.total_requested_bytes += end - start
        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        self.hit_count += 1
        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
class BytesCache(BaseCache):
    """Cache which holds data in a in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        # file offsets covered by ``self.cache``; None until the first fetch
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes ``[start, end)``, growing or replacing the buffer.

        Depending on how the request overlaps the held range, the buffer is
        served as is, extended at either end, or refetched outright.
        """
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        # ``bend`` is the read-ahead end: one blocksize past the request
        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    # request ends far before the held end: refetch outright
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # prepend only the missing leading bytes
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    # request is far past the held end: refetch outright
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # append only the missing trailing bytes
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                # drop whole blocks from the front of the buffer
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        return len(self.cache)
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
class AllBytes(BaseCache):
    """Cache entire contents of the file

    Parameters
    ----------
    blocksize: int or None
        Stored on the instance; not used by this class's own methods.
    fetcher: Fetcher or None
        Called once as f(0, size) when ``data`` is not supplied.
    size: int or None
        How big this file is
    data: bytes or None
        Full contents, if already known. When given, the fetcher is never
        called.
    """

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            # the single up-front download counts as the only miss
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """Return ``data[start:stop]``; every call counts as a hit."""
        self.hit_count += 1
        return self.data[start:stop]
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes. Entries are popped from the passed dictionary
        while contiguous parts are merged, so the caller's dict is left
        empty.
    strict: bool, default False
        How to treat reads that are not fully covered by known parts.
        If `True`, a read that crosses a gap between known parts, or that
        runs past the last known part, raises ``ValueError``. If `False`,
        such a read is zero padded. Note that zero padding will not be
        used for reads that begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: dict[tuple[int, int], bytes] | None = None,
        strict: bool = False,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    # touches the previous part: extend it in place
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    @property
    def size(self):
        """Total number of bytes covered by the known parts."""
        return sum(_[1] - _[0] for _ in self.data)

    @size.setter
    def size(self, value):
        # Assignments are ignored; the value is always derived from the parts.
        pass

    @property
    def nblocks(self):
        """Number of (consolidated) known parts."""
        return len(self.data)

    @nblocks.setter
    def nblocks(self, value):
        # Assignments are ignored; the value is always derived from the parts.
        pass

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """
        Assemble the bytes for ``[start, stop)`` from the known parts.

        The fetcher is never called here. ``start`` defaults to 0 and
        ``stop`` to ``self.size``.

        Raises
        ------
        ValueError
            If the read is not fully covered by known parts and either
            ``strict`` is set or no known part contains ``start``.
        """
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        self.total_requested_bytes += stop - start

        out = b""
        started = False
        loc_old = 0
        for loc0, loc1 in sorted(self.data):
            if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
                # entirely within the block
                off = start - loc0
                self.hit_count += 1
                return self.data[(loc0, loc1)][off : off + stop - start]
            if stop <= loc0:
                break
            if started and loc0 > loc_old:
                # a gap where we need data
                self.miss_count += 1
                if self.strict:
                    raise ValueError(
                        f"Read [{start}, {stop}) crosses a gap between known "
                        f"parts at [{loc_old}, {loc0})"
                    )
                out += b"\x00" * (loc0 - loc_old)
            if loc0 <= start < loc1:
                # found the start
                self.hit_count += 1
                off = start - loc0
                out = self.data[(loc0, loc1)][off : off + stop - start]
                started = True
            elif start < loc0 and stop > loc1:
                # the whole block
                self.hit_count += 1
                out += self.data[(loc0, loc1)]
            elif loc0 <= stop <= loc1:
                # end block
                # NOTE(review): this branch is also reached when no part
                # contained ``start``; the result is then shorter than
                # ``stop - start`` -- confirm that is intended.
                self.hit_count += 1
                return out + self.data[(loc0, loc1)][: stop - loc0]
            loc_old = loc1
        self.miss_count += 1
        if started and not self.strict:
            # ran off the end of the known parts: pad the tail with zeros
            return out + b"\x00" * (stop - loc_old)
        raise ValueError(
            f"Read [{start}, {stop}) is not fully covered by known parts"
        )
|
| 707 |
+
|
| 708 |
+
|
| 709 |
+
class UpdatableLRU(Generic[P, T]):
    """
    LRU cache around a function that also accepts manually inserted entries

    Used by BackgroundBlockCache
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()

    def _store(self, key: Any, value: T) -> None:
        """Insert ``key`` and evict the oldest entry if over capacity.

        Callers must already hold ``self._lock``.
        """
        self._cache[key] = value
        if len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        """Return the cached value for ``args``, computing it on a miss.

        Only positional arguments are supported; any keyword argument
        raises ``TypeError``.
        """
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")

        with self._lock:
            if args in self._cache:
                # Hit: mark as most recently used.
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        # The wrapped function runs without the lock held.
        value = self._func(*args, **kwargs)

        with self._lock:
            self._store(args, value)
            self._misses += 1

        return value

    def is_key_cached(self, *args: Any) -> bool:
        """Tell whether ``args`` currently has an entry."""
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        """Insert ``result`` under ``args`` without calling the function."""
        with self._lock:
            self._store(args, result)

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """Snapshot of hit/miss counters and current occupancy."""
        with self._lock:
            return self.CacheInfo(
                hits=self._hits,
                misses=self._misses,
                maxsize=self._max_size,
                currsize=len(self._cache),
            )
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
        Function of the form f(start, end) which gets bytes from remote.
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        # A single worker: at most one read-ahead is in flight at a time.
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        """
        Return the picklable part of the instance state.

        The LRU cache, executor, pending future and lock are left out;
        ``__setstate__`` recreates them.
        """
        # Work on a copy: deleting from ``self.__dict__`` itself would strip
        # these attributes from the live instance being pickled.
        state = self.__dict__.copy()
        for transient in (
            "_fetch_block_cached",
            "_thread_executor",
            "_fetch_future_block_number",
            "_fetch_future",
            "_fetch_future_lock",
        ):
            del state[transient]
        return state

    def __setstate__(self, state) -> None:
        """Restore pickled state and rebuild the transient members."""
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """
        Return bytes ``[start, end)``, then schedule a background fetch of
        the block following ``end`` if it is not cached yet.
        """
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check if we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            # NOTE(review): blocks are numbered from 0 and nblocks is
            # ceil(size / blocksize), so ``<=`` also admits block ``nblocks``,
            # which starts at or past the end of the file -- confirm whether
            # ``<`` was intended.
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.

        ``log_info`` only labels the log message ("sync" or "async").

        Raises
        ------
        ValueError
            If ``block_number`` is greater than ``self.nblocks``.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
|
| 973 |
+
|
| 974 |
+
|
| 975 |
+
caches: dict[str | None, type[BaseCache]] = {
|
| 976 |
+
# one custom case
|
| 977 |
+
None: BaseCache,
|
| 978 |
+
}
|
| 979 |
+
|
| 980 |
+
|
| 981 |
+
def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """Add a cache implementation to the ``caches`` registry.

    The class is stored under its ``name`` attribute.

    Parameters
    ----------
    cls: type[BaseCache]
        The cache class to register.
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
        If the name is already registered and ``clobber`` is False.
    """
    key = cls.name
    already_known = key in caches
    if already_known and not clobber:
        raise ValueError(f"Cache with name {key!r} is already known: {caches[key]}")
    caches[key] = cls
|
| 998 |
+
|
| 999 |
+
|
| 1000 |
+
for c in (
|
| 1001 |
+
BaseCache,
|
| 1002 |
+
MMapCache,
|
| 1003 |
+
BytesCache,
|
| 1004 |
+
ReadAheadCache,
|
| 1005 |
+
BlockCache,
|
| 1006 |
+
FirstChunkCache,
|
| 1007 |
+
AllBytes,
|
| 1008 |
+
KnownPartsOfAFile,
|
| 1009 |
+
BackgroundBlockCache,
|
| 1010 |
+
):
|
| 1011 |
+
register_cache(c)
|
env/lib/python3.13/site-packages/fsspec/callbacks.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import wraps
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Callback:
    """
    Base class and interface for callback mechanism

    Use it directly to monitor file transfers by passing
    ``callback=Callback(hooks=...)`` (see ``hooks`` below), or subclass it
    for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        # Extra keyword arguments are forwarded to every hook invocation.
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        Used when this callback operates at a higher level, e.g., put, which
        may trigger transfers that can also be monitored. The returned
        callback has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this function instead
        of `branch` and avoid calling `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # ``branch`` may have stored a child under "callback"; take it out so
        # the caller has to pass "callback=" explicitly.
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and pass a new child callback to it.
        """

        @wraps(fn)
        async def wrapper(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return wrapper

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        # Per-call kwargs take precedence over those given at construction.
        merged = {**self.kw, **kwargs}
        if not hook_name:
            for hook in self.hooks.values():
                hook(self.size, self.value, **merged)
            return
        if hook_name in self.hooks:
            return self.hooks[hook_name](self.size, self.value, **merged)
        return

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iterations

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for element in iterable:
            self.relative_update()
            yield element

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        Used when this callback operates at a higher level, e.g., put, which
        may trigger transfers that can also be monitored. The passed kwargs
        are to be *mutated* to add ``callback=``, if this class supports
        branching to children. This base implementation does nothing.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        return DEFAULT_CALLBACK if maybe_callback is None else maybe_callback
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class NoOpCallback(Callback):
    """
    Callback whose ``call`` ignores its arguments and does nothing
    """

    def call(self, *args, **kwargs):
        return None
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
class DotPrinterCallback(Callback):
    """
    Simple example Callback implementation

    Almost identical to Callback with a hook that prints a char; here we
    demonstrate how the outer layer may print "#" and the inner layer "."

    Parameters
    ----------
    chr_to_print: str
        Character written on every ``call()``.
    **kwargs:
        Forwarded to ``Callback``.
    """

    def __init__(self, chr_to_print="#", **kwargs):
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs to add new instance with different print char"""
        kwargs["callback"] = DotPrinterCallback(".")

    def call(self, *args, **kwargs):
        """Just outputs a character

        Positional arguments are accepted and ignored, so that a hook name
        passed positionally (as the base ``call`` allows) is not a TypeError.
        """
        print(self.chr, end="")
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to `tqdm_cls`.
    tqdm_cls: (optional)
        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

    Raises
    ------
    ImportError
        If tqdm is not installed.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(),
        )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
        )

    You can also customize the progress bar by passing a subclass of `tqdm`.

    .. code-block:: python

        class TqdmFormat(tqdm):
            '''Provides a `total_time` format parameter'''
            @property
            def format_dict(self):
                d = super().format_dict
                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
                d.update(total_time=self.format_interval(total_time) + " in total")
                return d

    >>> with TqdmCallback(
            tqdm_kwargs={
                "desc": "desc",
                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
            },
            tqdm_cls=TqdmFormat,
        ) as callback:
            fs.upload(".", path2distant_data, recursive=True, callback=callback)
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        # Set this before anything can raise: ``__del__`` calls ``close()``,
        # and without a real ``tqdm`` attribute the lookup falls through to
        # ``Callback.__getattr__``, which hands back the ``no_op`` method.
        self.tqdm = None

        try:
            from tqdm import tqdm

        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
        self._tqdm_kwargs = tqdm_kwargs or {}
        super().__init__(*args, **kwargs)

    def call(self, *args, **kwargs):
        """Create the bar on first use, then sync its total and position."""
        if self.tqdm is None:
            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
        self.tqdm.total = self.size
        # tqdm.update takes an increment, so pass the delta to the target.
        self.tqdm.update(self.value - self.tqdm.n)

    def close(self):
        """Close the progress bar, if one was created."""
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None

    def __del__(self):
        return self.close()
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
|
env/lib/python3.13/site-packages/fsspec/compression.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper functions for a standard streaming compression API"""
|
| 2 |
+
|
| 3 |
+
from zipfile import ZipFile
|
| 4 |
+
|
| 5 |
+
import fsspec.utils
|
| 6 |
+
from fsspec.spec import AbstractBufferedFile
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def noop_file(file, mode, **kwargs):
    """Return ``file`` unchanged; ``mode`` and ``kwargs`` are ignored."""
    return file
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# TODO: files should also be available as contexts
|
| 14 |
+
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
|
| 15 |
+
compr = {None: noop_file}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]
    else:
        # Materialise once: the extensions are walked twice below, and a
        # one-shot iterable would be exhausted by the validation pass.
        extensions = list(extensions)

    # Validate registration before touching either registry, so a rejected
    # call leaves both unchanged.
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Expose a single member of a zip archive as a file-like object.

    Parameters
    ----------
    infile: file-like
        Object holding (or receiving) the zip archive bytes.
    mode: str
        If it contains "r" the archive is read, otherwise a new archive is
        written.
    filename: str or None
        Member name. When reading, defaults to the first entry returned by
        ``ZipFile.namelist()``; when writing, defaults to ``"file"``.
    **kwargs:
        Passed to ``ZipFile(...)`` when writing and to ``ZipFile.open(...)``
        when reading.

    Returns
    -------
    The file-like object for the archive member. In write mode, closing it
    also closes the enclosing archive.
    """
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        close_member = fo.close

        def close_member_and_archive():
            # The archive must be finalised even if closing the member fails,
            # otherwise the ZipFile is left open with nobody holding a handle.
            try:
                close_member()
            finally:
                z.close()

        fo.close = close_member_and_archive
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
register_compression("zip", unzip, "zip")

# Each codec below is optional: it is registered only if its module imports.
try:
    from bz2 import BZ2File

    register_compression("bz2", BZ2File, "bz2")
except ImportError:
    pass

try:  # pragma: no cover
    from isal import igzip
except ImportError:
    # No isal available: fall back to the standard-library gzip module
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )
else:

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")

try:
    from lzma import LZMAFile
except ImportError:
    pass
else:
    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")

try:
    import lzmaffi
except ImportError:
    pass
else:
    # force=True so that these replace any "lzma"/"xz" entries made just above
    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class SnappyFile(AbstractBufferedFile):
    """Snappy stream (de)compression wrapped around another file-like object.

    Reads pull bytes from ``infile`` and run them through a
    ``snappy.StreamDecompressor``; writes run the buffer through a
    ``snappy.StreamCompressor`` and write the result to ``infile``.
    Seeking is not supported.
    """

    def __init__(self, infile, mode, **kwargs):
        import snappy

        # The base class is always given a binary mode string
        binary_mode = mode.strip("b") + "b"
        # NOTE(review): size=999999999 looks like a stand-in for "unknown
        # length" -- confirm against AbstractBufferedFile
        super().__init__(
            fs=None, path="snappy", mode=binary_mode, size=999999999, **kwargs
        )
        self.infile = infile
        reading = "r" in mode
        self.codec = (
            snappy.StreamDecompressor() if reading else snappy.StreamCompressor()
        )

    def _upload_chunk(self, final=False):
        """Compress the current write buffer and write it to ``infile``."""
        self.buffer.seek(0)
        raw = self.buffer.read()
        self.infile.write(self.codec.add_chunk(raw))
        return True

    def seek(self, loc, whence=0):
        """Always raises: the stream cannot be repositioned."""
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        """Report that seeking is unsupported."""
        return False

    def _fetch_range(self, start, end):
        """Read ``end - start`` bytes from ``infile`` and decompress them"""
        compressed = self.infile.read(end - start)
        return self.codec.decompress(compressed)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
try:
    import snappy

    # Probe the module: only register if a working ``compress`` is exposed
    snappy.compress(b"")
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame
except ImportError:
    pass
else:
    register_compression("lz4", lz4.frame.open, "lz4")

try:
    # zstd in the standard library for python >= 3.14
    from compression.zstd import ZstdFile
except ImportError:
    try:
        import zstandard as zstd
    except ImportError:
        pass
    else:

        def zstandard_file(infile, mode="rb"):
            """Wrap ``infile`` in a zstandard stream reader or writer."""
            if "r" in mode:
                return zstd.ZstdDecompressor().stream_reader(infile)
            return zstd.ZstdCompressor(level=10).stream_writer(infile)

        register_compression("zstd", zstandard_file, "zst")
else:
    register_compression("zstd", ZstdFile, "zst")
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def available_compressions():
    """Return a new list holding the keys currently registered in ``compr``."""
    return [*compr]
|
env/lib/python3.13/site-packages/fsspec/config.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import configparser
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import warnings
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
conf: dict[str, dict[str, Any]] = {}
|
| 10 |
+
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
|
| 11 |
+
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def set_conf_env(conf_dict, envdict=os.environ):
    """Populate config values from environment variables

    Two variable shapes are recognised: ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. The value of ``FSSPEC_<protocol>`` is
    decoded as JSON and, if it is a dict, merged into that protocol's config
    with ``update``. The value of ``FSSPEC_<protocol>_<kwarg>`` is stored
    verbatim as a string under the lower-cased kwarg name.

    All ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones, so they take precedence.

    Variables that start with ``FSSPEC`` but fit neither shape, fail to parse
    as JSON, or decode to something other than a dict are skipped with a
    warning.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    prefix = "FSSPEC_"
    deferred = []  # per-kwarg variables, applied in a second pass
    for name in envdict:
        well_formed = (
            name.startswith(prefix)
            and len(name) > len(prefix)
            and name[len(prefix)] != "_"
        )
        if not well_formed:
            if name.startswith("FSSPEC"):
                warnings.warn(
                    f"Ignoring environment variable {name} due to having an unexpected name"
                )
            continue

        if name.count("_") > 1:
            # more than one underscore: FSSPEC_<protocol>_<kwarg>
            deferred.append(name)
            continue

        try:
            parsed = json.loads(envdict[name])
        except json.decoder.JSONDecodeError as ex:
            warnings.warn(
                f"Ignoring environment variable {name} due to a parse failure: {ex}"
            )
            continue

        if not isinstance(parsed, dict):
            warnings.warn(
                f"Ignoring environment variable {name} due to not being a dict:"
                f" {type(parsed)}"
            )
            continue

        protocol = name.split("_", 1)[1]
        conf_dict.setdefault(protocol.lower(), {}).update(parsed)

    for name in deferred:
        _, protocol, option = name.split("_", 2)
        conf_dict.setdefault(protocol.lower(), {})[option.lower()] = envdict[name]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def set_conf_files(cdir, conf_dict):
    """Populate config values from files

    Looks at every ``*.ini`` and ``*.json`` file in the given directory, in
    sorted filename order, and merges their contents into the config. Where
    the same value is set more than once, the later one wins.

    Values read from INI files are always strings; no conversion is
    attempted. The ``DEFAULT`` section of an INI file is skipped.

    Does nothing if ``cdir`` is not an existing directory.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return

    for filename in sorted(os.listdir(cdir)):
        full_path = os.path.join(cdir, filename)

        if filename.endswith(".ini"):
            parser = configparser.ConfigParser()
            parser.read(full_path)
            for section in parser:
                if section != "DEFAULT":
                    conf_dict.setdefault(section, {}).update(dict(parser[section]))

        if filename.endswith(".json"):
            with open(full_path) as handle:
                loaded = json.load(handle)
            for section in loaded:
                conf_dict.setdefault(section, {}).update(dict(loaded[section]))
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def apply_config(cls, kwargs, conf_dict=None):
    """Fill in default kwargs for instantiating a class from the config

    Builds a new kwargs dict: first the config entries for each of the
    class's ``.protocol`` names (one str, or a tuple/list of them, later
    names overriding earlier ones), then the explicitly passed kwargs on top.

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration; the module-level
        ``conf`` is used when this is None

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf

    protocol = cls.protocol
    names = protocol if isinstance(protocol, (tuple, list)) else [protocol]

    merged = {}
    for name in names:
        # default kwargs from the current state of the config
        if name in conf_dict:
            merged.update(conf_dict[name])

    # explicit kwargs always win
    merged.update(**kwargs)
    return merged
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
set_conf_files(conf_dir, conf)
|
| 131 |
+
set_conf_env(conf)
|
env/lib/python3.13/site-packages/fsspec/conftest.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import subprocess
|
| 4 |
+
import sys
|
| 5 |
+
import time
|
| 6 |
+
from collections import deque
|
| 7 |
+
from collections.abc import Generator, Sequence
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
import fsspec
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.

    The filesystem's ``store`` and ``pseudo_dirs`` are reset both before the
    test receives it and after the test finishes.
    """
    memfs = fsspec.filesystem("memory")

    def reset():
        memfs.store.clear()
        memfs.pseudo_dirs.clear()
        memfs.pseudo_dirs.append("")

    reset()
    try:
        yield memfs
    finally:
        reset()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class InstanceCacheInspector:
    """
    Test helper for looking into the instance caches of filesystem classes.

    Both methods walk breadth-first over ``AbstractFileSystem`` and everything
    reachable through ``__subclasses__()``.
    """

    def clear(self) -> None:
        """
        Empty the instance cache of every currently imported filesystem class.
        """
        pending = deque([fsspec.spec.AbstractFileSystem])
        while pending:
            fs_cls = pending.popleft()
            fs_cls.clear_instance_cache()
            pending.extend(fs_cls.__subclasses__())

    def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
        """
        Count the cached filesystem instances of every currently imported
        filesystem class, keyed by protocol name (the first one, if a class
        lists several).

        Parameters
        ----------
        omit_zero:
            Whether to omit instance types with no cached instances.
        """
        counts: dict[str, int] = {}
        pending = deque([fsspec.spec.AbstractFileSystem])
        while pending:
            fs_cls = pending.popleft()
            cached = len(fs_cls._cache)  # there is no public interface for the cache
            protocol = fs_cls.protocol
            # note: skip intermediate AbstractFileSystem subclasses
            #   if they proxy the protocol attribute via a property.
            if isinstance(protocol, (Sequence, str)):
                name = protocol if isinstance(protocol, str) else protocol[0]
                if cached or not omit_zero:
                    counts[name] = cached
            pending.extend(fs_cls.__subclasses__())
        return counts
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@pytest.fixture(scope="function", autouse=True)
def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
    """
    Fixture that empties the filesystem instance caches around each test.

    Applied automatically to every test. The caches of all imported
    filesystem classes are cleared before the test body runs and again after
    it finishes. Request it explicitly to make assertions about the caches.

    Usage:

        def test_something(instance_caches):
            # Test code here
            fsspec.open("file://abc")
            fsspec.open("memory://foo/bar")

            # Test assertion
            assert instance_caches.gather_counts() == {"file": 1, "memory": 1}

    Returns
    -------
    instance_caches: An instance cache inspector for clearing and inspecting caches.
    """
    inspector = InstanceCacheInspector()
    inspector.clear()
    try:
        yield inspector
    finally:
        inspector.clear()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
@pytest.fixture(scope="function")
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.

    Starts ``pyftpdlib`` in a subprocess serving ``tmpdir`` (pre-seeded with a
    file named ``out``) and yields ``(host, port, username, password)``.
    The test is skipped if ``pyftpdlib`` cannot be imported.
    """
    pytest.importorskip("pyftpdlib")

    root = str(tmpdir)
    with open(os.path.join(root, "out"), "wb") as seed:
        seed.write(b"hello" * 10000)

    command = [
        sys.executable,
        "-m",
        "pyftpdlib",
        "-d",
        root,
        "-u",
        "user",
        "-P",
        "pass",
        "-w",
    ]
    server = subprocess.Popen(command)
    try:
        # Fixed pause, presumably to let the server come up; nothing checks
        # that it is actually listening
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        server.terminate()
        server.wait()
        # best-effort removal of the served directory
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
|
env/lib/python3.13/site-packages/fsspec/core.py
ADDED
|
@@ -0,0 +1,760 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
from glob import has_magic
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# for backwards compat, we export cache things from here too
|
| 11 |
+
from fsspec.caching import ( # noqa: F401
|
| 12 |
+
BaseCache,
|
| 13 |
+
BlockCache,
|
| 14 |
+
BytesCache,
|
| 15 |
+
MMapCache,
|
| 16 |
+
ReadAheadCache,
|
| 17 |
+
caches,
|
| 18 |
+
)
|
| 19 |
+
from fsspec.compression import compr
|
| 20 |
+
from fsspec.config import conf
|
| 21 |
+
from fsspec.registry import available_protocols, filesystem, get_filesystem_class
|
| 22 |
+
from fsspec.utils import (
|
| 23 |
+
_unstrip_protocol,
|
| 24 |
+
build_name_function,
|
| 25 |
+
infer_compression,
|
| 26 |
+
stringify_path,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("fsspec")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        # get_compression is defined elsewhere in this module; presumably it
        # resolves "infer" against the path -- confirm there
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        # Stack of layered file objects built by __enter__, innermost first
        self.fobjects = []

    def __reduce__(self):
        """Pickle as the constructor arguments only; open files are not carried."""
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        """Open the file, add compression and text layers, return the outermost.

        Raises
        ------
        FileNotFoundError
            If the filesystem cannot find the path. When the path contains
            glob characters, the error carries a hint about ``expand=True``.
        """
        # The filesystem is always asked for a binary file; text handling is
        # layered on top below
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                # The path is formatted into the message here: exceptions do
                # not apply %-formatting, and a two-argument OSError would
                # treat the arguments as (errno, strerror).
                raise FileNotFoundError(
                    f"{self.path} not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True."
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            # only the first character of the mode ("r", "w", ...) is passed on
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        """The path with the filesystem's protocol restored."""
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        # outermost layer first, so each wrapper closes before what it wraps
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class OpenFiles(list):
    """List of OpenFile instances

    Entering the list as a context opens every contained file and leaving it
    closes them all. Ordinary list indexing still returns the elements;
    slicing returns another ``OpenFiles``.

    Caching filesystems get special treatment: if the filesystem, or one
    reached by following its ``.fs`` attribute, has ``open_many``, the files
    are down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        # Walk down the chain of wrapped filesystems looking for bulk support
        layer = self.fs
        while layer is not None:
            if hasattr(layer, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = layer.open_many(self)
                return self.files
            layer = getattr(layer, "fs", None)

        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        layer = self.fs
        for s in self:
            s.__exit__(*args)

        if "r" in self.mode:
            return

        while layer is not None:
            if hasattr(layer, "open_many"):
                # check for concurrent cache upload
                layer.commit_many(self.files)
                return
            layer = getattr(layer, "fs", None)

    def __getitem__(self, item):
        selected = super().__getitem__(item)
        if not isinstance(item, slice):
            return selected
        # a slice keeps the mode and filesystem of the parent list
        return OpenFiles(selected, mode=self.mode, fs=self.fs)

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.makedirs(exist_ok=True)``.
    expand: bool
        Forwarded unchanged to ``get_fs_token_paths``.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, _fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )

    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        # de-duplicate parents so each directory is created once
        for parent in {fs._parent(path) for path in paths}:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass

    opened = [
        OpenFile(
            fs,
            path,
            mode=mode,
            compression=compression,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )
        for path in paths
    ]
    return OpenFiles(opened, mode=mode, fs=fs)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _un_chain(path, kwargs):
    """Split a possibly ``::``-chained URL into one entry per filesystem.

    Returns a list of ``(url, protocol, kwargs)`` tuples in the same order as
    the links appear in ``path``. The passed ``kwargs`` dict is copied, not
    mutated.
    """
    # Avoid a circular import
    from fsspec.implementations.chained import ChainedFileSystem

    if "::" in path:
        # test for non protocol-like single word
        not_bare_word = re.compile(".*[^a-z]+.*")
        known_protocols = set(available_protocols())

        # split on '::', then ensure each bit has a protocol
        bits = []
        for piece in path.split("::"):
            keep_as_is = piece not in known_protocols and (
                "://" in piece or not_bare_word.match(piece)
            )
            bits.append(piece if keep_as_is else piece + "://")
    else:
        bits = [path]

    # [[url, protocol, kwargs], ...]
    links = []
    previous_bit = None
    kwargs = kwargs.copy()

    # Work from the last link back to the first
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        url_kwargs = cls._get_kwargs_from_urls(bit)
        explicit = kwargs.pop(protocol, {})

        if bit is bits[0]:
            # the first link also receives whatever kwargs are left over
            explicit.update(kwargs)

        from_url = {
            k: v for k, v in url_kwargs.items() if k not in explicit or v != explicit[k]
        }
        # Built with dict(**a, **b) on purpose: a key present in both with
        # different values makes this call raise TypeError
        merged = dict(**from_url, **explicit)
        bit = cls._strip_protocol(bit)

        chain_without_path = (
            "target_protocol" not in merged
            and issubclass(cls, ChainedFileSystem)
            and not bit
        )
        if chain_without_path:
            # replace bit if we are chaining and no path given
            bit = previous_bit

        links.append((bit, protocol, merged))
        previous_bit = bit

    links.reverse()
    return links
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def url_to_fs(url, **kwargs):
    """
    Build a filesystem instance from a fully-qualified, possibly chained URL

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Options for the storage connection (host, port, username, password,
        etc.). Arguments that belong to ``fsspec.open()`` rather than to a
        filesystem are silently dropped.

    Returns
    -------
    filesystem : FileSystem
        The filesystem selected by ``url``, instantiated with ``**kwargs``.
    urlpath : str
        The filesystem-specific path for ``url``.
    """
    url = stringify_path(url)

    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    open_only_arguments = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    fs_options = {
        name: value
        for name, value in kwargs.items()
        if name not in open_only_arguments
    }
    chain = _un_chain(url, fs_options)

    # Walk the inner links from last to first, wrapping what has been built so
    # far into a nested target_* structure for the link in front of it.
    inkwargs = {}
    for target_url, target_protocol, target_kw in reversed(chain[1:]):
        inkwargs["target_options"] = dict(**target_kw, **inkwargs)
        inkwargs["target_protocol"] = target_protocol
        inkwargs["fo"] = target_url

    # The first link is the filesystem that actually gets instantiated.
    urlpath, protocol, outer_kw = chain[0]
    inkwargs = dict(**outer_kw, **inkwargs)

    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
# Module-level default for the ``expand`` argument of ``open()``; read once
# from the "open_expand" config value, falling back to False when it is unset.
DEFAULT_EXPAND = conf.get("open_expand", False)
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Return a single ``OpenFile`` object for the given path.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether paths containing special glob characters should be expanded
        (taking the first match) or used literally. Pass False to work with
        paths that really do contain such characters. When None (the default)
        the value of the module variable ``DEFAULT_EXPAND`` is used; that
        variable is initialised from the "open_expand" config value at
        startup and is False if the config value is not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Raises
    ------
    FileNotFoundError
        If resolving ``urlpath`` produced no file at all.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    if expand is None:
        expand = DEFAULT_EXPAND

    # Delegate to the multi-file opener with a one-element list and unwrap.
    candidates = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if candidates:
        return candidates[0]
    raise FileNotFoundError(urlpath)
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
def open_local(
|
| 525 |
+
url: str | list[str] | Path | list[Path],
|
| 526 |
+
mode: str = "rb",
|
| 527 |
+
**storage_options: dict,
|
| 528 |
+
) -> str | list[str]:
|
| 529 |
+
"""Open file(s) which can be resolved to local
|
| 530 |
+
|
| 531 |
+
For files which either are local, or get downloaded upon open
|
| 532 |
+
(e.g., by file caching)
|
| 533 |
+
|
| 534 |
+
Parameters
|
| 535 |
+
----------
|
| 536 |
+
url: str or list(str)
|
| 537 |
+
mode: str
|
| 538 |
+
Must be read mode
|
| 539 |
+
storage_options:
|
| 540 |
+
passed on to FS for or used by open_files (e.g., compression)
|
| 541 |
+
"""
|
| 542 |
+
if "r" not in mode:
|
| 543 |
+
raise ValueError("Can only ensure local files when reading")
|
| 544 |
+
of = open_files(url, mode=mode, **storage_options)
|
| 545 |
+
if not getattr(of[0].fs, "local_file", False):
|
| 546 |
+
raise ValueError(
|
| 547 |
+
"open_local can only be used on a filesystem which"
|
| 548 |
+
" has attribute local_file=True"
|
| 549 |
+
)
|
| 550 |
+
with of as files:
|
| 551 |
+
paths = [f.name for f in files]
|
| 552 |
+
if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
|
| 553 |
+
return paths[0]
|
| 554 |
+
return paths
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def get_compression(urlpath, compression):
    """Resolve and validate the compression codec to use for ``urlpath``.

    ``"infer"`` is replaced by whatever ``infer_compression`` derives from
    ``urlpath``. The result is returned if it is None or a key of ``compr``;
    any other value raises ``ValueError``.
    """
    codec = infer_compression(urlpath) if compression == "infer" else compression
    if codec is None or codec in compr:
        return codec
    raise ValueError(f"Compression type {codec} not supported")
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def split_protocol(urlpath):
    """Return protocol, path pair

    The protocol is None when ``urlpath`` carries none. A one-character
    prefix before ``://`` is not treated as a protocol.
    """
    urlpath = stringify_path(urlpath)
    prefix, separator, remainder = urlpath.partition("://")
    # a single-character "protocol" excludes Windows paths
    if separator and len(prefix) > 1:
        return prefix, remainder
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    # Only the protocol is needed to choose the filesystem class; the
    # stripping itself is delegated to that class.
    protocol = split_protocol(urlpath)[0]
    return get_filesystem_class(protocol)._strip_protocol(urlpath)
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    Parameters
    ----------
    paths: iterable of str
        The paths to expand.
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
        Used to glob the paths in read mode.
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.

    Returns
    -------
    list of paths

    Raises
    ------
    ValueError
        In write mode, if more than one path contains a ``*``.
    """
    paths = list(paths)

    if "w" not in mode:
        # read mode: glob whatever contains magic characters
        matched = []
        for candidate in paths:
            if has_magic(candidate):
                matched.extend(fs.glob(candidate))
            else:
                matched.append(candidate)
        return matched

    # write mode: at most one mask, filled in via name_function
    masks = [candidate for candidate in paths if "*" in candidate]
    if len(masks) > 1:
        raise ValueError(
            "When writing data, only one filename mask can be specified."
        )
    num = max(num, len(paths))

    generated = []
    for candidate in paths:
        if "*" in candidate:
            generated.extend(_expand_paths(candidate, name_function, num))
        else:
            generated.append(candidate)

    # if we generated more paths than asked for, trim the list
    return generated[:num]
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class. The dict passed
        in is not modified.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory

    Returns
    -------
    tuple
        ``(fs, fs._fs_token, paths)`` where ``paths`` is a list.

    Raises
    ------
    ValueError
        If ``urlpath`` is an empty list/tuple/set, or its members do not all
        resolve to the same protocol.
    """
    is_collection = isinstance(urlpath, (list, tuple, set))
    if is_collection:
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        # the filesystem is derived from the first member only
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)

    # Work on a copy so that injecting ``protocol`` below never leaks into
    # the dict owned by the caller.
    storage_options = dict(storage_options or {})
    if protocol:
        storage_options["protocol"] = protocol

    chain = _un_chain(urlpath0, storage_options)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            # first link of the chain: its options go on the top level
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)

    if is_collection:
        pchains = [_un_chain(stringify_path(u), storage_options)[0] for u in urlpath]
        if len({pc[1] for pc in pchains}) > 1:
            # The original passed logging-style arguments to ValueError, so
            # the "%s" placeholder was never filled in.
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)

    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths
|
| 708 |
+
|
| 709 |
+
|
| 710 |
+
def _expand_paths(path, name_function, num):
|
| 711 |
+
if isinstance(path, str):
|
| 712 |
+
if path.count("*") > 1:
|
| 713 |
+
raise ValueError("Output path spec must contain exactly one '*'.")
|
| 714 |
+
elif "*" not in path:
|
| 715 |
+
path = os.path.join(path, "*.part")
|
| 716 |
+
|
| 717 |
+
if name_function is None:
|
| 718 |
+
name_function = build_name_function(num - 1)
|
| 719 |
+
|
| 720 |
+
paths = [path.replace("*", name_function(i)) for i in range(num)]
|
| 721 |
+
if paths != sorted(paths):
|
| 722 |
+
logger.warning(
|
| 723 |
+
"In order to preserve order between partitions"
|
| 724 |
+
" paths created with ``name_function`` should "
|
| 725 |
+
"sort to partition order"
|
| 726 |
+
)
|
| 727 |
+
elif isinstance(path, (tuple, list)):
|
| 728 |
+
assert len(path) == num
|
| 729 |
+
paths = list(path)
|
| 730 |
+
else:
|
| 731 |
+
raise ValueError(
|
| 732 |
+
"Path should be either\n"
|
| 733 |
+
"1. A list of paths: ['foo.json', 'bar.json', ...]\n"
|
| 734 |
+
"2. A directory: 'foo/\n"
|
| 735 |
+
"3. A path with a '*' in it: 'foo.*.json'"
|
| 736 |
+
)
|
| 737 |
+
return paths
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
class PickleableTextIOWrapper(io.TextIOWrapper):
    """A ``TextIOWrapper`` that can be pickled.

    The constructor arguments are remembered and replayed on unpickling, so
    ``buffer`` itself has to be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        # Recorded before the base initialiser runs; __reduce__ hands this
        # tuple back to the constructor.
        self.args = (
            buffer,
            encoding,
            errors,
            newline,
            line_buffering,
            write_through,
        )
        super().__init__(
            buffer, encoding, errors, newline, line_buffering, write_through
        )

    def __reduce__(self):
        return (PickleableTextIOWrapper, self.args)
|
env/lib/python3.13/site-packages/fsspec/dircache.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from collections.abc import MutableMapping
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        **kwargs:
            Accepted and ignored.
        """
        self._cache = {}
        self._times = {}
        if max_paths:

            def _forget(key):
                # Runs only on an LRU miss, i.e. for a key that is new or has
                # been pushed out of the window of recent keys. Returns None
                # so the LRU memo does not keep the dropped listing alive.
                self._cache.pop(key, None)
                self._times.pop(key, None)

            # The LRU is used purely for its bookkeeping of recent keys.
            self._q = lru_cache(max_paths + 1)(_forget)
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                # expired (or never stored): drop the timestamp as well, then
                # let the deletion raise KeyError for unknown keys
                self._times.pop(item, None)
                del self._cache[item]
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()
        # timestamps belong to the listings; do not let them outlive them
        self._times.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        # Goes through __getitem__ so that expiry and LRU rules apply.
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        self._times.pop(key, None)
        del self._cache[key]

    def __iter__(self):
        # Snapshot the keys first: the membership test may delete entries.
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        # Only the configuration is pickled, not the cached listings.
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
|
env/lib/python3.13/site-packages/fsspec/exceptions.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
fsspec user-defined exception classes
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import asyncio
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a blocksize that differs from
    the one it was written with
    """
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec function times out
    """
|
env/lib/python3.13/site-packages/fsspec/fuse.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
import stat
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
from errno import EIO, ENOENT
|
| 8 |
+
|
| 9 |
+
from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
|
| 10 |
+
|
| 11 |
+
from fsspec import __version__
|
| 12 |
+
from fsspec.core import url_to_fs
|
| 13 |
+
|
| 14 |
+
# Logger used by the FUSE operations in this module.
logger = logging.getLogger("fsspec.fuse")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class FUSEr(Operations):
    """FUSE operations that forward to an fsspec filesystem.

    Paths handed in by FUSE are re-rooted below ``path`` on ``fs``. Open
    files are kept in ``self.cache`` keyed by an integer handle taken from
    ``self.counter``.
    """

    def __init__(self, fs, path, ready_file=False):
        """
        Parameters
        ----------
        fs: file-system instance
            Filesystem the operations are forwarded to.
        path: str
            Location on ``fs`` that is exposed as the root of the mount.
        ready_file: bool
            If True, a virtual ``.fuse_ready`` file is reported at the root
            of the mount and reads as ``b"ready"``.
        """
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def _full_path(self, path):
        """Map a path inside the mount onto the wrapped filesystem."""
        return "".join([self.root, path.lstrip("/")])

    def _is_ready_marker(self, path):
        """Tell whether ``path`` is the virtual ready file and it is enabled."""
        return self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]

    def _register(self, f):
        """Store an open file under a fresh handle and return that handle."""
        fh = self.counter
        self.cache[fh] = f
        self.counter += 1
        return fh

    def getattr(self, path, fh=None):
        """Return the stat-like dict for ``path``; ENOENT if it is missing."""
        logger.debug("getattr %s", path)
        if self._is_ready_marker(path):
            return {"type": "file", "st_size": 5}

        path = self._full_path(path).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        # Timestamps the backend does not report default to "now"; one clock
        # reading keeps the three defaults consistent with each other.
        now = time.time()
        data["st_atime"] = info.get("atime", now)
        data["st_ctime"] = info.get("ctime", now)
        data["st_mtime"] = info.get("mtime", now)
        return data

    def readdir(self, path, fh):
        """List the base names found in directory ``path``."""
        logger.debug("readdir %s", path)
        listing = self.fs.ls(self._full_path(path), False)
        names = [os.path.basename(f.rstrip("/")) for f in listing]
        return [".", ".."] + names

    def mkdir(self, path, mode):
        """Create directory ``path``; ``mode`` is not forwarded."""
        self.fs.mkdir(self._full_path(path))
        return 0

    def rmdir(self, path):
        """Remove directory ``path``."""
        self.fs.rmdir(self._full_path(path))
        return 0

    def read(self, path, size, offset, fh):
        """Read ``size`` bytes at ``offset`` from the open handle ``fh``."""
        logger.debug("read %s", (path, size, offset))
        if self._is_ready_marker(path):
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        return f.read(size)

    def write(self, path, data, offset, fh):
        """Write ``data`` at ``offset`` to handle ``fh``; return its length."""
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        """Create ``path``, open it for writing and return the new handle."""
        logger.debug("create %s", (path, flags))
        fn = self._full_path(path)
        self.fs.touch(fn)  # OS will want to get attributes immediately
        return self._register(self.fs.open(fn, "wb"))

    def open(self, path, flags):
        """Open ``path`` and return a new handle.

        Even ``flags`` values open the file as ``"rb"``, odd ones as ``"wb"``.
        """
        logger.debug("open %s", (path, flags))
        fn = self._full_path(path)
        # even flags are treated as read, odd ones as write/create
        mode = "rb" if flags % 2 == 0 else "wb"
        return self._register(self.fs.open(fn, mode))

    def truncate(self, path, length, fh=None):
        """Truncate ``path`` to zero length; other lengths are unsupported."""
        fn = self._full_path(path)
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        """Delete file ``path``; failures are reported to FUSE as EIO."""
        fn = self._full_path(path)
        try:
            self.fs.rm(fn, False)
        except OSError as exc:  # FileNotFoundError is an OSError subclass
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        """Close handle ``fh`` and forget it; errors are logged, never raised."""
        # Drop the handle first so that a failing close() cannot leave a
        # stale entry in the cache.
        f = self.cache.pop(fh, None)
        if f is not None:
            try:
                f.close()
            except Exception:
                logger.exception("release of %s failed", path)
        return 0

    def chmod(self, path, mode):
        """Forward to ``fs.chmod`` when the filesystem provides one."""
        if hasattr(self.fs, "chmod"):
            return self.fs.chmod(self._full_path(path), mode)
        raise NotImplementedError
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount stuff in a local directory

    Uses fusepy to make a path on an fsspec filesystem instance appear as
    part of the local file-system.

    fusepy has to be installed and FUSE has to be available on the system
    (typically requiring a package to be installed with apt, yum, brew,
    etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or Subclass of FUSEr
        To override the default behavior of FUSEr. For Example, logging
        to file.

    Returns
    -------
    The started daemon thread when ``foreground`` is False, otherwise None
    once the mount has ended.
    """

    def _mount():
        return FUSE(
            ops_class(fs, path, ready_file=ready_file),
            mount_point,
            nothreads=not threads,
            foreground=foreground,
        )

    if foreground:  # pragma: no cover
        # Blocks until the mount ends; Ctrl-C is the normal way out.
        try:
            _mount()
        except KeyboardInterrupt:
            pass
        return None

    worker = threading.Thread(target=_mount, daemon=True)
    worker.start()
    return worker
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represents the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """
    # NOTE: the docstring above is user-facing: it is passed to the parser
    # as ``description`` and printed by ``--help``. Keep it in sync with the
    # options declared below.

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        # Swap the second paragraph of the generated help (the re-wrapped
        # description) for the raw description text, so the multi-line
        # examples keep their layout.
        def format_help(self):
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l",
        "--log-file",
        type=str,
        help="Logging FUSE debug info to this file (Default: no logging)",
    )
    # The three switches below use ``store_false``: each value defaults to
    # True and passing the flag turns the feature OFF. The help texts state
    # that explicitly (they previously claimed "Default: False").
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Do not run in the foreground; mount from a background thread "
        "instead (Default: run in foreground)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Disable threads support (Default: threads enabled)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="Do not create the `.fuse_ready` file after FUSE is ready. "
        "(Debugging purpose, Default: the file is created)",
    )
    args = parser.parse_args(args)

    # Turn every ``-o key=value`` into a keyword argument. A key of the form
    # ``<protocol>-<setting>`` is nested under the protocol name.
    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        # Optional type suffix, matched case-insensitively
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            fs_name, setting_name = key.split("-", 1)
            kwargs.setdefault(fs_name, {})[setting_name] = value
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        # With a log file requested, mix in the logging operations class
        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (without the
    # program name) to ``main``.
    import sys

    main(sys.argv[1:])
|
env/lib/python3.13/site-packages/fsspec/generic.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
import uuid
|
| 8 |
+
|
| 9 |
+
from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
|
| 10 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 11 |
+
from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
|
| 12 |
+
|
| 13 |
+
_generic_fs = {}
|
| 14 |
+
logger = logging.getLogger("fsspec.generic")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def set_generic_fs(protocol, **storage_options):
    """Register the filesystem instance used for method=="generic" lookups

    A filesystem for ``protocol`` is built from ``storage_options`` and stored
    in the module-level ``_generic_fs`` dict under that protocol name,
    replacing any earlier entry.
    """
    instance = filesystem(protocol, **storage_options)
    _generic_fs[protocol] = instance
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _resolve_fs(url, method, protocol=None, storage_options=None):
|
| 23 |
+
"""Pick instance of backend FS"""
|
| 24 |
+
url = url[0] if isinstance(url, (list, tuple)) else url
|
| 25 |
+
protocol = protocol or split_protocol(url)[0]
|
| 26 |
+
storage_options = storage_options or {}
|
| 27 |
+
if method == "default":
|
| 28 |
+
return filesystem(protocol)
|
| 29 |
+
if method == "generic":
|
| 30 |
+
return _generic_fs[protocol]
|
| 31 |
+
if method == "current":
|
| 32 |
+
cls = get_filesystem_class(protocol)
|
| 33 |
+
return cls.current()
|
| 34 |
+
if method == "options":
|
| 35 |
+
fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
|
| 36 |
+
return fs
|
| 37 |
+
raise ValueError(f"Unknown FS resolution method: {method}")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a directory, but
        do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.
    **kwargs:
        Passed on to ``fs.cp`` for the copy step.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}

    Raises
    ------
    ValueError
        If ``source`` is not a directory, or if ``update_cond`` is not one of
        the supported values when a file already exists in the destination.
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)
    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)

    # Paths are mapped between the trees by swapping the root. Only the first
    # occurrence is replaced, so a path that happens to repeat the root text
    # further down is not mangled.
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory"
        and a.replace(source, destination, 1) not in otherfiles
    ]
    logger.debug("%d directories to create", len(dirs))
    if dirs:
        fs.make_many_dirs(
            [dirn.replace(source, destination, 1) for dirn in dirs], exist_ok=True
        )
    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug("%d files to consider for copy", len(allfiles))
    to_delete = [
        o
        for o, v in otherfiles.items()
        if o.replace(destination, source, 1) not in allfiles and v["type"] == "file"
    ]
    # From here on ``allfiles`` is rewritten in place from {source: info} to
    # {source: destination}, dropping the files that need no copy. Iterate a
    # snapshot because entries are popped during the loop.
    for k, v in allfiles.copy().items():
        otherfile = k.replace(source, destination, 1)
        if otherfile in otherfiles:
            if update_cond == "always":
                allfiles[k] = otherfile
            elif update_cond == "never":
                allfiles.pop(k)
            elif update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 != inf2:
                    # details mismatch, make copy
                    allfiles[k] = otherfile
                else:
                    # details match, don't copy
                    allfiles.pop(k)
            else:
                # Otherwise the info dict would be left in place and later
                # handed to ``fs.cp`` as if it were a destination path.
                raise ValueError(f"Unknown update_cond: {update_cond!r}")
        else:
            # file not in target yet
            allfiles[k] = otherfile
    logger.debug("%d files to copy", len(allfiles))
    if allfiles:
        source_files, target_files = zip(*allfiles.items())
        fs.cp(source_files, target_files, **kwargs)
    logger.debug("%d files to delete", len(to_delete))
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return allfiles
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Every operation resolves its backend filesystem from the URL it is given,
    using ``default_method`` and (for the "options" method) ``storage_options``.
    Async backends are awaited; others are called through their blocking API.

    Note: instances of this FS are always async, even if you never use it with any async
    backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", storage_options=None, **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:
            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "options": expects storage_options, a dict mapping protocol to
              kwargs to use when constructing the filesystem
            - "current": takes the most recently instantiated version of each FS
        storage_options: dict (optional)
            Mapping of protocol to keyword arguments; only consulted when
            ``default_method`` is "options".
        """
        self.method = default_method
        self.st_opts = storage_options
        super().__init__(**kwargs)

    def _parent(self, path):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        # normalization only
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        # Details are always requested from the backend so that the names can
        # be rewritten with their protocol; they are dropped at the end if
        # the caller did not ask for them.
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            v = v.copy()  # don't corrupt target FS dircache
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        fs = _resolve_fs(url, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out = out.copy()  # don't edit originals
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        out = [o.copy() for o in out]  # don't edit originals
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        # The backend is chosen from the first URL only
        fs = _resolve_fs(urls[0], self.method, storage_options=self.st_opts)
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(url, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.

        Returns the value of the module-level ``rsync`` call.
        """
        return rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        tempdir: str | None = None,
        **kwargs,
    ):
        # ``blocksize`` and ``callback`` are accepted but not used here.
        fs = _resolve_fs(url, self.method, storage_options=self.st_opts)
        fs2 = _resolve_fs(url2, self.method, storage_options=self.st_opts)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(url, url2, **kwargs)
            else:
                return fs.copy(url, url2, **kwargs)
        # Different backends: stage the file through a local temporary copy
        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")

    async def _make_many_dirs(self, urls, exist_ok=True):
        # The backend is chosen from the first URL only; ``urls`` must not
        # be empty.
        fs = _resolve_fs(urls[0], self.method, storage_options=self.st_opts)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: int | None = None,
        batch_size: int | None = None,
        tempdir: str | None = None,
        **kwargs,
    ):
        # TODO: special case for one FS being local, which can use get/put
        # TODO: special case for one being memFS, which can use cat/pipe
        if recursive:
            raise NotImplementedError("Please use fsspec.generic.rsync")
        path1 = [path1] if isinstance(path1, str) else path1
        path2 = [path2] if isinstance(path2, str) else path2

        # ``_resolve_fs`` looks at the first path of each list only
        fs = _resolve_fs(path1, self.method, storage_options=self.st_opts)
        fs2 = _resolve_fs(path2, self.method, storage_options=self.st_opts)

        if fs is fs2:
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)

        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    """Copy files between two filesystems via local temporary files

    Parameters
    ----------
    fs1, fs2:
        Source and destination filesystem instances.
    url1, url2: iterables of str
        Source and destination paths, paired up in order.
    tempdir: str | None
        Directory for the temporary local copies. If not given, a fresh one is
        created and removed again afterwards. A directory supplied by the
        caller is kept; only the temporary files made here are cleaned up.
    batch_size: int | None
        Passed to ``_run_coros_in_chunks``.
    on_error: "ignore" | "return" | "raise"
        With "return", the list of per-file results/exceptions is returned;
        with "raise", the first exception found in that list is raised;
        otherwise failures are dropped and None is returned.
    """
    import tempfile

    # Only a directory created here may be removed wholesale; a
    # caller-supplied one can hold unrelated content.
    own_tempdir = not tempdir
    tempdir = tempdir or tempfile.mkdtemp()
    staged = []
    try:
        coros = []
        for u1, u2 in zip(url1, url2):
            local = os.path.join(tempdir, uuid.uuid4().hex)
            staged.append(local)
            coros.append(_copy_file_op(fs1, u1, fs2, u2, local))
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True
        )
    finally:
        if own_tempdir:
            shutil.rmtree(tempdir)
        else:
            # Remove whatever failed transfers left behind
            for local in staged:
                if os.path.isdir(local):
                    shutil.rmtree(local, ignore_errors=True)
                elif os.path.exists(local):
                    os.unlink(local)
    if on_error == "return":
        return out
    elif on_error == "raise":
        for o in out:
            if isinstance(o, Exception):
                raise o
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
|
| 380 |
+
if fs1.async_impl:
|
| 381 |
+
await fs1._get_file(url1, local)
|
| 382 |
+
else:
|
| 383 |
+
fs1.get_file(url1, local)
|
| 384 |
+
if fs2.async_impl:
|
| 385 |
+
await fs2._put_file(local, url2)
|
| 386 |
+
else:
|
| 387 |
+
fs2.put_file(local, url2)
|
| 388 |
+
os.unlink(local)
|
| 389 |
+
logger.debug("Copy %s -> %s; done", url1, url2)
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
async def maybe_await(cor):
    """Return the result of ``cor`` if it is a coroutine, else ``cor`` itself"""
    if not inspect.iscoroutine(cor):
        return cor
    result = await cor
    return result
|
env/lib/python3.13/site-packages/fsspec/gui.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
import contextlib
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
from collections.abc import Sequence
|
| 7 |
+
from typing import ClassVar
|
| 8 |
+
|
| 9 |
+
import panel as pn
|
| 10 |
+
|
| 11 |
+
from .core import OpenFile, get_filesystem_class, split_protocol
|
| 12 |
+
from .registry import known_implementations
|
| 13 |
+
|
| 14 |
+
pn.extension()
|
| 15 |
+
logger = logging.getLogger("fsspec.gui")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class SigSlot:
    """Signal-slot mixin, for Panel event passing

    Include this class in a widget manager's superclasses to be able to
    register events and callbacks on Panel widgets managed by that class.

    The method ``_register`` should be called as widgets are added, and external
    code should call ``connect`` to associate callbacks.

    By default, all signals emit a DEBUG logging statement.
    """

    # names of signals that this class may emit each of which must be
    # set by _register for any new instance
    signals: ClassVar[Sequence[str]] = []
    # names of actions that this class may respond to
    slots: ClassVar[Sequence[str]] = []

    # each of which must be a method name

    def __init__(self):
        self._ignoring_events = False
        # signal name -> {"widget", "callbacks", "thing", "log"}
        self._sigs = {}
        # "<widget name>-<watched attribute>" -> signal name
        self._map = {}
        self._setup()

    def _setup(self):
        """Create GUI elements and register signals"""
        self.panel = pn.pane.PaneBase()
        # no signals to set up in the base class

    def _register(
        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
    ):
        """Watch the given attribute of a widget and assign it a named event

        This is normally called at the time a widget is instantiated, in the
        class which owns it.

        Parameters
        ----------
        widget : pn.layout.Panel or None
            Widget to watch. If None, an anonymous signal not associated with
            any widget.
        name : str
            Name of this event
        thing : str
            Attribute of the given widget to watch
        log_level : int
            When the signal is triggered, a logging event of the given level
            will be fired in this module's logger.
        auto : bool
            If True, automatically connects with a method in this class of the
            same name.

        Raises
        ------
        ValueError
            If ``name`` is not listed in ``signals``.
        """
        if name not in self.signals:
            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
        # Registering a name again replaces its entry, including any
        # callbacks connected so far.
        self._sigs[name] = {
            "widget": widget,
            "callbacks": [],
            "thing": thing,
            "log": log_level,
        }
        # Key under which ``_signal`` finds this event again
        wn = "-".join(
            [
                getattr(widget, "name", str(widget)) if widget is not None else "none",
                thing,
            ]
        )
        self._map[wn] = name
        if widget is not None:
            widget.param.watch(self._signal, thing, onlychanged=True)
        if auto and hasattr(self, name):
            self.connect(name, getattr(self, name))

    def _repr_mimebundle_(self, *args, **kwargs):
        """Display in a notebook or a server"""
        try:
            return self.panel._repr_mimebundle_(*args, **kwargs)
        except (ValueError, AttributeError) as exc:
            raise NotImplementedError(
                "Panel does not seem to be set up properly"
            ) from exc

    def connect(self, signal, slot):
        """Associate callback with given event

        The callback must be a function which takes the "new" value of the
        watched attribute as the only parameter. If the callback returns False,
        this cancels any further processing of the given event.

        Alternatively, the callback can be a string, in which case it means
        emitting the correspondingly-named event (i.e., connect to self)
        """
        self._sigs[signal]["callbacks"].append(slot)

    def _signal(self, event):
        """This is called by an action on a widget

        Within a self.ignore_events context, nothing happens.

        Tests can execute this method by directly changing the values of
        widget components.
        """
        if not self._ignoring_events:
            wn = "-".join([event.obj.name, event.name])
            if wn in self._map and self._map[wn] in self._sigs:
                self._emit(self._map[wn], event.new)

    @contextlib.contextmanager
    def ignore_events(self):
        """Temporarily turn off events processing in this instance

        (does not propagate to children)
        """
        self._ignoring_events = True
        try:
            yield
        finally:
            self._ignoring_events = False

    def _emit(self, sig, value=None):
        """An event happened, call its callbacks

        This method can be used in tests to simulate message passing without
        directly changing visual elements.

        Calling of callbacks will halt whenever one returns False.
        """
        # Lazy %-formatting: the message is only built if the record is
        # actually emitted at this level.
        logger.log(self._sigs[sig]["log"], "%s: %s", sig, value)
        for callback in self._sigs[sig]["callbacks"]:
            if isinstance(callback, str):
                # a string means "emit that other signal" (without a value)
                self._emit(callback)
            else:
                try:
                    # running callbacks should not break the interface
                    ret = callback(value)
                    if ret is False:
                        break
                except Exception as e:
                    logger.exception(
                        "Exception (%s) while executing callback for signal: %s",
                        e,
                        sig,
                    )

    def show(self, threads=False):
        """Open a new browser tab and display this instance's interface"""
        self.panel.show(threads=threads, verbose=False)
        return self
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class SingleSelect(SigSlot):
    """Multiselect widget wrapper that keeps at most one item selected

    The internal ``_selected`` signal follows the widget's value; each time it
    fires, the selection is reduced to its last element and the public
    ``selected`` signal is emitted with the result.
    """

    signals = ["_selected", "selected"]  # the first is internal
    slots = ["set_options", "set_selection", "add", "clear", "select"]

    def __init__(self, **kwargs):
        # Stored before the base initializer runs, because that calls
        # ``_setup``, which reads them.
        self.kwargs = kwargs
        super().__init__()

    def _setup(self):
        widget = pn.widgets.MultiSelect(**self.kwargs)
        self.panel = widget
        self._register(widget, "_selected", "value")
        self._register(None, "selected")
        self.connect("_selected", self.select_one)

    def _signal(self, *args, **kwargs):
        # plain pass-through to the base implementation
        super()._signal(*args, **kwargs)

    def select_one(self, *_):
        """Trim the selection to its last item, then emit ``selected``"""
        # The widget update below must not trigger this handler again
        with self.ignore_events():
            if self.panel.value:
                latest = [self.panel.value[-1]]
            else:
                latest = []
            self.panel.value = latest
        self._emit("selected", self.panel.value)

    def set_options(self, options):
        """Replace the list of selectable options"""
        self.panel.options = options

    def clear(self):
        """Remove all options"""
        self.panel.options = []

    @property
    def value(self):
        """Current value of the underlying widget"""
        return self.panel.value

    def set_selection(self, selection):
        """Select exactly the given item"""
        self.panel.value = [selection]
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
class FileSelector(SigSlot):
    """Panel-based graphical file selector widget

    Instances of this widget are interactive and can be displayed in jupyter by having
    them as the output of a cell,  or in a separate browser tab using ``.show()``.
    """

    signals = [
        "protocol_changed",
        "selection_changed",
        "directory_entered",
        "home_clicked",
        "up_clicked",
        "go_clicked",
        "filters_changed",
    ]
    # NOTE(review): "go_home" has no matching method in this class (the
    # handler is ``home_clicked``) - confirm whether this entry is stale.
    slots = ["set_filters", "go_home"]

    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
        """

        Parameters
        ----------
        url : str (optional)
            Initial value of the URL to populate the dialog; should include protocol
        filters : list(str) (optional)
            File endings to include in the listings. If not included, all files are
            allowed. Does not affect directories.
            If given, the endings will appear as checkboxes in the interface
        ignore : list(str) (optional)
            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
            hidden files on posix
        kwargs : dict (optional)
            To pass to file system instance
        """
        if url:
            self.init_protocol, url = split_protocol(url)
        else:
            self.init_protocol, url = "file", os.getcwd()
        self.init_url = url
        # The kwargs box holds a Python literal as text.  ``str(None)`` is the
        # truthy string "None", so handle None explicitly to get the intended
        # empty-dict text.
        if kwargs is None:
            self.init_kwargs = "{}"
        else:
            self.init_kwargs = (
                kwargs if isinstance(kwargs, str) else str(kwargs)
            ) or "{}"
        self.filters = filters
        self.ignore = [re.compile(i) for i in ignore or []]
        self._fs = None
        super().__init__()

    def _setup(self):
        """Create all widgets, register their signals and show the first listing."""
        self.url = pn.widgets.TextInput(
            name="url",
            value=self.init_url,
            align="end",
            sizing_mode="stretch_width",
            width_policy="max",
        )
        self.protocol = pn.widgets.Select(
            options=sorted(known_implementations),
            value=self.init_protocol,
            name="protocol",
            align="center",
        )
        self.kwargs = pn.widgets.TextInput(
            name="kwargs", value=self.init_kwargs, align="center"
        )
        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
        self.main = SingleSelect(size=10)
        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")

        self._register(self.protocol, "protocol_changed", auto=True)
        self._register(self.go, "go_clicked", "clicks", auto=True)
        self._register(self.up, "up_clicked", "clicks", auto=True)
        self._register(self.home, "home_clicked", "clicks", auto=True)
        self._register(None, "selection_changed")
        self.main.connect("selected", self.selection_changed)
        self._register(None, "directory_entered")
        # Remembered so go_clicked can tell when the filesystem must be rebuilt.
        self.prev_protocol = self.protocol.value
        self.prev_kwargs = self.storage_options

        self.filter_sel = pn.widgets.CheckBoxGroup(
            value=[], options=[], inline=False, align="end", width_policy="min"
        )
        self._register(self.filter_sel, "filters_changed", auto=True)

        self.panel = pn.Column(
            pn.Row(self.protocol, self.kwargs),
            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
            self.main.panel,
        )
        self.set_filters(self.filters)
        self.go_clicked()

    def set_filters(self, filters=None):
        """Set the file-ending filters and tick every one of them.

        A falsy ``filters`` empties the checkbox group.
        """
        self.filters = filters
        if filters:
            self.filter_sel.options = filters
            self.filter_sel.value = filters
        else:
            self.filter_sel.options = []
            self.filter_sel.value = []

    @property
    def storage_options(self):
        """Value of the kwargs box as a dictionary"""
        return ast.literal_eval(self.kwargs.value) or {}

    @property
    def fs(self):
        """Current filesystem instance"""
        if self._fs is None:
            cls = get_filesystem_class(self.protocol.value)
            self._fs = cls(**self.storage_options)
        return self._fs

    @property
    def urlpath(self):
        """URL of currently selected item"""
        selected = self.main.value
        if not selected:
            return None
        return f"{self.protocol.value}://{selected[0]}"

    def open_file(self, mode="rb", compression=None, encoding=None):
        """Create OpenFile instance for the currently selected item

        For example, in a notebook you might do something like

        .. code-block::

            [ ]: sel = FileSelector(); sel

            # user selects their file

            [ ]: with sel.open_file('rb') as f:
            ...      out = f.read()

        Parameters
        ----------
        mode: str (optional)
            Open mode for the file.
        compression: str (optional)
            The interact with the file as compressed. Set to 'infer' to guess
            compression from the file ending
        encoding: str (optional)
            If using text mode, use this encoding; defaults to UTF8.

        Raises
        ------
        ValueError
            If nothing is currently selected.
        """
        if self.urlpath is None:
            raise ValueError("No file selected")
        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)

    def filters_changed(self, values):
        """Store the newly ticked filters and refresh the listing."""
        self.filters = values
        self.go_clicked()

    def selection_changed(self, *_):
        """React to a new selection; a selected directory becomes the current URL."""
        if self.urlpath is None:
            return
        if self.fs.isdir(self.urlpath):
            self.url.value = self.fs._strip_protocol(self.urlpath)
        self.go_clicked()

    def go_clicked(self, *_):
        """List the current URL and show its folders and (filtered) files."""
        if (
            self.prev_protocol != self.protocol.value
            or self.prev_kwargs != self.storage_options
        ):
            self._fs = None  # causes fs to be recreated
            self.prev_protocol = self.protocol.value
            self.prev_kwargs = self.storage_options
        listing = sorted(
            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
        )
        # Drop entries whose basename matches any of the ignore patterns.
        listing = [
            entry
            for entry in listing
            if not any(i.match(entry["name"].rsplit("/", 1)[-1]) for i in self.ignore)
        ]
        folders = {
            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "directory"
        }
        files = {
            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "file"
        }
        if self.filters:
            files = {
                k: v
                for k, v in files.items()
                if any(v.endswith(ext) for ext in self.filters)
            }
        # Folders are inserted first so they appear above files.
        self.main.set_options(dict(**folders, **files))

    def protocol_changed(self, *_):
        """Forget the filesystem, empty the listing and blank the URL."""
        self._fs = None
        # ``self.main`` is a SingleSelect wrapper, so its listing is emptied
        # through ``clear()``; assigning ``self.main.options`` would only
        # create an unused attribute on the wrapper.
        self.main.clear()
        self.url.value = ""

    def home_clicked(self, *_):
        """Restore the initial protocol, kwargs and URL, then list them."""
        self.protocol.value = self.init_protocol
        self.kwargs.value = self.init_kwargs
        self.url.value = self.init_url
        self.go_clicked()

    def up_clicked(self, *_):
        """Move to the parent of the current URL and list it."""
        self.url.value = self.fs._parent(self.url.value)
        self.go_clicked()
|
env/lib/python3.13/site-packages/fsspec/json.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from collections.abc import Callable, Mapping, Sequence
|
| 3 |
+
from contextlib import suppress
|
| 4 |
+
from pathlib import PurePath
|
| 5 |
+
from typing import Any, ClassVar
|
| 6 |
+
|
| 7 |
+
from .registry import _import_class, get_filesystem_class
|
| 8 |
+
from .spec import AbstractFileSystem
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class FilesystemJSONEncoder(json.JSONEncoder):
    """JSON encoder that can also serialise filesystems and pure paths."""

    # Passed through to ``AbstractFileSystem.to_dict``.
    include_password: ClassVar[bool] = True

    def default(self, o: Any) -> Any:
        """Encode objects that the stock encoder cannot handle.

        Filesystem instances are encoded with their own ``to_dict``; pure
        paths become a dict with the fully qualified class name under
        ``"cls"`` and the path text under ``"str"``.  Anything else is left
        to the base class, which raises ``TypeError``.
        """
        if isinstance(o, AbstractFileSystem):
            return o.to_dict(include_password=self.include_password)
        if isinstance(o, PurePath):
            cls = type(o)
            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}

        return super().default(o)

    def make_serializable(self, obj: Any) -> Any:
        """
        Recursively converts an object so that it can be JSON serialized via
        :func:`json.dumps` and :func:`json.dump`, without actually calling
        said functions.

        ``None`` and the scalar types ``str``, ``int``, ``float`` and ``bool``
        are returned as-is, mappings become plain dicts (keys untouched) and
        other sequences become lists.  Everything else goes through
        :meth:`default`.
        """
        # None is a valid JSON value (null), so it must not reach default(),
        # which would raise TypeError for it.
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, Mapping):
            return {k: self.make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, Sequence):
            return [self.make_serializable(v) for v in obj]

        return self.default(obj)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class FilesystemJSONDecoder(json.JSONDecoder):
|
| 40 |
+
def __init__(
|
| 41 |
+
self,
|
| 42 |
+
*,
|
| 43 |
+
object_hook: Callable[[dict[str, Any]], Any] | None = None,
|
| 44 |
+
parse_float: Callable[[str], Any] | None = None,
|
| 45 |
+
parse_int: Callable[[str], Any] | None = None,
|
| 46 |
+
parse_constant: Callable[[str], Any] | None = None,
|
| 47 |
+
strict: bool = True,
|
| 48 |
+
object_pairs_hook: Callable[[list[tuple[str, Any]]], Any] | None = None,
|
| 49 |
+
) -> None:
|
| 50 |
+
self.original_object_hook = object_hook
|
| 51 |
+
|
| 52 |
+
super().__init__(
|
| 53 |
+
object_hook=self.custom_object_hook,
|
| 54 |
+
parse_float=parse_float,
|
| 55 |
+
parse_int=parse_int,
|
| 56 |
+
parse_constant=parse_constant,
|
| 57 |
+
strict=strict,
|
| 58 |
+
object_pairs_hook=object_pairs_hook,
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
@classmethod
|
| 62 |
+
def try_resolve_path_cls(cls, dct: dict[str, Any]):
|
| 63 |
+
with suppress(Exception):
|
| 64 |
+
fqp = dct["cls"]
|
| 65 |
+
|
| 66 |
+
path_cls = _import_class(fqp)
|
| 67 |
+
|
| 68 |
+
if issubclass(path_cls, PurePath):
|
| 69 |
+
return path_cls
|
| 70 |
+
|
| 71 |
+
return None
|
| 72 |
+
|
| 73 |
+
@classmethod
|
| 74 |
+
def try_resolve_fs_cls(cls, dct: dict[str, Any]):
|
| 75 |
+
with suppress(Exception):
|
| 76 |
+
if "cls" in dct:
|
| 77 |
+
try:
|
| 78 |
+
fs_cls = _import_class(dct["cls"])
|
| 79 |
+
if issubclass(fs_cls, AbstractFileSystem):
|
| 80 |
+
return fs_cls
|
| 81 |
+
except Exception:
|
| 82 |
+
if "protocol" in dct: # Fallback if cls cannot be imported
|
| 83 |
+
return get_filesystem_class(dct["protocol"])
|
| 84 |
+
|
| 85 |
+
raise
|
| 86 |
+
|
| 87 |
+
return None
|
| 88 |
+
|
| 89 |
+
def custom_object_hook(self, dct: dict[str, Any]):
|
| 90 |
+
if "cls" in dct:
|
| 91 |
+
if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
|
| 92 |
+
return AbstractFileSystem.from_dict(dct)
|
| 93 |
+
if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
|
| 94 |
+
return obj_cls(dct["str"])
|
| 95 |
+
|
| 96 |
+
if self.original_object_hook is not None:
|
| 97 |
+
return self.original_object_hook(dct)
|
| 98 |
+
|
| 99 |
+
return dct
|
| 100 |
+
|
| 101 |
+
def unmake_serializable(self, obj: Any) -> Any:
|
| 102 |
+
"""
|
| 103 |
+
Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
|
| 104 |
+
"""
|
| 105 |
+
if isinstance(obj, dict):
|
| 106 |
+
obj = self.custom_object_hook(obj)
|
| 107 |
+
if isinstance(obj, dict):
|
| 108 |
+
return {k: self.unmake_serializable(v) for k, v in obj.items()}
|
| 109 |
+
if isinstance(obj, (list, tuple)):
|
| 110 |
+
return [self.unmake_serializable(v) for v in obj]
|
| 111 |
+
|
| 112 |
+
return obj
|
env/lib/python3.13/site-packages/fsspec/mapping.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import array
|
| 2 |
+
import logging
|
| 3 |
+
import posixpath
|
| 4 |
+
import warnings
|
| 5 |
+
from collections.abc import MutableMapping
|
| 6 |
+
from functools import cached_property
|
| 7 |
+
|
| 8 |
+
from fsspec.core import url_to_fs
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger("fsspec.mapping")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=False)
        if True, require that the root exists and perform a touch (followed
        by a removal) below it, to check for write access.
    create: bool (=False)
        if True, make the root directory when it does not exist yet.
    missing_exceptions: None or tuple
        exception types that are treated as "key is missing"; defaults to
        (FileNotFoundError, IsADirectoryError, NotADirectoryError).

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        # Prefix put in front of every key: the stripped form of "<root>/x"
        # with the trailing "x" removed again.
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping

        This is best-effort: a failure of the underlying filesystem is
        logged at debug level and otherwise ignored.
        """
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except Exception:
            # Only ordinary errors are ignored, so KeyboardInterrupt and
            # SystemExit still propagate.
            logger.debug("Failed to clear mapping at %s", self.root, exc_info=True)

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            They keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        # "omit" is implemented by asking the backend to return exceptions
        # and dropping them below.
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip path of to leave key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data

        If the key is missing and ``default`` is not None, ``default`` is
        returned; otherwise ``KeyError(key)`` is raised.
        """
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions as exc:
            if default is not None:
                return default
            raise KeyError(key) from exc
        return result

    def pop(self, key, default=None):
        """Pop data

        Because ``default`` defaults to None, a missing key raises KeyError
        both for ``pop(key)`` and for ``pop(key, None)``.
        """
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key

        Any failure of the underlying removal is reported as
        ``KeyError(key)``.
        """
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            # Carry the key in the error, as __getitem__ does.
            raise KeyError(key) from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        # check/create are reset to False so rebuilding the mapping does not
        # repeat the existence check or directory creation.
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def maybe_convert(value):
    """Return ``value`` as bytes when it is an array, else unchanged.

    ``array.array`` instances and objects exposing ``__array__`` are copied
    into a ``bytes`` object through ``memoryview``; every other value is
    returned untouched.
    """
    is_array_like = isinstance(value, array.array) or hasattr(value, "__array__")
    if not is_array_like:
        return value

    # bytes-like things
    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # The buffer interface doesn't support datetime64/timedelta64 numpy
        # arrays, so reinterpret them as int64 first.
        value = value.view("int64")
    return bytes(memoryview(value))
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Passed to ``FSMap``, which then requires the root to exist and
        touches and removes a file below it before the mapper is returned
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs
        Passed to ``url_to_fs`` together with ``url``

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # Removing protocol here - could defer to each open() on the backend
    fs, urlpath = url_to_fs(url, **kwargs)
    if alternate_root is None:
        root = urlpath
    else:
        root = alternate_root
    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
|
env/lib/python3.13/site-packages/fsspec/parquet.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import json
|
| 3 |
+
import warnings
|
| 4 |
+
from typing import Literal
|
| 5 |
+
|
| 6 |
+
import fsspec
|
| 7 |
+
|
| 8 |
+
from .core import url_to_fs
|
| 9 |
+
from .spec import AbstractBufferedFile
|
| 10 |
+
from .utils import merge_offset_ranges
|
| 11 |
+
|
| 12 |
+
# Parquet-Specific Utilities for fsspec
|
| 13 |
+
#
|
| 14 |
+
# Most of the functions defined in this module are NOT
|
| 15 |
+
# intended for public consumption. The only exception
|
| 16 |
+
# to this is `open_parquet_file`, which should be used in
|
| 17 |
+
# place of `fs.open()` to open parquet-formatted files
|
| 18 |
+
# on remote file systems.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class AlreadyBufferedFile(AbstractBufferedFile):
    """Buffered file that cannot fetch byte ranges itself."""

    def _fetch_range(self, start, end):
        """Always raise ``NotImplementedError``; no range is ever fetched here."""
        raise NotImplementedError
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def open_parquet_files(
    path: list[str],
    mode: Literal["rb"] = "rb",
    fs: None | fsspec.AbstractFileSystem = None,
    metadata=None,
    columns: None | list[str] = None,
    row_groups: None | list[int] = None,
    storage_options: None | dict = None,
    engine: str = "auto",
    max_gap: int = 64_000,
    max_block: int = 256_000_000,
    footer_sample_size: int = 1_000_000,
    filters: None | list[list[list[str]]] = None,
    **kwargs,
):
    """
    Return a list of file-like objects, one per matched Parquet file.

    The specified parquet `engine` will be used to parse the
    footer metadata, and determine the required byte ranges
    from each file. Every file is then represented by an
    ``AlreadyBufferedFile`` using the "parts" caching strategy,
    pre-populated with those byte ranges.

    Note that this method is intended for usage with remote
    file systems, and is unlikely to improve parquet-read
    performance on local file systems.

    Parameters
    ----------
    path: str or list of str
        Target file path(s). A list or tuple is used as given; a string
        containing ``*`` is expanded with ``fs.glob``; a string ending in
        ``/`` is searched for files ending in ``.parquet`` or ``.parq``;
        any other string is taken as a single file.
    mode: str, optional
        Mode option to be passed through to the file objects. Default is "rb".
    metadata: Any, optional
        Parquet metadata object. Object type must be supported
        by the backend parquet engine. For now, only the "fastparquet"
        engine supports an explicit `ParquetFile` metadata object.
        If a metadata object is supplied, the remote footer metadata
        will not need to be transferred into local memory.
    fs: AbstractFileSystem, optional
        Filesystem object to use for opening the file. If nothing is
        specified, an `AbstractFileSystem` object will be inferred.
    engine : str, default "auto"
        Parquet engine to use for metadata parsing. Allowed options
        include "fastparquet", "pyarrow", and "auto". The specified
        engine must be installed in the current environment. If
        "auto" is specified, and both engines are installed,
        "fastparquet" will take precedence over "pyarrow".
    columns: list, optional
        List of all column names that may be read from the file.
        An empty list is treated the same as ``None`` (all columns).
    row_groups : list, optional
        List of all row-groups that may be read from the file. This
        may be a list of row-group indices (integers), or it may be
        a list of `RowGroup` metadata objects (if the "fastparquet"
        engine is used).
    storage_options : dict, optional
        Used to generate an `AbstractFileSystem` object if `fs` was
        not specified.
    max_gap : int, optional
        Neighboring byte ranges will only be merged when their
        inter-range gap is <= `max_gap`. Default is 64KB.
    max_block : int, optional
        Neighboring byte ranges will only be merged when the size of
        the aggregated range is <= `max_block`. Default is 256MB.
    footer_sample_size : int, optional
        Number of bytes to read from the end of the path to look
        for the footer metadata. If the sampled bytes do not contain
        the footer, a second read request will be required, and
        performance will suffer. Default is 1MB.
    filters : list[list], optional
        List of filters to apply to prevent reading row groups, of the
        same format as accepted by the loading engines. Ignored if
        ``row_groups`` is specified.
    **kwargs :
        Optional key-word arguments to pass to the file objects. A
        ``cache_options`` entry is copied and merged into the cache
        options of every file.
    """

    # Keep the caller's original argument; ``path`` itself may be replaced
    # by the protocol-stripped form below.
    path0 = path

    # Make sure we have an `AbstractFileSystem` object
    # to work with
    if fs is None:
        first = path[0] if isinstance(path, (list, tuple)) else path
        fs, path = url_to_fs(first, **(storage_options or {}))

    # For now, `columns == []` not supported, is the same
    # as all columns
    if columns is not None and len(columns) == 0:
        columns = None

    # Set the engine
    engine = _set_engine(engine)

    if isinstance(path0, (list, tuple)):
        paths = path0
    elif "*" in path:
        paths = fs.glob(path)
    elif path0.endswith("/"):  # or fs.isdir(path):
        paths = [
            found
            for found in fs.find(path, withdirs=False, detail=False)
            if found.endswith((".parquet", ".parq"))
        ]
    else:
        paths = [path]

    data = _get_parquet_byte_ranges(
        paths,
        fs,
        metadata=metadata,
        columns=columns,
        row_groups=row_groups,
        engine=engine,
        max_gap=max_gap,
        max_block=max_block,
        footer_sample_size=footer_sample_size,
        filters=filters,
    )

    # Build one "parts"-cached file per path with known byte ranges
    options = kwargs.pop("cache_options", {}).copy()
    opened = []
    for fn in data:
        known_parts = data.get(fn, {})
        opened.append(
            AlreadyBufferedFile(
                fs=None,
                path=fn,
                mode=mode,
                cache_type="parts",
                cache_options={
                    **options,
                    "data": known_parts,
                },
                # Size is the largest second element among the range keys.
                size=max(part[1] for part in known_parts),
                **kwargs,
            )
        )
    return opened
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def open_parquet_file(*args, **kwargs):
    """Open one file tailored to reading specific parts of a parquet file.

    Every positional and keyword argument is forwarded unchanged to
    ``open_parquet_files``; please see that function for their meaning.
    ``open_parquet_files`` returns a list of files, whereas this function
    returns only the first entry of that list.

    Raises
    ------
    IndexError
        If ``open_parquet_files`` returns an empty list.
    """
    opened = open_parquet_files(*args, **kwargs)
    return opened[0]
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _get_parquet_byte_ranges(
    paths,
    fs,
    metadata=None,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    engine="auto",
    filters=None,
):
    """Gather the byte ranges needed to read a selection of a Parquet dataset.

    The return value is a dictionary keyed by file path.  Each value is a
    dictionary mapping ``(start, stop)`` byte offsets to the bytes fetched
    for that range, and is intended for use as the `data` argument for the
    `KnownPartsOfAFile` caching strategy of a single path.

    Parameters
    ----------
    paths : list
        Paths of the parquet files to inspect.
    fs : filesystem object
        Must provide ``sizes`` and ``cat_ranges``.
    metadata : optional
        Engine-specific metadata object.  When given, the work is delegated
        to ``_get_parquet_byte_ranges_from_metadata`` and no footer is read.
    columns, row_groups, filters : optional
        Selection forwarded to the engine.  When all three are ``None`` each
        file is fetched whole (bytes ``0`` up to its size).
    max_gap, max_block : int
        Forwarded to ``merge_offset_ranges``.
    footer_sample_size : int
        Number of trailing bytes read from each file as a first attempt at
        capturing its footer.
    engine : str or engine object
        A string is resolved with ``_set_engine``; anything else is used
        as the engine directly.
    """
    # Turn an engine name into an engine object
    if isinstance(engine, str):
        engine = _set_engine(engine)

    # With a metadata object in hand, the remote footers are not needed
    if metadata is not None:
        return _get_parquet_byte_ranges_from_metadata(
            metadata,
            fs,
            engine,
            columns=columns,
            row_groups=row_groups,
            max_gap=max_gap,
            max_block=max_block,
            filters=filters,
        )

    file_sizes = fs.sizes(paths)

    result = {}
    data_paths, data_starts, data_ends = [], [], []
    whole_files = columns is None and row_groups is None and filters is None

    if whole_files:
        # Nothing is being selected: skip the footers and request every
        # file in full.
        for idx, fn in enumerate(paths):
            result[fn] = {}
            data_paths.append(fn)
            data_starts.append(0)
            data_ends.append(file_sizes[idx])
    else:
        # First guess at the footers: the last `footer_sample_size` bytes
        # of each file (or the entire file if it is smaller than that).
        footer_ends = [file_sizes[idx] for idx, _ in enumerate(paths)]
        footer_starts = [max(0, end - footer_sample_size) for end in footer_ends]
        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)

        # The footer length is read as a little-endian integer from bytes
        # [-8:-4] of each sample; 8 more bytes are added to it to get the
        # distance from the end of the file to the footer start.
        resample_starts = list(footer_starts)
        largest_footer = 0
        for idx, _ in enumerate(paths):
            footer_len = int.from_bytes(footer_samples[idx][-8:-4], "little") + 8
            true_start = file_sizes[idx] - footer_len
            if true_start < footer_starts[idx]:
                resample_starts[idx] = true_start
                largest_footer = max(largest_footer, footer_len)

        if largest_footer:
            warnings.warn(
                "Not enough data was used to sample the parquet footer. "
                f"Try setting footer_sample_size >= {largest_footer}."
            )
            # Fetch only the missing leading part of each footer and
            # prepend it to the sample already held.
            prefixes = fs.cat_ranges(paths, resample_starts, footer_starts)
            for idx, prefix in enumerate(prefixes):
                footer_samples[idx] = prefix + footer_samples[idx]
                footer_starts[idx] = resample_starts[idx]

        # Ask the engine which data ranges each file needs
        for idx, fn in enumerate(paths):
            starts, ends = engine._parquet_byte_ranges(
                columns,
                row_groups=row_groups,
                footer=footer_samples[idx],
                footer_start=footer_starts[idx],
                filters=filters,
            )
            data_paths.extend([fn] * len(starts))
            data_starts.extend(starts)
            data_ends.extend(ends)
            # NOTE: this entry is written again after the merge step below.
            footer_key = (footer_starts[idx], file_sizes[idx])
            result.setdefault(fn, {})[footer_key] = footer_samples[idx]

        # Merge adjacent offset ranges
        data_paths, data_starts, data_ends = merge_offset_ranges(
            data_paths,
            data_starts,
            data_ends,
            max_gap=max_gap,
            max_block=max_block,
            sort=False,  # Should already be sorted
        )

        # Seed `result` with the footer samples
        for idx, fn in enumerate(paths):
            result[fn] = {(footer_starts[idx], footer_ends[idx]): footer_samples[idx]}

    # Transfer the data byte-ranges into local memory
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Whole-file transfers should already include the b"PAR1" magic;
    # partial selections may need it added.
    if not whole_files:
        _add_header_magic(result)

    return result
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def _get_parquet_byte_ranges_from_metadata(
    metadata,
    fs,
    engine,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    filters=None,
):
    """Byte-range collection for the case where `metadata` is provided.

    Because an engine-specific `metadata` object is already available, the
    remote footer metadata is not transferred before working out which
    byte ranges are required.

    Returns a dictionary keyed by path; each value maps ``(start, stop)``
    offsets to the bytes fetched for that range, with a ``(0, 4)`` header
    entry added by ``_add_header_magic`` where needed.
    """
    # The engine reports paths, starts and ends for the selection
    byte_paths, byte_starts, byte_ends = engine._parquet_byte_ranges(
        columns,
        row_groups=row_groups,
        metadata=metadata,
        filters=filters,
    )

    # Merge adjacent offset ranges
    byte_paths, byte_starts, byte_ends = merge_offset_ranges(
        byte_paths,
        byte_starts,
        byte_ends,
        max_gap=max_gap,
        max_block=max_block,
        sort=False,  # Should be sorted
    )

    # One (initially empty) block mapping per distinct path, then fill
    # them by transferring the data byte-ranges into local memory
    result = {fn: {} for fn in set(byte_paths)}
    _transfer_ranges(fs, result, byte_paths, byte_starts, byte_ends)

    # Add b"PAR1" to header
    _add_header_magic(result)

    return result
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def _transfer_ranges(fs, blocks, paths, starts, ends):
|
| 357 |
+
# Use cat_ranges to gather the data byte_ranges
|
| 358 |
+
ranges = (paths, starts, ends)
|
| 359 |
+
for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
|
| 360 |
+
blocks[path][(start, stop)] = data
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
def _add_header_magic(data):
|
| 364 |
+
# Add b"PAR1" to file headers
|
| 365 |
+
for path in list(data.keys()):
|
| 366 |
+
add_magic = True
|
| 367 |
+
for k in data[path]:
|
| 368 |
+
if k[0] == 0 and k[1] >= 4:
|
| 369 |
+
add_magic = False
|
| 370 |
+
break
|
| 371 |
+
if add_magic:
|
| 372 |
+
data[path][(0, 4)] = b"PAR1"
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def _set_engine(engine_str):
|
| 376 |
+
# Define a list of parquet engines to try
|
| 377 |
+
if engine_str == "auto":
|
| 378 |
+
try_engines = ("fastparquet", "pyarrow")
|
| 379 |
+
elif not isinstance(engine_str, str):
|
| 380 |
+
raise ValueError(
|
| 381 |
+
"Failed to set parquet engine! "
|
| 382 |
+
"Please pass 'fastparquet', 'pyarrow', or 'auto'"
|
| 383 |
+
)
|
| 384 |
+
elif engine_str not in ("fastparquet", "pyarrow"):
|
| 385 |
+
raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
|
| 386 |
+
else:
|
| 387 |
+
try_engines = [engine_str]
|
| 388 |
+
|
| 389 |
+
# Try importing the engines in `try_engines`,
|
| 390 |
+
# and choose the first one that succeeds
|
| 391 |
+
for engine in try_engines:
|
| 392 |
+
try:
|
| 393 |
+
if engine == "fastparquet":
|
| 394 |
+
return FastparquetEngine()
|
| 395 |
+
elif engine == "pyarrow":
|
| 396 |
+
return PyarrowEngine()
|
| 397 |
+
except ImportError:
|
| 398 |
+
pass
|
| 399 |
+
|
| 400 |
+
# Raise an error if a supported parquet engine
|
| 401 |
+
# was not found
|
| 402 |
+
raise ImportError(
|
| 403 |
+
f"The following parquet engines are not installed "
|
| 404 |
+
f"in your python environment: {try_engines}."
|
| 405 |
+
f"Please install 'fastparquert' or 'pyarrow' to "
|
| 406 |
+
f"utilize the `fsspec.parquet` module."
|
| 407 |
+
)
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
class FastparquetEngine:
    # The purpose of the FastparquetEngine class is
    # to check if fastparquet can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to fastparquet.

    def __init__(self):
        # Importing here makes construction raise ImportError when
        # fastparquet is not installed.
        import fastparquet as fp

        self.fp = fp

    def _row_group_filename(self, row_group, pf):
        """Return the file path that `pf` reports for `row_group`."""
        return pf.row_group_filename(row_group)

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
        filters=None,
    ):
        """Compute the byte ranges of the selected column chunks.

        Parameters
        ----------
        columns : list of str or None
            Columns to keep; only the part of each name before the first
            ``"."`` is compared.  ``None`` keeps every column.
        row_groups : list or None
            Row-group indices, or row-group metadata objects.  ``None``
            keeps every row group.
        metadata : optional
            An existing ParquetFile-like object.  When ``None``, one is
            built from `footer`.
        footer : bytes, optional
            Footer bytes, used only when `metadata` is ``None``.
        footer_start : int, optional
            Offset at which the footer sample starts.  Ranges are clipped
            so that they do not extend past it.
        filters : optional
            Passed to fastparquet's ``filter_row_groups``; cannot be
            combined with `row_groups`.

        Returns
        -------
        tuple
            ``(data_paths, data_starts, data_ends)`` when `metadata` was
            given, otherwise ``(data_starts, data_ends)``.

        Raises
        ------
        ValueError
            If both `filters` and `row_groups` are given.
        """
        # Initialize offset ranges and define ParquetFile metadata
        pf = metadata
        data_paths, data_starts, data_ends = [], [], []
        if filters and row_groups:
            raise ValueError("filters and row_groups cannot be used together")
        if pf is None:
            pf = self.fp.ParquetFile(io.BytesIO(footer))

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else {c.split(".", 1)[0] for c in columns}
        if column_set is not None and hasattr(pf, "pandas_metadata"):
            md_index = [
                ind
                for ind in pf.pandas_metadata.get("index_columns", [])
                # Ignore RangeIndex information
                if not isinstance(ind, dict)
            ]
            column_set |= set(md_index)

        # Work out whether row_groups holds indices or row-group metadata
        if filters:
            from fastparquet.api import filter_row_groups

            row_group_indices = None
            row_groups = filter_row_groups(pf, filters)
        elif row_groups and not isinstance(row_groups[0], int):
            # Input row_groups contains row-group metadata
            row_group_indices = None
        else:
            # Input row_groups contains row-group indices (or is empty/None)
            row_group_indices = row_groups
            row_groups = pf.row_groups

        # Loop through column chunks to add required byte ranges
        for r, row_group in enumerate(row_groups):
            # Skip this row-group if we are targeting specific row-groups
            if row_group_indices is not None and r not in row_group_indices:
                continue

            # Find the target parquet-file path for `row_group`
            fn = self._row_group_filename(row_group, pf)

            for column in row_group.columns:
                name = column.meta_data.path_in_schema[0]
                # Skip this column if we are targeting specific columns
                if column_set is not None and name not in column_set:
                    continue

                # A chunk starts at its dictionary page when it has one,
                # otherwise at its first data page.
                file_offset0 = column.meta_data.dictionary_page_offset
                if file_offset0 is None:
                    file_offset0 = column.meta_data.data_page_offset
                num_bytes = column.meta_data.total_compressed_size
                if footer_start is None or file_offset0 < footer_start:
                    data_paths.append(fn)
                    data_starts.append(file_offset0)
                    data_ends.append(
                        min(
                            file_offset0 + num_bytes,
                            footer_start or (file_offset0 + num_bytes),
                        )
                    )

        # Compare against None rather than relying on truthiness: a
        # metadata object that evaluates falsy (for example one that
        # defines `__len__` and holds no row groups) must still produce
        # the three-element result that metadata callers unpack.
        if metadata is not None:
            # The metadata in this call may map to multiple
            # file paths. Need to include `data_paths`
            return data_paths, data_starts, data_ends
        return data_starts, data_ends
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
class PyarrowEngine:
    # The purpose of the PyarrowEngine class is
    # to check if pyarrow can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to pyarrow.

    def __init__(self):
        # Importing here makes construction raise ImportError when
        # pyarrow is not installed.
        import pyarrow.parquet as pq

        self.pq = pq

    def _row_group_filename(self, row_group, metadata):
        """Not available for this engine; always raises."""
        raise NotImplementedError

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
        filters=None,
    ):
        """Compute the byte ranges of the selected column chunks.

        Parameters
        ----------
        columns : list of str or None
            Columns to keep.  A chunk is kept when either its full schema
            path or the part before the first ``"."`` is listed.  ``None``
            keeps every column.
        row_groups : list of int or None
            Row-group indices to keep; ``None`` keeps every row group.
        metadata : must be None
            Not supported by this engine.
        footer : bytes
            Footer bytes from which the parquet metadata is parsed.
        footer_start : int, optional
            Offset at which the footer sample starts.  Ranges are clipped
            so that they do not extend past it; ``None`` disables the
            clipping, matching ``FastparquetEngine``.
        filters : optional
            Not supported by this engine.

        Returns
        -------
        tuple of list
            ``(data_starts, data_ends)``.

        Raises
        ------
        ValueError
            If `metadata` is given.
        NotImplementedError
            If `filters` is given.
        """
        if metadata is not None:
            raise ValueError("metadata input not supported for PyarrowEngine")
        if filters:
            raise NotImplementedError

        data_starts, data_ends = [], []
        md = self.pq.ParquetFile(io.BytesIO(footer)).metadata

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None:
            schema = md.schema.to_arrow_schema()
            has_pandas_metadata = (
                schema.metadata is not None and b"pandas" in schema.metadata
            )
            if has_pandas_metadata:
                md_index = [
                    ind
                    for ind in json.loads(
                        schema.metadata[b"pandas"].decode("utf8")
                    ).get("index_columns", [])
                    # Ignore RangeIndex information
                    if not isinstance(ind, dict)
                ]
                column_set |= set(md_index)

        # Loop through column chunks to add required byte ranges
        for r in range(md.num_row_groups):
            # Skip this row-group if we are targeting specific row-groups
            if row_groups is not None and r not in row_groups:
                continue

            row_group = md.row_group(r)
            for c in range(row_group.num_columns):
                column = row_group.column(c)
                name = column.path_in_schema
                # Skip this column if we are targeting specific columns
                split_name = name.split(".")[0]
                if not (
                    column_set is None
                    or name in column_set
                    or split_name in column_set
                ):
                    continue

                # A chunk starts at its dictionary page when it has one,
                # otherwise at its first data page.
                file_offset0 = column.dictionary_page_offset
                if file_offset0 is None:
                    file_offset0 = column.data_page_offset
                chunk_end = file_offset0 + column.total_compressed_size

                if footer_start is None:
                    # No footer offset supplied: keep the chunk unclipped
                    # instead of failing on an int/None comparison.
                    data_starts.append(file_offset0)
                    data_ends.append(chunk_end)
                elif file_offset0 < footer_start:
                    data_starts.append(file_offset0)
                    data_ends.append(min(chunk_end, footer_start))
        return data_starts, data_ends
|
env/lib/python3.13/site-packages/fsspec/registry.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import importlib
|
| 4 |
+
import types
|
| 5 |
+
import warnings
|
| 6 |
+
|
| 7 |
+
# Names exported by `from <this module> import *`.
__all__ = ["registry", "get_filesystem_class", "default"]

# internal, mutable: protocol name -> implementation class.
# Written to by `register_implementation`.
_registry: dict[str, type] = {}

# external, immutable: a read-only proxy over `_registry`.  It reflects
# later additions to `_registry` but cannot itself be assigned to.
registry = types.MappingProxyType(_registry)
# Protocol used by `get_filesystem_class` when it is given a falsy protocol.
default = "file"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def register_implementation(name, cls, clobber=False, errtxt=None):
    """Register a filesystem implementation under a protocol name.

    Parameters
    ----------
    name: str
        Protocol name to associate with the class
    cls: class or str
        If a class, it is stored straight into the registry (normally an
        fsspec-compliant implementation inheriting from
        ``fsspec.AbstractFileSystem``).  If a str, it is the full path to
        an implementation class like package.module.class; it is recorded
        in ``known_implementations`` so that the import is deferred until
        the filesystem is actually used.
    clobber: bool (optional)
        Whether to overwrite a protocol with the same name.  If False, a
        conflicting registration raises instead; re-registering the very
        same class (or the same path string) is accepted and changes
        nothing.
    errtxt: str (optional)
        For string registrations: the text to report if importing the
        class later fails.  A generic message is stored when omitted.

    Raises
    ------
    ValueError
        If `name` is already taken by something else and `clobber` is
        False.
    """
    if isinstance(cls, str):
        # Deferred registration: remember where the class lives
        if name not in known_implementations or clobber is not False:
            known_implementations[name] = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }
        elif cls != known_implementations[name]["class"]:
            raise ValueError(
                f"Name ({name}) already in the known_implementations and clobber "
                f"is False"
            )
        return

    # Direct registration of a class object
    if name not in registry or clobber is not False:
        _registry[name] = cls
    elif _registry[name] is not cls:
        raise ValueError(
            f"Name ({name}) already in the registry and clobber is False"
        )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Protocols mapped to the class which implements them. This dict can be
# updated with register_implementation.
#
# Each value holds:
#   "class": dotted path of the implementation class, imported on first use
#   "err":   (optional) message reported when that import fails
#
# Keys must stay in alphabetical order; this is asserted below.
known_implementations = {
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "arrow_hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "async_wrapper": {
        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
    },
    "asynclocal": {
        "class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "asyncwrapper": {
        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "data": {"class": "fsspec.implementations.data.DataFileSystem"},
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "dropbox": {
        "class": "dropboxdrivefs.DropboxDriveFileSystem",
        # The two string pieces previously joined into `and ""dropbox"`.
        "err": (
            'DropboxFileSystem requires "dropboxdrivefs", "requests" and '
            '"dropbox" to be installed'
        ),
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
    "gcs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gdrive": {
        "class": "gdrive_fsspec.GoogleDriveFileSystem",
        "err": "Please install gdrive_fs for access to Google Drive",
    },
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "gist": {
        "class": "fsspec.implementations.gist.GistFileSystem",
        "err": "Install the requests package to use the gist FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "gs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "http": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "https": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        # The message previously named no package at all.
        # NOTE(review): "libarchive-c" is the expected distribution name;
        # confirm against the project's optional dependencies.
        "err": "LibArchive requires libarchive-c to be installed",
    },
    "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
    "oci": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Object Storage",
    },
    "ocilake": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Data Lake",
    },
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "pyscript": {
        "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
        "err": "Install requests (cpython) or run in pyscript",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": (
            "Install fsspec-xrootd to access xrootd storage system. "
            "Note: 'root' is the protocol name for xrootd storage systems, "
            "not referring to root directories"
        ),
    },
    "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "sftp": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "ssh": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
    "tos": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "tosfs": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "webhdfs": {
        "class": "fsspec.implementations.webhdfs.WebHDFS",
        "err": 'webHDFS access requires "requests" to be installed',
    },
    "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
}

# Import-time sanity check that the table above is kept sorted by key.
assert list(known_implementations) == sorted(known_implementations), (
    "Not in alphabetical order"
)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message; entries
    without an "err" field get a generic message naming the class and the
    protocol.

    Parameters
    ----------
    protocol: str or None
        Protocol name. A falsy value selects the module-level ``default``.

    Returns
    -------
    The implementation class registered for the protocol.

    Raises
    ------
    ValueError
        If the protocol is in neither the registry nor
        ``known_implementations``.
    ImportError
        If importing the implementation class fails.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            # Many entries carry no "err" text; without a fallback the
            # raised error would read just "None".
            message = bit.get("err") or (
                f"{bit['class']} import failed for protocol {protocol}"
            )
            raise ImportError(message) from e
    cls = registry[protocol]
    # Give the class a concrete protocol name if it has none of its own
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
s3_msg = """Your installed version of s3fs is very old and known to cause
|
| 275 |
+
severe performance issues, see also https://github.com/dask/dask/issues/10276
|
| 276 |
+
|
| 277 |
+
To fix, you should specify a lower version bound on s3fs, or
|
| 278 |
+
update the current installation.
|
| 279 |
+
"""
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def _import_class(fqp: str):
|
| 283 |
+
"""Take a fully-qualified path and return the imported class or identifier.
|
| 284 |
+
|
| 285 |
+
``fqp`` is of the form "package.module.klass" or
|
| 286 |
+
"package.module:subobject.klass".
|
| 287 |
+
|
| 288 |
+
Warnings
|
| 289 |
+
--------
|
| 290 |
+
This can import arbitrary modules. Make sure you haven't installed any modules
|
| 291 |
+
that may execute malicious code at import time.
|
| 292 |
+
"""
|
| 293 |
+
if ":" in fqp:
|
| 294 |
+
mod, name = fqp.rsplit(":", 1)
|
| 295 |
+
else:
|
| 296 |
+
mod, name = fqp.rsplit(".", 1)
|
| 297 |
+
|
| 298 |
+
is_s3 = mod == "s3fs"
|
| 299 |
+
mod = importlib.import_module(mod)
|
| 300 |
+
if is_s3 and mod.__version__.split(".") < ["0", "5"]:
|
| 301 |
+
warnings.warn(s3_msg)
|
| 302 |
+
for part in name.split("."):
|
| 303 |
+
mod = getattr(mod, part)
|
| 304 |
+
|
| 305 |
+
if not isinstance(mod, type):
|
| 306 |
+
raise TypeError(f"{fqp} is not a class")
|
| 307 |
+
|
| 308 |
+
return mod
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def filesystem(protocol, **storage_options):
    """Instantiate filesystems for given protocol and arguments

    ``storage_options`` are specific to the protocol being chosen, and are
    passed directly to the class.

    Parameters
    ----------
    protocol: str
        Name of the protocol; resolved to a class by ``get_filesystem_class``.
    storage_options:
        Keyword arguments forwarded unchanged to the filesystem class.

    Returns
    -------
    An instance of the filesystem class registered for ``protocol``.
    """
    if protocol == "arrow_hdfs":
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
            # attribute the warning to the caller rather than this module
            stacklevel=2,
        )

    cls = get_filesystem_class(protocol)
    return cls(**storage_options)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.

    The list is built from the keys of ``known_implementations``.
    """
    return list(known_implementations)
|
env/lib/python3.13/site-packages/fsspec/spec.py
ADDED
|
@@ -0,0 +1,2281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
import threading
|
| 8 |
+
import warnings
|
| 9 |
+
import weakref
|
| 10 |
+
from errno import ESPIPE
|
| 11 |
+
from glob import has_magic
|
| 12 |
+
from hashlib import sha256
|
| 13 |
+
from typing import Any, ClassVar
|
| 14 |
+
|
| 15 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 16 |
+
from .config import apply_config, conf
|
| 17 |
+
from .dircache import DirCache
|
| 18 |
+
from .transaction import Transaction
|
| 19 |
+
from .utils import (
|
| 20 |
+
_unstrip_protocol,
|
| 21 |
+
glob_translate,
|
| 22 |
+
isfilelike,
|
| 23 |
+
other_paths,
|
| 24 |
+
read_block,
|
| 25 |
+
stringify_path,
|
| 26 |
+
tokenize,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("fsspec")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def make_instance(cls, args, kwargs):
    """Call ``cls`` with the given positional and keyword arguments.

    Module-level helper so that it can be referenced from ``__reduce__``
    when instances are pickled.
    """
    return cls(*args, **kwargs)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        """Give each class created by this metaclass its own instance cache."""
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        # remembered so that a change of process can be detected in __call__
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        """Return a cached instance for these arguments, or create a new one.

        The cache key is a token built from the class, the stored process id,
        the current thread id, the positional arguments, the values of
        ``_extra_tokenize_attributes`` and the keyword arguments (minus any
        listed in ``_strip_tokenize_options``).
        """
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        # removed from kwargs so they do not influence the token; they are
        # passed back to the constructor below
        strip_tokenize_options = {
            k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
        }
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            # running in a different process than the one that filled the
            # cache: discard the cached instances
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
class AbstractFileSystem(metaclass=_Cached):
|
| 101 |
+
"""
|
| 102 |
+
An abstract super-class for pythonic file-systems
|
| 103 |
+
|
| 104 |
+
Implementations are expected to be compatible with or, better, subclass
|
| 105 |
+
from here.
|
| 106 |
+
"""
|
| 107 |
+
|
| 108 |
+
cachable = True # this class can be cached, instances reused
|
| 109 |
+
_cached = False
|
| 110 |
+
blocksize = 2**22
|
| 111 |
+
sep = "/"
|
| 112 |
+
protocol: ClassVar[str | tuple[str, ...]] = "abstract"
|
| 113 |
+
_latest = None
|
| 114 |
+
async_impl = False
|
| 115 |
+
mirror_sync_methods = False
|
| 116 |
+
root_marker = "" # For some FSs, may require leading '/' or other character
|
| 117 |
+
transaction_type = Transaction
|
| 118 |
+
|
| 119 |
+
#: Extra *class attributes* that should be considered when hashing.
|
| 120 |
+
_extra_tokenize_attributes = ()
|
| 121 |
+
#: *storage options* that should not be considered when hashing.
|
| 122 |
+
_strip_tokenize_options = ()
|
| 123 |
+
|
| 124 |
+
# Set by _Cached metaclass
|
| 125 |
+
storage_args: tuple[Any, ...]
|
| 126 |
+
storage_options: dict[str, Any]
|
| 127 |
+
|
| 128 |
+
def __init__(self, *args, **storage_options):
    """Create and configure file-system instance

    Instances may be cachable, so if similar enough arguments are seen
    a new instance is not required. The token attribute exists to allow
    implementations to cache instances if they wish.

    A reasonable default should be provided if there are no arguments.

    Subclasses should call this method.

    Parameters
    ----------
    use_listings_cache, listings_expiry_time, max_paths:
        passed to ``DirCache``, if the implementation supports
        directory listing caching. Pass use_listings_cache=False
        to disable such caching.
    skip_instance_cache: bool
        If this is a cachable implementation, pass True here to force
        creating a new instance even if a matching instance exists, and prevent
        storing this instance.
    asynchronous: bool
    loop: asyncio-compatible IOLoop or None
    """
    if self._cached:
        # reusing instance, don't change
        return
    self._cached = True
    self._intrans = False
    self._transaction = None
    self._invalidated_caches_in_transaction = []
    self.dircache = DirCache(**storage_options)

    if storage_options.pop("add_docs", None):
        warnings.warn("add_docs is no longer supported.", FutureWarning)

    if storage_options.pop("add_aliases", None):
        warnings.warn("add_aliases has been removed.", FutureWarning)
    # This is set in _Cached
    self._fs_token_ = None
|
| 168 |
+
|
| 169 |
+
@property
def fsid(self):
    """Persistent filesystem id that can be used to compare filesystems
    across sessions.

    Not implemented here; always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
| 175 |
+
|
| 176 |
+
@property
def _fs_token(self):
    """Read-only view of the ``_fs_token_`` attribute."""
    return self._fs_token_
|
| 179 |
+
|
| 180 |
+
def __dask_tokenize__(self):
    """Return this instance's ``_fs_token`` as its dask token."""
    return self._fs_token
|
| 182 |
+
|
| 183 |
+
def __hash__(self):
    """Hash the instance by interpreting ``_fs_token`` as a hexadecimal integer."""
    return int(self._fs_token, 16)
|
| 185 |
+
|
| 186 |
+
def __eq__(self, other):
    """Two filesystems are equal when ``other`` is an instance of this
    instance's type and both carry the same ``_fs_token``.
    """
    return isinstance(other, type(self)) and self._fs_token == other._fs_token
|
| 188 |
+
|
| 189 |
+
def __reduce__(self):
    """Pickle support: rebuild the instance by calling its class with the
    recorded ``storage_args`` and ``storage_options``.
    """
    return make_instance, (type(self), self.storage_args, self.storage_options)
|
| 191 |
+
|
| 192 |
+
@classmethod
def _strip_protocol(cls, path):
    """Turn path from fully-qualified to file-system-specific

    May require FS-specific handling, e.g., for relative paths or links.

    A list of paths is processed element-wise. A leading ``<protocol>://``
    or ``<protocol>::`` is removed for each protocol of the class, trailing
    "/" characters are stripped, and ``cls.root_marker`` is returned if
    nothing remains.
    """
    if isinstance(path, list):
        return [cls._strip_protocol(p) for p in path]
    path = stringify_path(path)
    # cls.protocol may be a single name or a tuple of names
    protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
    for protocol in protos:
        if path.startswith(protocol + "://"):
            path = path[len(protocol) + 3 :]
        elif path.startswith(protocol + "::"):
            path = path[len(protocol) + 2 :]
    path = path.rstrip("/")
    # use of root_marker to make minimum required path, e.g., "/"
    return path or cls.root_marker
|
| 210 |
+
|
| 211 |
+
def unstrip_protocol(self, name: str) -> str:
    """Format FS-specific path to generic, including protocol

    If ``name`` already starts with ``<protocol>://`` for one of this
    filesystem's protocols it is returned unchanged; otherwise the first
    protocol is prepended.
    """
    # self.protocol may be a single name or a tuple of names
    protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
    for protocol in protos:
        if name.startswith(f"{protocol}://"):
            return name
    return f"{protos[0]}://{name}"
|
| 218 |
+
|
| 219 |
+
@staticmethod
def _get_kwargs_from_urls(path):
    """If kwargs can be encoded in the paths, extract them here

    This should happen before instantiation of the class; incoming paths
    then should be amended to strip the options in methods.

    Examples may look like an sftp path "sftp://user@host:/my/path", where
    the user and host should become kwargs and later get stripped.
    """
    # by default, nothing happens
    return {}
|
| 231 |
+
|
| 232 |
+
@classmethod
def current(cls):
    """Return the most recently instantiated FileSystem

    If no instance has been created, then create one with defaults
    """
    if cls._latest in cls._cache:
        return cls._cache[cls._latest]
    return cls()
|
| 241 |
+
|
| 242 |
+
@property
def transaction(self):
    """A context within which files are committed together upon exit

    Requires the file class to implement `.commit()` and `.discard()`
    for the normal and exception cases.

    The transaction object is created on first access using
    ``self.transaction_type`` and then reused.
    """
    if self._transaction is None:
        self._transaction = self.transaction_type(self)
    return self._transaction
|
| 252 |
+
|
| 253 |
+
def start_transaction(self):
    """Begin write transaction for deferring files, non-context version

    Marks the filesystem as being inside a transaction, creates a fresh
    transaction object and returns it.
    """
    self._intrans = True
    self._transaction = self.transaction_type(self)
    return self.transaction
|
| 258 |
+
|
| 259 |
+
def end_transaction(self):
    """Finish write transaction, non-context version"""
    self.transaction.complete()
    self._transaction = None
    # The invalid cache must be cleared after the transaction is completed.
    for path in self._invalidated_caches_in_transaction:
        self.invalidate_cache(path)
    self._invalidated_caches_in_transaction.clear()
|
| 267 |
+
|
| 268 |
+
def invalidate_cache(self, path=None):
    """
    Discard any cached directory information

    Parameters
    ----------
    path: string or None
        If None, clear all listings cached else listings at or under given
        path.
    """
    # Not necessary to implement invalidation mechanism, may have no cache.
    # But if have, you should call this method of parent class from your
    # subclass to ensure expiring caches after transactions correctly.
    # See the implementation of FTPFileSystem in ftp.py
    if self._intrans:
        # remember the path so it can be invalidated again once the
        # transaction has ended
        self._invalidated_caches_in_transaction.append(path)
|
| 284 |
+
|
| 285 |
+
def mkdir(self, path, create_parents=True, **kwargs):
    """
    Create directory entry at path

    For systems that don't have true directories, may create an entry for
    this instance only and not touch the real filesystem

    Parameters
    ----------
    path: str
        location
    create_parents: bool
        if True, this is equivalent to ``makedirs``
    kwargs:
        may be permissions, etc.
    """
    pass  # not necessary to implement, may not have directories
|
| 302 |
+
|
| 303 |
+
def makedirs(self, path, exist_ok=False):
    """Recursively make directories

    Creates directory at path and any intervening required directories.
    Raises exception if, for instance, the path already exists but is a
    file.

    Parameters
    ----------
    path: str
        leaf directory name
    exist_ok: bool (False)
        If False, will error if the target already exists
    """
    pass  # not necessary to implement, may not have directories
|
| 318 |
+
|
| 319 |
+
def rmdir(self, path):
    """Remove a directory, if empty"""
    pass  # not necessary to implement, may not have directories
|
| 322 |
+
|
| 323 |
+
def ls(self, path, detail=True, **kwargs):
    """List objects at path.

    This should include subdirectories and files at that location. The
    difference between a file and a directory must be clear when details
    are requested.

    The specific keys, or perhaps a FileInfo class, or similar, is TBD,
    but must be consistent across implementations.
    Must include:

    - full path to the entry (without protocol)
    - size of the entry, in bytes. If the value cannot be determined, will
      be ``None``.
    - type of entry, "file", "directory" or other

    Additional information
    may be present, appropriate to the file-system, e.g., generation,
    checksum, etc.

    May use refresh=True|False to allow use of self._ls_from_cache to
    check for a saved listing and avoid calling the backend. This would be
    common where listing may be expensive.

    Parameters
    ----------
    path: str
    detail: bool
        if True, gives a list of dictionaries, where each is the same as
        the result of ``info(path)``. If False, gives a list of paths
        (str).
    kwargs: may have additional backend-specific options, such as version
        information

    Returns
    -------
    List of strings if detail is False, or list of directory information
    dicts if detail is True.
    """
    raise NotImplementedError
|
| 363 |
+
|
| 364 |
+
def _ls_from_cache(self, path):
    """Check cache for listing

    Returns listing, if found (may be empty list for a directory that exists
    but contains nothing), None if not in cache.

    Raises
    ------
    FileNotFoundError
        If the parent directory's listing is cached but has no entry for
        ``path``.
    """
    parent = self._parent(path)
    try:
        # a cached listing of the path itself
        return self.dircache[path.rstrip("/")]
    except KeyError:
        pass
    try:
        # otherwise look for the path's own entry in its parent's listing
        files = [
            f
            for f in self.dircache[parent]
            if f["name"] == path
            or (f["name"] == path.rstrip("/") and f["type"] == "directory")
        ]
        if len(files) == 0:
            # parent dir was listed but did not contain this file
            raise FileNotFoundError(path)
        return files
    except KeyError:
        pass
    return None
|
| 388 |
+
|
| 389 |
+
def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
    """Return all files under the given path.

    List all files, recursing into subdirectories; output is iterator-style,
    like ``os.walk()``. For a simple list of files, ``find()`` is available.

    When topdown is True, the caller can modify the dirnames list in-place (perhaps
    using del or slice assignment), and walk() will
    only recurse into the subdirectories whose names remain in dirnames;
    this can be used to prune the search, impose a specific order of visiting,
    or even to inform walk() about directories the caller creates or renames before
    it resumes walk() again.
    Modifying dirnames when topdown is False has no effect. (see os.walk)

    Note that the "files" outputted will include anything that is not
    a directory, such as links.

    Parameters
    ----------
    path: str
        Root to recurse into
    maxdepth: int
        Maximum recursion depth. None means limitless, but not recommended
        on link-based file-systems.
    topdown: bool (True)
        Whether to walk the directory tree from the top downwards or from
        the bottom upwards.
    on_error: "omit", "raise", a callable
        if omit (default), a path whose listing fails is skipped (nothing is
        yielded for it);
        If raise, an underlying exception will be raised;
        if callable, it will be called with a single OSError instance as argument.
        The same policy is applied at every level of the recursion.
    kwargs: passed to ``ls``; a ``detail`` entry is consumed here and selects
        whether dirs/files are yielded as {name: info} dicts (True) or as
        lists of names (False, default).

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is less than 1.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    path = self._strip_protocol(path)
    full_dirs = {}
    dirs = {}
    files = {}

    detail = kwargs.pop("detail", False)
    try:
        listing = self.ls(path, detail=True, **kwargs)
    except OSError as e:  # FileNotFoundError is a subclass of OSError
        if on_error == "raise":
            raise
        if callable(on_error):
            on_error(e)
        return

    for info in listing:
        # each info name must be at least [path]/part , but here
        # we check also for names like [path]/part/
        pathname = info["name"].rstrip("/")
        name = pathname.rsplit("/", 1)[-1]
        if info["type"] == "directory" and pathname != path:
            # do not include "self" path
            full_dirs[name] = pathname
            dirs[name] = info
        elif pathname == path:
            # file-like with same name as give path
            files[""] = info
        else:
            files[name] = info

    if not detail:
        dirs = list(dirs)
        files = list(files)

    if topdown:
        # Yield before recursion if walking top down
        yield path, dirs, files

    if maxdepth is not None:
        maxdepth -= 1
        if maxdepth < 1:
            if not topdown:
                yield path, dirs, files
            return

    for d in dirs:
        # on_error must be forwarded explicitly; otherwise nested levels
        # silently fall back to the default "omit" policy.
        yield from self.walk(
            full_dirs[d],
            maxdepth=maxdepth,
            detail=detail,
            topdown=topdown,
            on_error=on_error,
            **kwargs,
        )

    if not topdown:
        # Yield after recursion if walking bottom up
        yield path, dirs, files
|
| 482 |
+
|
| 483 |
+
def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
    """List all files below path.

    Like posix ``find`` command without conditions

    Parameters
    ----------
    path : str
    maxdepth: int or None
        If not None, the maximum number of levels to descend
    withdirs: bool
        Whether to include directory paths in the output. This is True
        when used by glob, but users usually only want files.
    detail: bool
        If True, return a dict of {path: info}; otherwise a sorted list of paths.
    kwargs are passed to ``ls``.
    """
    # TODO: allow equivalent of -name parameter
    path = self._strip_protocol(path)
    found = {}

    # Add the root directory if withdirs is requested
    # This is needed for posix glob compliance
    if withdirs and path != "" and self.isdir(path):
        found[path] = self.info(path)

    for _, subdirs, entries in self.walk(path, maxdepth, detail=True, **kwargs):
        if withdirs:
            entries.update(subdirs)
        for entry in entries.values():
            found[entry["name"]] = entry

    if not found and self.isfile(path):
        # walk works on directories, but find should also return [path]
        # when path happens to be a file
        found[path] = {}

    ordered = sorted(found)
    if detail:
        return {key: found[key] for key in ordered}
    return ordered
|
| 520 |
+
|
| 521 |
+
def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
    """Space used by files and optionally directories within a path

    Directory size does not include the size of its contents.

    Parameters
    ----------
    path: str
    total: bool
        Whether to sum all the file sizes
    maxdepth: int or None
        Maximum number of directory levels to descend, None for unlimited.
    withdirs: bool
        Whether to include directory paths in the output.
    kwargs: passed to ``find``

    Returns
    -------
    Dict of {path: size} if total=False, or int otherwise, where numbers
    refer to bytes used.
    """
    usage = {}
    if withdirs and self.isdir(path):
        # Include top-level directory in output
        top = self.info(path)
        usage[top["name"]] = top["size"]
    for found in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
        entry = self.info(found)
        usage[entry["name"]] = entry["size"]
    return sum(usage.values()) if total else usage
|
| 554 |
+
|
| 555 |
+
def glob(self, path, maxdepth=None, **kwargs):
    """Find files by glob-matching.

    Pattern matching capabilities for finding files that match the given pattern.

    Parameters
    ----------
    path: str
        The glob pattern to match against
    maxdepth: int or None
        Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
        Must be at least 1 if provided.
    kwargs:
        Additional arguments passed to ``find`` (e.g., detail=True)

    Returns
    -------
    List of matched paths, or dict of paths and their info if detail=True

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is less than 1.

    Notes
    -----
    Supported patterns:
    - '*': Matches any sequence of characters within a single directory level
    - ``'**'``: Matches any number of directory levels (must be an entire path component)
    - '?': Matches exactly one character
    - '[abc]': Matches any character in the set
    - '[a-z]': Matches any character in the range
    - '[!abc]': Matches any character NOT in the set

    Special behaviors:
    - If the path ends with '/', only folders are returned
    - Consecutive '*' characters are compressed into a single '*'
    - Empty brackets '[]' never match anything
    - Negated empty brackets '[!]' match any single character
    - Special characters in character classes are escaped properly

    Limitations:
    - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
    - No brace expansion ('{a,b}.txt')
    - No extended glob patterns ('+(pattern)', '!(pattern)')
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    import re

    seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
    # Must be recorded before stripping, since the trailing separator is lost below.
    ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
    path = self._strip_protocol(path)
    append_slash_to_dirname = ends_with_sep or path.endswith(
        tuple(sep + "**" for sep in seps)
    )
    # Position of the first occurrence of each wildcard character, or
    # len(path) when that character does not occur.
    idx_star = path.find("*") if path.find("*") >= 0 else len(path)
    idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
    idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

    min_idx = min(idx_star, idx_qmark, idx_brace)

    # ``detail`` is consumed here; find() below is always called with detail=True.
    detail = kwargs.pop("detail", False)

    if not has_magic(path):
        # No wildcard: the result is the path itself if it exists, else empty.
        if self.exists(path, **kwargs):
            if not detail:
                return [path]
            else:
                return {path: self.info(path, **kwargs)}
        else:
            if not detail:
                return []  # glob of non-existent returns empty
            else:
                return {}
    elif "/" in path[:min_idx]:
        # root is the prefix up to and including the last "/" before the
        # first wildcard; depth counts the path components after it.
        min_idx = path[:min_idx].rindex("/")
        root = path[: min_idx + 1]
        depth = path[min_idx + 1 :].count("/") + 1
    else:
        root = ""
        depth = path[min_idx + 1 :].count("/") + 1

    if "**" in path:
        if maxdepth is not None:
            # Replace the components from the first "**" onwards with maxdepth levels.
            idx_double_stars = path.find("**")
            depth_double_stars = path[idx_double_stars:].count("/") + 1
            depth = depth - depth_double_stars + maxdepth
        else:
            # unlimited search depth
            depth = None

    allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

    pattern = glob_translate(path + ("/" if ends_with_sep else ""))
    pattern = re.compile(pattern)

    # Keep candidates that match the pattern; directories are matched with a
    # trailing "/" appended when append_slash_to_dirname is set.
    out = {
        p: info
        for p, info in sorted(allpaths.items())
        if pattern.match(
            p + "/"
            if append_slash_to_dirname and info["type"] == "directory"
            else p
        )
    }

    if detail:
        return out
    else:
        return list(out)
|
| 661 |
+
|
| 662 |
+
def exists(self, path, **kwargs):
    """Is there a file at the given path

    Returns True if ``self.info(path, **kwargs)`` succeeds, and False if it
    raises any ordinary exception. Interpreter-level signals such as
    ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed.
    """
    try:
        self.info(path, **kwargs)
        return True
    except Exception:
        # any exception allowed bar FileNotFoundError?
        return False
|
| 670 |
+
|
| 671 |
+
def lexists(self, path, **kwargs):
    """If there is a file at the given path (including
    broken links)

    This default implementation delegates to ``exists``, forwarding
    ``kwargs`` unchanged.
    """
    return self.exists(path, **kwargs)
|
| 675 |
+
|
| 676 |
+
def info(self, path, **kwargs):
    """Give details of entry at path

    Returns a single dictionary, with exactly the same information as ``ls``
    would with ``detail=True``.

    The default implementation calls ls and could be overridden by a
    shortcut. kwargs are passed on to ``ls()``.

    Some file systems might not be able to measure the file's size, in
    which case, the returned dict will include ``'size': None``.

    Returns
    -------
    dict with keys: name (full path in the FS), size (in bytes), type (file,
    directory, or something else) and other FS-specific keys.

    Raises
    ------
    FileNotFoundError
        If neither the parent listing nor the listing of ``path`` itself
        yields anything for ``path``.
    """
    path = self._strip_protocol(path)

    # First choice: the entry as it appears in the parent's listing.
    in_parent = [
        entry
        for entry in self.ls(self._parent(path), detail=True, **kwargs)
        if entry["name"].rstrip("/") == path
    ]
    if in_parent:
        return in_parent[0]

    # Otherwise list the path itself.
    listing = self.ls(path, detail=True, **kwargs)
    path = path.rstrip("/")
    exact = [entry for entry in listing if entry["name"].rstrip("/") == path]
    if len(exact) == 1:
        only = exact[0]
        if "size" not in only:
            only["size"] = None
        return only
    if exact or listing:
        # A non-empty listing without a single exact match is reported as a directory.
        return {"name": path, "size": 0, "type": "directory"}
    raise FileNotFoundError(path)
|
| 709 |
+
|
| 710 |
+
def checksum(self, path):
    """Unique value for current version of file

    If the checksum is the same from one moment to another, the contents
    are guaranteed to be the same. If the checksum changes, the contents
    *might* have changed.

    This should normally be overridden; default will probably capture
    creation/modification timestamp (which would be good) or maybe
    access timestamp (which would be bad)

    Returns
    -------
    int, parsed as hexadecimal from ``tokenize`` applied to ``info(path)``.
    """
    details = self.info(path)
    digest = tokenize(details)
    return int(digest, 16)
|
| 722 |
+
|
| 723 |
+
def size(self, path):
    """Size in bytes of file

    Returns the ``"size"`` entry of ``info(path)``, or None if it is absent.
    """
    details = self.info(path)
    return details.get("size")
|
| 726 |
+
|
| 727 |
+
def sizes(self, paths):
    """Size in bytes of each file in a list of paths

    Returns a list with one ``size(p)`` result per input path, in order.
    """
    return list(map(self.size, paths))
|
| 730 |
+
|
| 731 |
+
def isdir(self, path):
    """Is this entry directory-like?

    Returns True when ``info(path)`` reports type ``"directory"``; returns
    False when ``info`` raises an ``OSError``.
    """
    try:
        details = self.info(path)
    except OSError:
        return False
    return details["type"] == "directory"
|
| 737 |
+
|
| 738 |
+
def isfile(self, path):
    """Is this entry file-like?

    Returns True when ``info(path)`` reports type ``"file"``. Any ordinary
    exception raised while looking the entry up (including a missing
    ``"type"`` key) yields False; ``KeyboardInterrupt`` and ``SystemExit``
    are allowed to propagate.
    """
    try:
        return self.info(path)["type"] == "file"
    except Exception:
        return False
|
| 744 |
+
|
| 745 |
+
def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
    """Get the contents of the file as a string.

    Parameters
    ----------
    path: str
        URL of file on this filesystems
    encoding, errors, newline: same as `open`.
    kwargs: passed to ``open()``.
    """
    handle = self.open(
        path,
        mode="r",
        encoding=encoding,
        errors=errors,
        newline=newline,
        **kwargs,
    )
    with handle as stream:
        text = stream.read()
    return text
|
| 763 |
+
|
| 764 |
+
def write_text(
    self, path, value, encoding=None, errors=None, newline=None, **kwargs
):
    """Write the text to the given file.

    An existing file will be overwritten.

    Parameters
    ----------
    path: str
        URL of file on this filesystems
    value: str
        Text to write.
    encoding, errors, newline: same as `open`.
    kwargs: passed to ``open()``.

    Returns
    -------
    Whatever the file object's ``write`` returns.
    """
    handle = self.open(
        path,
        mode="w",
        encoding=encoding,
        errors=errors,
        newline=newline,
        **kwargs,
    )
    with handle as stream:
        written = stream.write(value)
    return written
|
| 788 |
+
|
| 789 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
    """Get the content of a file

    Parameters
    ----------
    path: URL of file on this filesystems
    start, end: int
        Bytes limits of the read. If negative, backwards from end,
        like usual python slices. Either can be None for start or
        end of file, respectively
    kwargs: passed to ``open()``.

    Returns
    -------
    The bytes read. If ``end`` resolves to a position at or before the
    starting position, the result is empty (as with an empty slice).
    """
    # explicitly set buffering off?
    with self.open(path, "rb", **kwargs) as f:
        if start is not None:
            if start >= 0:
                f.seek(start)
            else:
                f.seek(max(0, f.size + start))
        if end is not None:
            if end < 0:
                end = f.size + end
            # Clamp at zero: a negative length would make read() return
            # everything up to EOF instead of nothing.
            return f.read(max(0, end - f.tell()))
        return f.read()
|
| 813 |
+
|
| 814 |
+
def pipe_file(self, path, value, mode="overwrite", **kwargs):
    """Set the bytes of given file

    Parameters
    ----------
    path: str
        Target location.
    value: bytes
        Data to write.
    mode: str
        With ``"create"``, raise ``FileExistsError`` if ``path`` already exists.
    kwargs: passed to ``open()``.
    """
    must_be_new = mode == "create"
    if must_be_new and self.exists(path):
        # non-atomic but simple way; or could use "xb" in open(), which is likely
        # not as well supported
        raise FileExistsError
    with self.open(path, "wb", **kwargs) as sink:
        sink.write(value)
|
| 822 |
+
|
| 823 |
+
def pipe(self, path, value=None, **kwargs):
    """Put value into path

    (counterpart to ``cat``)

    Parameters
    ----------
    path: string or dict(str, bytes)
        If a string, a single remote location to put ``value`` bytes; if a dict,
        a mapping of {path: bytesvalue}.
    value: bytes, optional
        If using a single path, these are the bytes to put there. Ignored if
        ``path`` is a dict
    kwargs: passed to ``pipe_file``

    Raises
    ------
    ValueError
        If ``path`` is neither a str nor a dict.
    """
    if isinstance(path, str):
        targets = {path: value}
    elif isinstance(path, dict):
        targets = path
    else:
        raise ValueError("path must be str or dict")
    for target, payload in targets.items():
        self.pipe_file(self._strip_protocol(target), payload, **kwargs)
|
| 844 |
+
|
| 845 |
+
def cat_ranges(
    self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
):
    """Get the contents of byte ranges from one or more files

    Parameters
    ----------
    paths: list
        A list of of filepaths on this filesystems
    starts, ends: int or list
        Bytes limits of the read. If using a single int, the same value will be
        used to read all the specified files.
    max_gap: None
        Not supported by this implementation; any other value raises
        ``NotImplementedError``.
    on_error: str
        With ``"return"`` (default) a failing read contributes its exception
        instance to the output; any other value re-raises.

    Returns
    -------
    list with one item (bytes or exception) per path, in order.

    Raises
    ------
    TypeError
        If ``paths`` is not a list.
    ValueError
        If ``starts`` or ``ends`` is a list whose length differs from ``paths``.
    """
    if max_gap is not None:
        raise NotImplementedError
    if not isinstance(paths, list):
        raise TypeError
    count = len(paths)
    if not isinstance(starts, list):
        starts = [starts] * count
    if not isinstance(ends, list):
        ends = [ends] * count
    if len(starts) != count or len(ends) != count:
        raise ValueError
    results = []
    for target, lo, hi in zip(paths, starts, ends):
        try:
            results.append(self.cat_file(target, lo, hi))
        except Exception as err:
            if on_error != "return":
                raise
            results.append(err)
    return results
|
| 878 |
+
|
| 879 |
+
def cat(self, path, recursive=False, on_error="raise", **kwargs):
    """Fetch (potentially multiple) paths' contents

    Parameters
    ----------
    recursive: bool
        If True, assume the path(s) are directories, and get all the
        contained files
    on_error : "raise", "omit", "return"
        If raise, an underlying exception will be raised (converted to KeyError
        if the type is in self.missing_exceptions); if omit, keys with exception
        will simply not be included in the output; if "return", all keys are
        included in the output, but the value will be bytes or an exception
        instance.
    kwargs: passed to cat_file

    Returns
    -------
    dict of {path: contents} if there are multiple paths
    or the path has been otherwise expanded
    """
    paths = self.expand_path(path, recursive=recursive, **kwargs)
    single = not (
        len(paths) > 1
        or isinstance(path, list)
        or paths[0] != self._strip_protocol(path)
    )
    if single:
        return self.cat_file(paths[0], **kwargs)

    contents = {}
    for target in paths:
        try:
            contents[target] = self.cat_file(target, **kwargs)
        except Exception as err:
            if on_error == "raise":
                raise
            if on_error == "return":
                contents[target] = err
            # any other value: leave the key out
    return contents
|
| 918 |
+
|
| 919 |
+
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
    """Copy single remote file to local

    Parameters
    ----------
    rpath: str
        Source path on this filesystem.
    lpath: str or file-like
        Local destination. If file-like, data is written to it and it is
        left open.
    callback:
        Progress callback; receives the size via ``set_size`` and each
        written segment length via ``relative_update``.
    outfile: file-like, optional
        Already-open destination to write to instead of opening ``lpath``.
    kwargs: passed to ``open()`` for the remote file.

    If ``lpath`` is not file-like and ``rpath`` is a directory, the local
    directory is created and nothing is copied.
    """
    from .implementations.local import LocalFileSystem

    if isfilelike(lpath):
        outfile = lpath
    elif self.isdir(rpath):
        os.makedirs(lpath, exist_ok=True)
        return None

    local_fs = LocalFileSystem(auto_mkdir=True)
    local_fs.makedirs(local_fs._parent(lpath), exist_ok=True)

    with self.open(rpath, "rb", **kwargs) as remote:
        if outfile is None:
            outfile = open(lpath, "wb")

        try:
            callback.set_size(getattr(remote, "size", None))
            while True:
                chunk = remote.read(self.blocksize)
                written = outfile.write(chunk)
                if written is None:
                    written = len(chunk)
                callback.relative_update(written)
                if not chunk:
                    # the final (empty) chunk has been written and reported
                    break
        finally:
            # only close what was not handed in as a file-like lpath
            if not isfilelike(lpath):
                outfile.close()
|
| 948 |
+
|
| 949 |
+
def get(
    self,
    rpath,
    lpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) to local.

    Copies a specific file or tree of files (if recursive=True). If lpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within. Can submit a list of paths, which may be glob-patterns
    and will be expanded.

    Calls get_file for each source.

    Parameters
    ----------
    rpath: str or list
        Remote source path(s).
    lpath: str or list
        Local destination(s). If both ``rpath`` and ``lpath`` are lists,
        they are paired positionally without any expansion.
    recursive: bool
        Passed to ``expand_path`` when expanding ``rpath``.
    callback:
        Progress callback; sized with the number of destinations and
        branched once per file.
    maxdepth: int or None
        Passed to ``expand_path``.
    kwargs: passed to ``expand_path`` and to ``get_file``.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(rpath, str)
        rpaths = self.expand_path(
            rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
            if not rpaths:
                # nothing left to copy after dropping directories
                return

        if isinstance(lpath, str):
            lpath = make_path_posix(lpath)

        source_is_file = len(rpaths) == 1
        dest_is_dir = isinstance(lpath, str) and (
            trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
        )

        # ``exists`` is handed to other_paths(); its exact effect on the
        # generated destination names is defined there (not visible here).
        exists = source_is_str and (
            (has_magic(rpath) and source_is_file)
            or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
        )
        lpaths = other_paths(
            rpaths,
            lpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(lpaths))
    # NOTE: the loop rebinds the ``lpath``/``rpath`` argument names per file.
    for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
        with callback.branched(rpath, lpath) as child:
            self.get_file(rpath, lpath, callback=child, **kwargs)
|
| 1012 |
+
|
| 1013 |
+
def put_file(
    self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
):
    """Copy single file to remote

    Parameters
    ----------
    lpath: str
        Local source path. If it is a directory, the remote directory is
        created and nothing is copied.
    rpath: str
        Destination path on this filesystem.
    callback:
        Progress callback; receives the local file size via ``set_size``
        and each written segment length via ``relative_update``.
    mode: str
        With ``"create"``, raise ``FileExistsError`` if ``rpath`` exists.
    kwargs: passed to ``open()`` for the remote file.
    """
    if mode == "create" and self.exists(rpath):
        raise FileExistsError
    if os.path.isdir(lpath):
        self.makedirs(rpath, exist_ok=True)
        return None

    with open(lpath, "rb") as source:
        # seek to the end to learn the size, then rewind
        total = source.seek(0, 2)
        callback.set_size(total)
        source.seek(0)

        self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
        with self.open(rpath, "wb", **kwargs) as target:
            while source.tell() < total:
                chunk = source.read(self.blocksize)
                written = target.write(chunk)
                callback.relative_update(len(chunk) if written is None else written)
|
| 1036 |
+
|
| 1037 |
+
def put(
    self,
    lpath,
    rpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    Calls put_file for each source.

    Parameters
    ----------
    lpath: str or list
        Local source path(s).
    rpath: str or list
        Remote destination(s). If both ``lpath`` and ``rpath`` are lists,
        they are paired positionally without any expansion.
    recursive: bool
        Passed to the local filesystem's ``expand_path``.
    callback:
        Progress callback; sized with the number of destinations and
        branched once per file.
    maxdepth: int or None
        Passed to the local filesystem's ``expand_path``.
    kwargs: passed to ``expand_path`` and to ``put_file``.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)
        fs = LocalFileSystem()
        lpaths = fs.expand_path(
            lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
            if not lpaths:
                # nothing left to copy after dropping directories
                return

        source_is_file = len(lpaths) == 1
        # evaluated before rpath has its protocol stripped below
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or self.isdir(rpath)
        )

        rpath = (
            self._strip_protocol(rpath)
            if isinstance(rpath, str)
            else [self._strip_protocol(p) for p in rpath]
        )
        # ``exists`` is handed to other_paths(); its exact effect on the
        # generated destination names is defined there (not visible here).
        exists = source_is_str and (
            (has_magic(lpath) and source_is_file)
            or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
        )
        rpaths = other_paths(
            lpaths,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(rpaths))
    # NOTE: the loop rebinds the ``lpath``/``rpath`` argument names per file.
    for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
        with callback.branched(lpath, rpath) as child:
            self.put_file(lpath, rpath, callback=child, **kwargs)
|
| 1104 |
+
|
| 1105 |
+
def head(self, path, size=1024):
    """Get the first ``size`` bytes from file"""
    with self.open(path, "rb") as stream:
        leading = stream.read(size)
    return leading
|
| 1109 |
+
|
| 1110 |
+
def tail(self, path, size=1024):
    """Get the last ``size`` bytes from file

    If the file is shorter than ``size``, the whole file is returned.
    """
    with self.open(path, "rb") as stream:
        # never seek back further than the start of the file
        back = min(size, stream.size)
        stream.seek(-back, 2)
        trailing = stream.read()
    return trailing
|
| 1115 |
+
|
| 1116 |
+
def cp_file(self, path1, path2, **kwargs):
    """Copy a single file from ``path1`` to ``path2`` within this filesystem.

    Not implemented in this class; always raises ``NotImplementedError``.
    ``copy`` calls this once per source/destination pair.
    """
    raise NotImplementedError
|
| 1118 |
+
|
| 1119 |
+
def copy(
    self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
):
    """Copy within two locations in the filesystem

    Parameters
    ----------
    path1: str or list
        Source path(s).
    path2: str or list
        Destination(s). If both ``path1`` and ``path2`` are lists, they are
        paired positionally without any expansion.
    recursive: bool
        Passed to ``expand_path`` when expanding ``path1``.
    maxdepth: int or None
        Passed to ``expand_path``.
    on_error : "raise", "ignore"
        If raise, any not-found exceptions will be raised; if ignore any
        not-found exceptions will cause the path to be skipped; defaults to
        raise unless recursive is true, where the default is ignore
    kwargs: passed to ``expand_path`` and to ``cp_file``.
    """
    if on_error is None and recursive:
        on_error = "ignore"
    elif on_error is None:
        on_error = "raise"

    if isinstance(path1, list) and isinstance(path2, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        paths1 = path1
        paths2 = path2
    else:
        from .implementations.local import trailing_sep

        source_is_str = isinstance(path1, str)
        paths1 = self.expand_path(
            path1, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
            if not paths1:
                # nothing left to copy after dropping directories
                return

        source_is_file = len(paths1) == 1
        dest_is_dir = isinstance(path2, str) and (
            trailing_sep(path2) or self.isdir(path2)
        )

        # ``exists`` is handed to other_paths(); its exact effect on the
        # generated destination names is defined there (not visible here).
        exists = source_is_str and (
            (has_magic(path1) and source_is_file)
            or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
        )
        paths2 = other_paths(
            paths1,
            path2,
            exists=exists,
            flatten=not source_is_str,
        )

    for p1, p2 in zip(paths1, paths2):
        try:
            self.cp_file(p1, p2, **kwargs)
        except FileNotFoundError:
            # only FileNotFoundError is subject to the on_error policy;
            # any value other than "raise" skips the path
            if on_error == "raise":
                raise
|
| 1174 |
+
|
| 1175 |
+
def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
    """Turn one or more globs or directories into a list of all matching paths
    to files or directories.

    kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``

    Parameters
    ----------
    path: str, os.PathLike or iterable of str
        A single path is wrapped in a list and processed like a list.
    recursive: bool
        If True, also include everything found below each path.
    maxdepth: int or None
        Depth limit passed to ``glob``/``find``; must be at least 1 if given.

    Returns
    -------
    Sorted list of unique paths.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is less than 1.
    FileNotFoundError
        If nothing matched.
    """

    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, (str, os.PathLike)):
        out = self.expand_path([path], recursive, maxdepth, **kwargs)
    else:
        out = set()
        path = [self._strip_protocol(p) for p in path]
        for p in path:
            if has_magic(p):
                bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                out |= bit
                if recursive:
                    # glob call above expanded one depth so if maxdepth is defined
                    # then decrement it in expand_path call below. If it is zero
                    # after decrementing then avoid expand_path call.
                    if maxdepth is not None and maxdepth <= 1:
                        continue
                    out |= set(
                        self.expand_path(
                            list(bit),
                            recursive=recursive,
                            maxdepth=maxdepth - 1 if maxdepth is not None else None,
                            **kwargs,
                        )
                    )
                # glob patterns themselves are never added to the output
                continue
            elif recursive:
                rec = set(
                    self.find(
                        p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                    )
                )
                out |= rec
            # Non-recursive: the literal path is added without an existence
            # check; recursive: only if it exists.
            if p not in out and (recursive is False or self.exists(p)):
                # should only check once, for the root
                out.add(p)
    if not out:
        raise FileNotFoundError(path)
    return sorted(out)
|
| 1222 |
+
|
| 1223 |
+
def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
    """Move file(s) from one location to another

    Implemented as ``copy`` followed by ``rm`` of the source. If the two
    paths are equal, nothing is done (a debug message is logged).

    Parameters
    ----------
    path1: str or list
        Source path(s).
    path2: str or list
        Destination path(s).
    recursive: bool
        Passed to ``copy`` and ``rm``.
    maxdepth: int or None
        Passed to ``copy``.
    kwargs: accepted for interface compatibility; not forwarded.
    """
    if path1 == path2:
        logger.debug("%s mv: The paths are the same, so no files were moved.", self)
    else:
        # explicitly raise exception to prevent data corruption: the source
        # must not be removed if any part of the copy failed. The keyword is
        # ``on_error`` (copy's parameter name); a misspelt name would be
        # swallowed by copy's **kwargs and leave the default policy in place.
        self.copy(
            path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"
        )
        self.rm(path1, recursive=recursive)
|
| 1233 |
+
|
| 1234 |
+
def rm_file(self, path):
    """Delete the single file at ``path``.

    Delegates to ``_rm``, the older name for the same operation.
    """
    remove_one = self._rm
    remove_one(path)
|
| 1237 |
+
|
| 1238 |
+
def _rm(self, path):
    """Delete one file (legacy hook).

    This is the old name for the method; ``rm_file`` is preferred. The
    implementation here always raises.

    Raises
    ------
    NotImplementedError
    """
    raise NotImplementedError()
|
| 1242 |
+
|
| 1243 |
+
def rm(self, path, recursive=False, maxdepth=None):
    """Delete files.

    Parameters
    ----------
    path: str or list of str
        File(s) to delete.
    recursive: bool
        If file(s) are directories, recursively delete contents and then
        also remove the directory
    maxdepth: int or None
        Depth to pass to walk for finding files to delete, if recursive.
        If None, there will be no limit and infinite recursion may be
        possible.
    """
    targets = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
    # Walk the expanded paths back to front; presumably so that nested
    # entries are removed before the directory that holds them.
    deletion_order = reversed(targets)
    for target in deletion_order:
        self.rm_file(target)
|
| 1261 |
+
|
| 1262 |
+
@classmethod
def _parent(cls, path):
    """Return the parent of ``path``, prefixed with ``cls.root_marker``.

    The protocol is stripped first. A path without any ``/`` has the root
    marker itself as parent.
    """
    stripped = cls._strip_protocol(path)
    if "/" not in stripped:
        return cls.root_marker
    # Everything before the last "/" is the parent; leading root-marker
    # characters are dropped and the marker is then re-added exactly once.
    head, _sep, _tail = stripped.rpartition("/")
    return cls.root_marker + head.lstrip(cls.root_marker)
|
| 1270 |
+
|
| 1271 |
+
def _open(
    self,
    path,
    mode="rb",
    block_size=None,
    autocommit=True,
    cache_options=None,
    **kwargs,
):
    """Return raw bytes-mode file-like from the file-system.

    Builds an ``AbstractBufferedFile`` bound to this filesystem; all
    arguments are forwarded to its constructor unchanged.
    """
    buffered = AbstractBufferedFile(
        self,
        path,
        mode,
        block_size,
        autocommit,
        cache_options=cache_options,
        **kwargs,
    )
    return buffered
|
| 1290 |
+
|
| 1291 |
+
def open(
    self,
    path,
    mode="rb",
    block_size=None,
    cache_options=None,
    compression=None,
    **kwargs,
):
    """
    Return a file-like object from the filesystem

    The resultant instance must function correctly in a context ``with``
    block.

    Parameters
    ----------
    path: str
        Target file
    mode: str like 'rb', 'w'
        See builtin ``open()``
        Mode "x" (exclusive write) may be implemented by the backend. Even if
        it is, whether it is checked up front or on commit, and whether it is
        atomic is implementation-dependent.
    block_size: int
        Some indication of buffering - this is a value in bytes
    cache_options : dict, optional
        Extra arguments to pass through to the cache.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding, errors, newline: passed on to TextIOWrapper for text mode
    """
    import io

    path = self._strip_protocol(path)

    if "b" in mode:
        # Binary mode: open the raw file, optionally wrap it in a codec.
        autocommit = kwargs.pop("autocommit", not self._intrans)
        handle = self._open(
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
        if compression is not None:
            from fsspec.compression import compr
            from fsspec.core import get_compression

            codec = get_compression(path, compression)
            wrap = compr[codec]
            handle = wrap(handle, mode=mode[0])

        # Non-autocommitted writes are registered with the active transaction.
        if not autocommit and "r" not in mode:
            self.transaction.files.append(handle)
        return handle

    # Text mode: open the binary equivalent and layer a TextIOWrapper on top,
    # handing the text-only keywords to the wrapper rather than the backend.
    binary_mode = mode.replace("t", "") + "b"
    text_kwargs = {}
    for key in ("encoding", "errors", "newline"):
        if key in kwargs:
            text_kwargs[key] = kwargs.pop(key)
    raw = self.open(
        path,
        binary_mode,
        block_size=block_size,
        cache_options=cache_options,
        compression=compression,
        **kwargs,
    )
    return io.TextIOWrapper(raw, **text_kwargs)
|
| 1368 |
+
|
| 1369 |
+
def touch(self, path, truncate=True, **kwargs):
    """Create empty file, or update timestamp

    Parameters
    ----------
    path: str
        file location
    truncate: bool
        If True, always set file size to 0; if False, update timestamp and
        leave file unchanged, if backend allows this

    Raises
    ------
    NotImplementedError
        When ``truncate`` is False and the file already exists, since this
        implementation has no way to update a timestamp.
    """
    if not truncate and self.exists(path):
        raise NotImplementedError  # update timestamp, if possible

    # Opening for write and closing immediately leaves an empty file.
    with self.open(path, "wb", **kwargs):
        pass
|
| 1385 |
+
|
| 1386 |
+
def ukey(self, path):
    """Hash of file properties, to tell if it has changed.

    Returns the SHA-256 hex digest of the string form of ``self.info(path)``.
    """
    info_text = str(self.info(path))
    digest = sha256(info_text.encode())
    return digest.hexdigest()
|
| 1389 |
+
|
| 1390 |
+
def read_block(self, fn, offset, length, delimiter=None):
    """Read a block of bytes from

    Starting at ``offset`` of the file, read ``length`` bytes. If
    ``delimiter`` is set then we ensure that the read starts and stops at
    delimiter boundaries that follow the locations ``offset`` and ``offset
    + length``. If ``offset`` is zero then we start at zero. The
    bytestring returned WILL include the end delimiter string.

    If offset+length is beyond the eof, reads to eof.

    Parameters
    ----------
    fn: string
        Path to filename
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read. If None, read to end.
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring

    Examples
    --------
    >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'
    >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    Use ``length=None`` to read to the end of the file.
    >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\nCharlie, 300'

    See Also
    --------
    :func:`fsspec.utils.read_block`
    """
    with self.open(fn, "rb") as f:
        total = f.size
        if total is not None:
            # A known size lets us resolve "read to end" and clamp requests
            # that would run past the end of the file.
            if length is None:
                length = total
            if offset + length > total:
                length = total - offset
        # With an unknown size, length is passed through untouched.
        return read_block(f, offset, length, delimiter)
|
| 1434 |
+
|
| 1435 |
+
def to_json(self, *, include_password: bool = True) -> str:
    """
    JSON representation of this filesystem instance.

    Parameters
    ----------
    include_password: bool, default True
        Whether to include the password (if any) in the output.

    Returns
    -------
    JSON string with keys ``cls`` (the python location of this class),
    protocol (text name of this class's protocol, first one in case of
    multiple), ``args`` (positional args, usually empty), and all other
    keyword arguments as their own keys.

    Warnings
    --------
    Serialized filesystems may contain sensitive information which have been
    passed to the constructor, such as passwords and tokens. Make sure you
    store and send them in a secure environment!
    """
    from .json import FilesystemJSONEncoder

    # json.dumps takes an encoder *class*, so the per-call option is carried
    # as a class attribute on a throwaway subclass.
    encoder_cls = type(
        "_FilesystemJSONEncoder",
        (FilesystemJSONEncoder,),
        {"include_password": include_password},
    )
    return json.dumps(self, cls=encoder_cls)
|
| 1467 |
+
|
| 1468 |
+
@staticmethod
def from_json(blob: str) -> AbstractFileSystem:
    """
    Recreate a filesystem instance from JSON representation.

    See ``.to_json()`` for the expected structure of the input.

    Parameters
    ----------
    blob: str

    Returns
    -------
    file system instance, not necessarily of this particular class.

    Warnings
    --------
    This can import arbitrary modules (as determined by the ``cls`` key).
    Make sure you haven't installed any modules that may execute malicious code
    at import time.
    """
    from .json import FilesystemJSONDecoder

    decoder_cls = FilesystemJSONDecoder
    return json.loads(blob, cls=decoder_cls)
|
| 1492 |
+
|
| 1493 |
+
def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
    """
    JSON-serializable dictionary representation of this filesystem instance.

    Parameters
    ----------
    include_password: bool, default True
        Whether to include the password (if any) in the output.

    Returns
    -------
    Dictionary with keys ``cls`` (the python location of this class),
    protocol (text name of this class's protocol, first one in case of
    multiple), ``args`` (positional args, usually empty), and all other
    keyword arguments as their own keys.

    Warnings
    --------
    Serialized filesystems may contain sensitive information which have been
    passed to the constructor, such as passwords and tokens. Make sure you
    store and send them in a secure environment!
    """
    from .json import FilesystemJSONEncoder

    encoder = FilesystemJSONEncoder()
    fs_cls = type(self)

    # Only the first protocol name is recorded when several are declared.
    protocol = self.protocol
    if isinstance(protocol, (tuple, list)):
        protocol = protocol[0]

    # Work on a copy so dropping the password never touches the instance.
    options = dict(self.storage_options)
    if not include_password:
        options.pop("password", None)

    return dict(
        cls=f"{fs_cls.__module__}:{fs_cls.__name__}",
        protocol=protocol,
        args=encoder.make_serializable(self.storage_args),
        **encoder.make_serializable(options),
    )
|
| 1532 |
+
|
| 1533 |
+
@staticmethod
def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
    """
    Recreate a filesystem instance from dictionary representation.

    See ``.to_dict()`` for the expected structure of the input.

    Parameters
    ----------
    dct: Dict[str, Any]

    Returns
    -------
    file system instance, not necessarily of this particular class.

    Raises
    ------
    ValueError
        If no filesystem class can be resolved from ``dct``.

    Warnings
    --------
    This can import arbitrary modules (as determined by the ``cls`` key).
    Make sure you haven't installed any modules that may execute malicious code
    at import time.
    """
    from .json import FilesystemJSONDecoder

    decoder = FilesystemJSONDecoder()

    payload = dict(dct)  # Defensive copy; the caller's dict is left intact

    fs_cls = FilesystemJSONDecoder.try_resolve_fs_cls(payload)
    if fs_cls is None:
        raise ValueError("Not a serialized AbstractFileSystem")

    # Bookkeeping keys are not constructor arguments.
    for key in ("cls", "protocol"):
        payload.pop(key, None)

    # "args" must be popped before the remainder is treated as keywords.
    positional = decoder.unmake_serializable(payload.pop("args", ()))
    keywords = decoder.unmake_serializable(payload)
    return fs_cls(*positional, **keywords)
|
| 1571 |
+
|
| 1572 |
+
def _get_pyarrow_filesystem(self):
    """
    Make a version of the FS instance which will be acceptable to pyarrow

    This implementation hands back the instance itself.
    """
    # all instances already also derive from pyarrow
    pyarrow_compatible = self
    return pyarrow_compatible
|
| 1578 |
+
|
| 1579 |
+
def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
    """Create key/value store based on this file-system

    Makes a MutableMapping interface to the FS at the given root path.
    See ``fsspec.mapping.FSMap`` for further details; ``check``, ``create``
    and ``missing_exceptions`` are forwarded to it unchanged.
    """
    from .mapping import FSMap

    mapper_options = {
        "check": check,
        "create": create,
        "missing_exceptions": missing_exceptions,
    }
    return FSMap(root, self, **mapper_options)
|
| 1594 |
+
|
| 1595 |
+
@classmethod
def clear_instance_cache(cls):
    """
    Clear the cache of filesystem instances.

    Notes
    -----
    Unless overridden by setting the ``cachable`` class attribute to False,
    the filesystem class stores a reference to newly created instances. This
    prevents Python's normal rules around garbage collection from working,
    since the instances refcount will not drop to zero until
    ``clear_instance_cache`` is called.
    """
    instance_cache = cls._cache
    instance_cache.clear()
|
| 1609 |
+
|
| 1610 |
+
def created(self, path):
    """Return the created timestamp of a file as a datetime.datetime

    This implementation always raises.

    Raises
    ------
    NotImplementedError
    """
    raise NotImplementedError()
|
| 1613 |
+
|
| 1614 |
+
def modified(self, path):
    """Return the modified timestamp of a file as a datetime.datetime

    This implementation always raises.

    Raises
    ------
    NotImplementedError
    """
    raise NotImplementedError()
|
| 1617 |
+
|
| 1618 |
+
def tree(
    self,
    path: str = "/",
    recursion_limit: int = 2,
    max_display: int = 25,
    display_size: bool = False,
    prefix: str = "",
    is_last: bool = True,
    first: bool = True,
    indent_size: int = 4,
) -> str:
    """
    Return a tree-like structure of the filesystem starting from the given path as a string.

    Parameters
    ----------
        path: Root path to start traversal from
        recursion_limit: Maximum depth of directory traversal
        max_display: Maximum number of items to display per directory
        display_size: Whether to display file sizes
        prefix: Current line prefix for visual tree structure
        is_last: Whether current item is last in its level (accepted for
            compatibility; not read by this implementation)
        first: Whether this is the first call (displays root path)
        indent_size: Number of spaces by indent

    Returns
    -------
        str: A string representing the tree structure.

    Notes
    -----
    The listing returned by ``ls`` is sorted into a new list, so the object
    handed back by ``ls`` is never reordered. A file entry whose ``size`` is
    ``None`` is shown without a size annotation.

    Example
    -------
        >>> from fsspec import filesystem

        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
        >>> print(tree)
    """

    def format_bytes(n: int) -> str:
        """Format bytes as text."""
        for unit, k in (
            ("P", 2**50),
            ("T", 2**40),
            ("G", 2**30),
            ("M", 2**20),
            ("k", 2**10),
        ):
            if n >= 0.9 * k:
                return f"{n / k:.2f} {unit}b"
        return f"{n}B"

    def plural(count: int, noun: str) -> str:
        """Return e.g. ``"1 file"`` or ``"3 files"``."""
        return f"{count} {noun}{'s' if count > 1 else ''}"

    result = []

    if first:
        result.append(path)

    if recursion_limit:
        indent = " " * indent_size
        rule = "─" * (indent_size - 2)

        # sorted() rather than list.sort(): the list returned by ls() may
        # still be referenced elsewhere (e.g. a listing cache -- TODO confirm
        # per backend), so it must not be reordered in place.
        # Directories come first, then entries ordered by name.
        contents = sorted(
            self.ls(path, detail=True),
            key=lambda x: (x.get("type") != "directory", x.get("name", "")),
        )

        if max_display is not None and len(contents) > max_display:
            displayed_contents = contents[:max_display]
            remaining_count = len(contents) - max_display
        else:
            displayed_contents = contents
            remaining_count = 0

        last_index = len(displayed_contents) - 1
        for i, item in enumerate(displayed_contents):
            # When a "more items" line follows, no entry is drawn as last.
            is_last_item = i == last_index and remaining_count == 0

            branch = ("└" if is_last_item else "├") + rule + " "
            new_prefix = prefix + (
                indent if is_last_item else "│" + " " * (indent_size - 1)
            )

            entry_type = item.get("type")
            name = os.path.basename(item.get("name", ""))

            if display_size and entry_type == "directory":
                sub_contents = self.ls(item.get("name", ""), detail=True)
                num_files = sum(
                    1 for sub_item in sub_contents if sub_item.get("type") == "file"
                )
                num_folders = sum(
                    1
                    for sub_item in sub_contents
                    if sub_item.get("type") == "directory"
                )
                parts = []
                if num_files:
                    parts.append(plural(num_files, "file"))
                if num_folders:
                    parts.append(plural(num_folders, "subfolder"))
                size = f" ({', '.join(parts)})" if parts else " (empty folder)"
            elif display_size and entry_type == "file":
                nbytes = item.get("size", 0)
                # An entry may carry size=None when the size is unknown;
                # formatting None would raise TypeError, so omit it instead.
                size = f" ({format_bytes(nbytes)})" if nbytes is not None else ""
            else:
                size = ""

            result.append(f"{prefix}{branch}{name}{size}")

            if entry_type == "directory" and recursion_limit > 0:
                result.append(
                    self.tree(
                        path=item.get("name", ""),
                        recursion_limit=recursion_limit - 1,
                        max_display=max_display,
                        display_size=display_size,
                        prefix=new_prefix,
                        is_last=is_last_item,
                        first=False,
                        indent_size=indent_size,
                    )
                )

        if remaining_count > 0:
            more_message = f"{remaining_count} more item(s) not displayed."
            result.append(f"{prefix}{'└' + rule} {more_message}")

    # Empty strings come from sub-trees with nothing to show; drop them.
    return "\n".join(line for line in result if line)
|
| 1752 |
+
|
| 1753 |
+
# ------------------------------------------------------------------------
|
| 1754 |
+
# Aliases
|
| 1755 |
+
|
| 1756 |
+
def read_bytes(self, path, start=None, end=None, **kwargs):
    """Alias of `AbstractFileSystem.cat_file`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    data = self.cat_file(path, start=start, end=end, **kwargs)
    return data
|
| 1759 |
+
|
| 1760 |
+
def write_bytes(self, path, value, **kwargs):
    """Alias of `AbstractFileSystem.pipe_file`.

    Arguments are forwarded unchanged; nothing is returned.
    """
    write = self.pipe_file
    write(path, value, **kwargs)
|
| 1763 |
+
|
| 1764 |
+
def makedir(self, path, create_parents=True, **kwargs):
    """Alias of `AbstractFileSystem.mkdir`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.mkdir(path, create_parents=create_parents, **kwargs)
    return outcome
|
| 1767 |
+
|
| 1768 |
+
def mkdirs(self, path, exist_ok=False):
    """Alias of `AbstractFileSystem.makedirs`.

    Both arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.makedirs(path, exist_ok=exist_ok)
    return outcome
|
| 1771 |
+
|
| 1772 |
+
def listdir(self, path, detail=True, **kwargs):
    """Alias of `AbstractFileSystem.ls`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    entries = self.ls(path, detail=detail, **kwargs)
    return entries
|
| 1775 |
+
|
| 1776 |
+
def cp(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.copy`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.copy(path1, path2, **kwargs)
    return outcome
|
| 1779 |
+
|
| 1780 |
+
def move(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.mv`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.mv(path1, path2, **kwargs)
    return outcome
|
| 1783 |
+
|
| 1784 |
+
def stat(self, path, **kwargs):
    """Alias of `AbstractFileSystem.info`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    details = self.info(path, **kwargs)
    return details
|
| 1787 |
+
|
| 1788 |
+
def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
    """Alias of `AbstractFileSystem.du`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    usage = self.du(path, total=total, maxdepth=maxdepth, **kwargs)
    return usage
|
| 1791 |
+
|
| 1792 |
+
def rename(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.mv`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.mv(path1, path2, **kwargs)
    return outcome
|
| 1795 |
+
|
| 1796 |
+
def delete(self, path, recursive=False, maxdepth=None):
    """Alias of `AbstractFileSystem.rm`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.rm(path, recursive=recursive, maxdepth=maxdepth)
    return outcome
|
| 1799 |
+
|
| 1800 |
+
def upload(self, lpath, rpath, recursive=False, **kwargs):
    """Alias of `AbstractFileSystem.put`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.put(lpath, rpath, recursive=recursive, **kwargs)
    return outcome
|
| 1803 |
+
|
| 1804 |
+
def download(self, rpath, lpath, recursive=False, **kwargs):
    """Alias of `AbstractFileSystem.get`.

    All arguments are forwarded unchanged and the result is returned as is.
    """
    outcome = self.get(rpath, lpath, recursive=recursive, **kwargs)
    return outcome
|
| 1807 |
+
|
| 1808 |
+
def sign(self, path, expiration=100, **kwargs):
    """Create a signed URL representing the given path

    Some implementations allow temporary URLs to be generated, as a
    way of delegating credentials. This implementation does not and
    always raises.

    Parameters
    ----------
    path : str
         The path on the filesystem
    expiration : int
        Number of seconds to enable the URL for (if supported)

    Returns
    -------
    URL : str
        The signed URL

    Raises
    ------
    NotImplementedError : if method is not implemented for a filesystem
    """
    error = NotImplementedError("Sign is not implemented for this filesystem")
    raise error
|
| 1831 |
+
|
| 1832 |
+
def _isfilestore(self):
    """Legacy pyarrow compatibility hook; always returns False."""
    # Originally inherited from pyarrow DaskFileSystem. Keeping this
    # here for backwards compatibility as long as pyarrow uses its
    # legacy fsspec-compatible filesystems and thus accepts fsspec
    # filesystems as well
    is_file_store = False
    return is_file_store
|
| 1838 |
+
|
| 1839 |
+
|
| 1840 |
+
class AbstractBufferedFile(io.IOBase):
|
| 1841 |
+
"""Convenient class to derive from to provide buffering
|
| 1842 |
+
|
| 1843 |
+
In the case that the backend does not provide a pythonic file-like object
|
| 1844 |
+
already, this class contains much of the logic to build one. The only
|
| 1845 |
+
methods that need to be overridden are ``_upload_chunk``,
|
| 1846 |
+
``_initiate_upload`` and ``_fetch_range``.
|
| 1847 |
+
"""
|
| 1848 |
+
|
| 1849 |
+
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
| 1850 |
+
_details = None
|
| 1851 |
+
|
| 1852 |
+
def __init__(
|
| 1853 |
+
self,
|
| 1854 |
+
fs,
|
| 1855 |
+
path,
|
| 1856 |
+
mode="rb",
|
| 1857 |
+
block_size="default",
|
| 1858 |
+
autocommit=True,
|
| 1859 |
+
cache_type="readahead",
|
| 1860 |
+
cache_options=None,
|
| 1861 |
+
size=None,
|
| 1862 |
+
**kwargs,
|
| 1863 |
+
):
|
| 1864 |
+
"""
|
| 1865 |
+
Template for files with buffered reading and writing
|
| 1866 |
+
|
| 1867 |
+
Parameters
|
| 1868 |
+
----------
|
| 1869 |
+
fs: instance of FileSystem
|
| 1870 |
+
path: str
|
| 1871 |
+
location in file-system
|
| 1872 |
+
mode: str
|
| 1873 |
+
Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
|
| 1874 |
+
systems may be read-only, and some may not support append.
|
| 1875 |
+
block_size: int
|
| 1876 |
+
Buffer size for reading or writing, 'default' for class default
|
| 1877 |
+
autocommit: bool
|
| 1878 |
+
Whether to write to final destination; may only impact what
|
| 1879 |
+
happens when file is being closed.
|
| 1880 |
+
cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
|
| 1881 |
+
Caching policy in read mode. See the definitions in ``core``.
|
| 1882 |
+
cache_options : dict
|
| 1883 |
+
Additional options passed to the constructor for the cache specified
|
| 1884 |
+
by `cache_type`.
|
| 1885 |
+
size: int
|
| 1886 |
+
If given and in read mode, suppressed having to look up the file size
|
| 1887 |
+
kwargs:
|
| 1888 |
+
Gets stored as self.kwargs
|
| 1889 |
+
"""
|
| 1890 |
+
from .core import caches
|
| 1891 |
+
|
| 1892 |
+
self.path = path
|
| 1893 |
+
self.fs = fs
|
| 1894 |
+
self.mode = mode
|
| 1895 |
+
self.blocksize = (
|
| 1896 |
+
self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
|
| 1897 |
+
)
|
| 1898 |
+
self.loc = 0
|
| 1899 |
+
self.autocommit = autocommit
|
| 1900 |
+
self.end = None
|
| 1901 |
+
self.start = None
|
| 1902 |
+
self.closed = False
|
| 1903 |
+
|
| 1904 |
+
if cache_options is None:
|
| 1905 |
+
cache_options = {}
|
| 1906 |
+
|
| 1907 |
+
if "trim" in kwargs:
|
| 1908 |
+
warnings.warn(
|
| 1909 |
+
"Passing 'trim' to control the cache behavior has been deprecated. "
|
| 1910 |
+
"Specify it within the 'cache_options' argument instead.",
|
| 1911 |
+
FutureWarning,
|
| 1912 |
+
)
|
| 1913 |
+
cache_options["trim"] = kwargs.pop("trim")
|
| 1914 |
+
|
| 1915 |
+
self.kwargs = kwargs
|
| 1916 |
+
|
| 1917 |
+
if mode not in {"ab", "rb", "wb", "xb"}:
|
| 1918 |
+
raise NotImplementedError("File mode not supported")
|
| 1919 |
+
if mode == "rb":
|
| 1920 |
+
if size is not None:
|
| 1921 |
+
self.size = size
|
| 1922 |
+
else:
|
| 1923 |
+
self.size = self.details["size"]
|
| 1924 |
+
self.cache = caches[cache_type](
|
| 1925 |
+
self.blocksize, self._fetch_range, self.size, **cache_options
|
| 1926 |
+
)
|
| 1927 |
+
else:
|
| 1928 |
+
self.buffer = io.BytesIO()
|
| 1929 |
+
self.offset = None
|
| 1930 |
+
self.forced = False
|
| 1931 |
+
self.location = None
|
| 1932 |
+
|
| 1933 |
+
@property
|
| 1934 |
+
def details(self):
|
| 1935 |
+
if self._details is None:
|
| 1936 |
+
self._details = self.fs.info(self.path)
|
| 1937 |
+
return self._details
|
| 1938 |
+
|
| 1939 |
+
@details.setter
|
| 1940 |
+
def details(self, value):
|
| 1941 |
+
self._details = value
|
| 1942 |
+
self.size = value["size"]
|
| 1943 |
+
|
| 1944 |
+
@property
|
| 1945 |
+
def full_name(self):
|
| 1946 |
+
return _unstrip_protocol(self.path, self.fs)
|
| 1947 |
+
|
| 1948 |
+
@property
|
| 1949 |
+
def closed(self):
|
| 1950 |
+
# get around this attr being read-only in IOBase
|
| 1951 |
+
# use getattr here, since this can be called during del
|
| 1952 |
+
return getattr(self, "_closed", True)
|
| 1953 |
+
|
| 1954 |
+
@closed.setter
|
| 1955 |
+
def closed(self, c):
|
| 1956 |
+
self._closed = c
|
| 1957 |
+
|
| 1958 |
+
def __hash__(self):
|
| 1959 |
+
if "w" in self.mode:
|
| 1960 |
+
return id(self)
|
| 1961 |
+
else:
|
| 1962 |
+
return int(tokenize(self.details), 16)
|
| 1963 |
+
|
| 1964 |
+
def __eq__(self, other):
|
| 1965 |
+
"""Files are equal if they have the same checksum, only in read mode"""
|
| 1966 |
+
if self is other:
|
| 1967 |
+
return True
|
| 1968 |
+
return (
|
| 1969 |
+
isinstance(other, type(self))
|
| 1970 |
+
and self.mode == "rb"
|
| 1971 |
+
and other.mode == "rb"
|
| 1972 |
+
and hash(self) == hash(other)
|
| 1973 |
+
)
|
| 1974 |
+
|
| 1975 |
+
def commit(self):
|
| 1976 |
+
"""Move from temp to final destination"""
|
| 1977 |
+
|
| 1978 |
+
def discard(self):
|
| 1979 |
+
"""Throw away temporary file"""
|
| 1980 |
+
|
| 1981 |
+
def info(self):
|
| 1982 |
+
"""File information about this path"""
|
| 1983 |
+
if self.readable():
|
| 1984 |
+
return self.details
|
| 1985 |
+
else:
|
| 1986 |
+
raise ValueError("Info not available while writing")
|
| 1987 |
+
|
| 1988 |
+
def tell(self):
|
| 1989 |
+
"""Current file location"""
|
| 1990 |
+
return self.loc
|
| 1991 |
+
|
| 1992 |
+
def seek(self, loc, whence=0):
|
| 1993 |
+
"""Set current file location
|
| 1994 |
+
|
| 1995 |
+
Parameters
|
| 1996 |
+
----------
|
| 1997 |
+
loc: int
|
| 1998 |
+
byte location
|
| 1999 |
+
whence: {0, 1, 2}
|
| 2000 |
+
from start of file, current location or end of file, resp.
|
| 2001 |
+
"""
|
| 2002 |
+
loc = int(loc)
|
| 2003 |
+
if not self.mode == "rb":
|
| 2004 |
+
raise OSError(ESPIPE, "Seek only available in read mode")
|
| 2005 |
+
if whence == 0:
|
| 2006 |
+
nloc = loc
|
| 2007 |
+
elif whence == 1:
|
| 2008 |
+
nloc = self.loc + loc
|
| 2009 |
+
elif whence == 2:
|
| 2010 |
+
nloc = self.size + loc
|
| 2011 |
+
else:
|
| 2012 |
+
raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
|
| 2013 |
+
if nloc < 0:
|
| 2014 |
+
raise ValueError("Seek before start of file")
|
| 2015 |
+
self.loc = nloc
|
| 2016 |
+
return self.loc
|
| 2017 |
+
|
| 2018 |
+
def write(self, data):
|
| 2019 |
+
"""
|
| 2020 |
+
Write data to buffer.
|
| 2021 |
+
|
| 2022 |
+
Buffer only sent on flush() or if buffer is greater than
|
| 2023 |
+
or equal to blocksize.
|
| 2024 |
+
|
| 2025 |
+
Parameters
|
| 2026 |
+
----------
|
| 2027 |
+
data: bytes
|
| 2028 |
+
Set of bytes to be written.
|
| 2029 |
+
"""
|
| 2030 |
+
if not self.writable():
|
| 2031 |
+
raise ValueError("File not in write mode")
|
| 2032 |
+
if self.closed:
|
| 2033 |
+
raise ValueError("I/O operation on closed file.")
|
| 2034 |
+
if self.forced:
|
| 2035 |
+
raise ValueError("This file has been force-flushed, can only close")
|
| 2036 |
+
out = self.buffer.write(data)
|
| 2037 |
+
self.loc += out
|
| 2038 |
+
if self.buffer.tell() >= self.blocksize:
|
| 2039 |
+
self.flush()
|
| 2040 |
+
return out
|
| 2041 |
+
|
| 2042 |
+
def flush(self, force=False):
|
| 2043 |
+
"""
|
| 2044 |
+
Write buffered data to backend store.
|
| 2045 |
+
|
| 2046 |
+
Writes the current buffer, if it is larger than the block-size, or if
|
| 2047 |
+
the file is being closed.
|
| 2048 |
+
|
| 2049 |
+
Parameters
|
| 2050 |
+
----------
|
| 2051 |
+
force: bool
|
| 2052 |
+
When closing, write the last block even if it is smaller than
|
| 2053 |
+
blocks are allowed to be. Disallows further writing to this file.
|
| 2054 |
+
"""
|
| 2055 |
+
|
| 2056 |
+
if self.closed:
|
| 2057 |
+
raise ValueError("Flush on closed file")
|
| 2058 |
+
if force and self.forced:
|
| 2059 |
+
raise ValueError("Force flush cannot be called more than once")
|
| 2060 |
+
if force:
|
| 2061 |
+
self.forced = True
|
| 2062 |
+
|
| 2063 |
+
if self.readable():
|
| 2064 |
+
# no-op to flush on read-mode
|
| 2065 |
+
return
|
| 2066 |
+
|
| 2067 |
+
if not force and self.buffer.tell() < self.blocksize:
|
| 2068 |
+
# Defer write on small block
|
| 2069 |
+
return
|
| 2070 |
+
|
| 2071 |
+
if self.offset is None:
|
| 2072 |
+
# Initialize a multipart upload
|
| 2073 |
+
self.offset = 0
|
| 2074 |
+
try:
|
| 2075 |
+
self._initiate_upload()
|
| 2076 |
+
except:
|
| 2077 |
+
self.closed = True
|
| 2078 |
+
raise
|
| 2079 |
+
|
| 2080 |
+
if self._upload_chunk(final=force) is not False:
|
| 2081 |
+
self.offset += self.buffer.seek(0, 2)
|
| 2082 |
+
self.buffer = io.BytesIO()
|
| 2083 |
+
|
| 2084 |
+
def _upload_chunk(self, final=False):
    """Write one part of a multi-block file upload

    This base implementation does nothing and returns ``None``.

    Parameters
    ==========
    final: bool
        This is the last block, so should complete file, if
        self.autocommit is True.
    """
    # the upload may not have been set up yet; implementations may need to
    # call _initiate_upload themselves
    return None
|
| 2094 |
+
|
| 2095 |
+
def _initiate_upload(self):
    """Create remote file/upload (no-op in this base implementation)"""
    return None
|
| 2098 |
+
|
| 2099 |
+
def _fetch_range(self, start, end):
    """Return the bytes ``start``..``end`` of ``self.path`` via ``self.fs.cat_file``"""
    fetch = self.fs.cat_file
    return fetch(self.path, start=start, end=end)
|
| 2102 |
+
|
| 2103 |
+
def read(self, length=-1):
    """
    Read bytes from the current position, going through the cache.

    Parameters
    ----------
    length: int (-1)
        How many bytes to return. ``None`` or a negative value means
        everything from the current location up to ``self.size``.

    Raises
    ------
    ValueError
        If the file mode is not ``"rb"`` or the file is closed.
    """
    length = int(length) if length is not None else -1
    if self.mode != "rb":
        raise ValueError("File not in read mode")
    if length < 0:
        length = self.size - self.loc
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    if length == 0:
        # nothing requested: skip the cache entirely
        return b""

    out = self.cache._fetch(self.loc, self.loc + length)
    logger.debug(
        "%s read: %i - %i %s",
        self,
        self.loc,
        self.loc + length,
        self.cache._log_stats(),
    )
    # advance by what was actually returned, which may be shorter
    self.loc += len(out)
    return out
|
| 2133 |
+
|
| 2134 |
+
def readinto(self, b):
    """Fill the writable buffer ``b`` from the file, like builtin ``readinto``

    Returns the number of bytes copied into ``b``.

    https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
    """
    view = memoryview(b).cast("B")
    chunk = self.read(view.nbytes)
    count = len(chunk)
    view[:count] = chunk
    return count
|
| 2143 |
+
|
| 2144 |
+
def readuntil(self, char=b"\n", blocks=None):
    """Return data between current position and first occurrence of char

    char is included in the output, except if the end of the file is
    encountered first. The file position is left just after ``char`` (or
    at the end of the file if it was not found).

    A multi-byte ``char`` is also found when it straddles the boundary
    between two consecutive reads.

    Parameters
    ----------
    char: bytes
        Thing to find
    blocks: None or int
        How much to read in each go. Defaults to file blocksize - which may
        mean a new read on every call.
    """
    out = []
    # Trailing bytes of the data read so far (at most len(char) - 1), kept
    # so that a delimiter split across two reads can still be matched.
    tail = b""
    keep = len(char) - 1
    while True:
        start = self.tell()
        part = self.read(blocks or self.blocksize)
        if len(part) == 0:
            break
        window = tail + part
        found = window.find(char)
        if found > -1:
            # End of the match in ``part`` coordinates. ``tail`` is shorter
            # than ``char``, so a match always ends inside ``part``.
            end = found + len(char) - len(tail)
            out.append(part[:end])
            self.seek(start + end)
            break
        out.append(part)
        tail = window[-keep:] if keep > 0 else b""
    return b"".join(out)
|
| 2171 |
+
|
| 2172 |
+
def readline(self):
    """Read up to and including the next ``b"\\n"`` byte

    Because of character encodings, that byte is not necessarily a true
    line ending.
    """
    newline = b"\n"
    return self.readuntil(newline)
|
| 2179 |
+
|
| 2180 |
+
def __next__(self):
    """Return the next line, raising ``StopIteration`` once ``readline`` is empty"""
    line = self.readline()
    if not line:
        raise StopIteration
    return line
|
| 2185 |
+
|
| 2186 |
+
def __iter__(self):
    """The file is its own iterator (lines come from ``__next__``)"""
    return self
|
| 2188 |
+
|
| 2189 |
+
def readlines(self):
    """Return all remaining data as a list of lines

    The data is split on ``b"\\n"``; every line keeps its trailing newline,
    except possibly the last one when the data does not end with a newline.
    If there is no data left, an empty list is returned (rather than a list
    holding one empty bytestring), which matches iterating over the file.
    """
    data = self.read()
    pieces = data.split(b"\n")
    lines = [piece + b"\n" for piece in pieces[:-1]]
    # The final piece is empty when the data ends with a newline or is
    # empty altogether; only a non-empty remainder is a line.
    if pieces[-1]:
        lines.append(pieces[-1])
    return lines
|
| 2199 |
+
|
| 2200 |
+
def readinto1(self, b):
    """Same as ``readinto``: fill ``b`` and return the number of bytes read"""
    count = self.readinto(b)
    return count
|
| 2202 |
+
|
| 2203 |
+
def close(self):
    """Close file

    In read mode the cache is dropped. Otherwise any pending data is
    force-flushed (unless that already happened) and the filesystem's
    cache entries for this path and its parent are invalidated. The file
    is marked closed even if one of these steps raises.

    Does nothing if the file is already closed or carries a truthy
    ``_unclosable`` attribute.
    """
    if getattr(self, "_unclosable", False) or self.closed:
        return
    try:
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                self.flush(force=True)

            fs = self.fs
            if fs is not None:
                fs.invalidate_cache(self.path)
                fs.invalidate_cache(fs._parent(self.path))
    finally:
        self.closed = True
|
| 2224 |
+
|
| 2225 |
+
def readable(self):
    """Whether opened for reading (mode contains "r" and file is not closed)"""
    if "r" not in self.mode:
        return False
    return not self.closed
|
| 2228 |
+
|
| 2229 |
+
def seekable(self):
    """Whether is seekable (same answer as ``readable()``)"""
    can_read = self.readable()
    return can_read
|
| 2232 |
+
|
| 2233 |
+
def writable(self):
    """Whether opened for writing (mode is "wb", "ab" or "xb" and file is not closed)"""
    if self.mode not in {"wb", "ab", "xb"}:
        return False
    return not self.closed
|
| 2236 |
+
|
| 2237 |
+
def __reduce__(self):
    """Pickle support: describe how to re-open this file via ``reopen``

    Only files in mode ``"rb"`` can be pickled; anything else raises
    ``RuntimeError``.
    """
    if self.mode != "rb":
        raise RuntimeError("Pickling a writeable file is not supported")

    # positional arguments for ``reopen``, in its parameter order
    reopen_args = (
        self.fs,
        self.path,
        self.mode,
        self.blocksize,
        self.loc,
        self.size,
        self.autocommit,
        self.cache.name if self.cache else "none",
        self.kwargs,
    )
    return reopen, reopen_args
|
| 2252 |
+
|
| 2253 |
+
def __del__(self):
    """Close the file on garbage collection if it is still open"""
    if self.closed:
        return
    self.close()
|
| 2256 |
+
|
| 2257 |
+
def __str__(self):
    """Short description built from the filesystem class name and the path"""
    fs_name = type(self.fs).__name__
    return f"<File-like object {fs_name}, {self.path}>"

# repr() gives the same text as str()
__repr__ = __str__
|
| 2261 |
+
|
| 2262 |
+
def __enter__(self):
    """Context manager entry: the file itself is the managed object"""
    return self
|
| 2264 |
+
|
| 2265 |
+
def __exit__(self, *args):
    """Context manager exit: close the file; exceptions are not suppressed"""
    self.close()
    return None
|
| 2267 |
+
|
| 2268 |
+
|
| 2269 |
+
def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
    """Open ``path`` on ``fs`` again and seek to ``loc`` if it is positive

    The remaining arguments are forwarded to ``fs.open`` as keywords, with
    ``blocksize`` passed as ``block_size`` and ``kwargs`` expanded as extra
    keyword arguments.
    """
    open_options = {
        "mode": mode,
        "block_size": blocksize,
        "autocommit": autocommit,
        "cache_type": cache_type,
        "size": size,
    }
    file = fs.open(path, **open_options, **kwargs)
    if loc > 0:
        file.seek(loc)
    return file
|
env/lib/python3.13/site-packages/fsspec/transaction.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import deque
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Transaction:
    """Filesystem transaction write context

    Collects files whose commit is deferred, so that a group of writes can
    be committed or discarded together (semi-atomically). The instance is
    meant to live as the ``.transaction`` attribute of a filesystem.
    """

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        no_error = exc_type is None
        self.complete(commit=no_error)
        # ``complete`` normally detaches the filesystem already; this covers
        # the case where ``self.fs`` is still set afterwards.
        fs = self.fs
        if fs:
            fs._intrans = False
            fs._transaction = None
            self.fs = None

    def start(self):
        """Start a transaction on this FileSystem"""
        # fresh queue: drop anything left over from a failed completion
        self.files = deque()
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        action = "commit" if commit else "discard"
        while self.files:
            pending = self.files.popleft()
            getattr(pending, action)()
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class FileActor:
    """Holds a list of deferred files and commits or discards them in bulk"""

    def __init__(self):
        self.files = []

    def _finish(self, action):
        """Call the method named ``action`` on every held file, then empty the list"""
        for pending in self.files:
            getattr(pending, action)()
        self.files.clear()

    def commit(self):
        """Commit every held file and forget them"""
        self._finish("commit")

    def discard(self):
        """Discard every held file and forget them"""
        self._finish("discard")

    def append(self, f):
        """Add ``f`` to the held files"""
        self.files.append(f)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class DaskTransaction(Transaction):
    """Transaction whose deferred files are held by a ``FileActor`` created
    through the default ``distributed`` client."""

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        **kwargs:
            Passed on to ``Transaction.__init__`` (accepted so that this
            class can be constructed the same way as its base class).
        """
        # imported here so the module does not require ``distributed``
        import distributed

        super().__init__(fs, **kwargs)
        client = distributed.default_client()
        # replace the local deque with a remote actor holding the files
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
        # Detach from the filesystem exactly as ``Transaction.complete`` does.
        # ``Transaction.__exit__`` skips its own reset once ``self.fs`` is
        # None, so without this the filesystem would keep a reference to a
        # finished transaction that has no ``fs``.
        self.fs._transaction = None
        self.fs = None
|
env/lib/python3.13/site-packages/fsspec/utils.py
ADDED
|
@@ -0,0 +1,738 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import logging
|
| 5 |
+
import math
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import sys
|
| 9 |
+
import tempfile
|
| 10 |
+
from collections.abc import Callable, Iterable, Iterator, Sequence
|
| 11 |
+
from functools import partial
|
| 12 |
+
from hashlib import md5
|
| 13 |
+
from importlib.metadata import version
|
| 14 |
+
from typing import IO, TYPE_CHECKING, Any, TypeVar
|
| 15 |
+
from urllib.parse import urlsplit
|
| 16 |
+
|
| 17 |
+
if TYPE_CHECKING:
|
| 18 |
+
import pathlib
|
| 19 |
+
from typing import TypeGuard
|
| 20 |
+
|
| 21 |
+
from fsspec.spec import AbstractFileSystem
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# Default block size: 5 * 2**20 = 5242880 (presumably bytes, i.e. 5 MiB — TODO confirm unit with callers)
DEFAULT_BLOCK_SIZE = 5 * 2**20

# Generic type variable used in this module's annotations
T = TypeVar("T")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Derive storage options from a URL or path and merge in given ones.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Extra options merged into the inferred ones; a key present in both
        with different values raises ``KeyError``.

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path": "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Windows paths with a drive letter, and anything without a
    # "scheme://" prefix, are plain local paths.
    has_drive_letter = re.match(r"^[a-zA-Z]:[\\/]", urlpath)
    if has_drive_letter or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None:
        return {"protocol": "file", "path": urlpath}

    parts = urlsplit(urlpath)
    protocol = parts.scheme or "file"
    # a "#fragment" is kept as part of the path
    path = f"{parts.path}#{parts.fragment}" if parts.fragment else parts.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        drive_match = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if drive_match:
            drive, remainder = drive_match.groups()
            path = f"{drive}:{remainder}"

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    netloc = parts.netloc
    if netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        host = netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
        options["host"] = host

        if protocol in ("s3", "s3a", "gcs", "gs"):
            # for these protocols the host is prepended to the path
            options["path"] = host + options["path"]
        for key in ("port", "username", "password"):
            value = getattr(parts, key)
            if value:
                options[key] = value

    if parts.query:
        options["url_query"] = parts.query
    if parts.fragment:
        options["url_fragment"] = parts.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge ``inherited`` into ``options`` in place.

    A key present in both dicts must map to equal values; otherwise
    ``KeyError`` is raised and ``options`` is left unmodified.
    """
    inherited = inherited or {}
    for key in set(options) & set(inherited):
        if options.get(key) != inherited.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage "
                f"option:\n{key}"
            )
    options.update(inherited)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# Compression extensions registered via fsspec.compression.register_compression
# Keys are compared against a lowercased, dot-stripped filename extension and
# the stored value is returned as the compression name.
compressions: dict[str, str] = {}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    The filename extension (lowercased, dots stripped) is looked up among
    the registered compressions; ``None`` is returned when it is not
    registered. See fsspec.compression.register_compression.
    """
    _, suffix = os.path.splitext(filename)
    return compressions.get(suffix.strip(".").lower())
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Build a function that zero-pads integers to a common width

    The returned function turns an integer into a string, left-padded with
    zeros to the number of digits needed for ``max_int``.

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # The small offset keeps log10 defined for 0 and makes an exact power
    # of ten round up to the next digit count.
    nudged = max_int + 1e-8
    pad_length = int(math.ceil(math.log10(nudged)))

    def name_function(i: int) -> str:
        return str(i).zfill(pad_length)

    return name_function
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Move ``file`` forward to just after the next ``delimiter``.

    Chunk boundaries are the file start, each delimiter sequence, and the
    file end. If the file is already at offset 0 nothing is read, since the
    start is itself a boundary. Use file.tell() to see the location
    afterwards.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """
    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Tail of the previously scanned data, so a delimiter split over two
    # reads is still seen. Starts as None and takes the type of file.read(),
    # which keeps this usable with text IO as well.
    carry: bytes | None = None
    while True:
        chunk = file.read(blocksize)
        if not chunk:
            # end-of-file without delimiter
            return False
        window = carry + chunk if carry else chunk
        try:
            position = window.find(delimiter)
            if position >= 0:
                # step back from the current offset to just past the match
                behind = len(window) - position
                file.seek(file.tell() - behind + len(delimiter))
                return True
            if len(chunk) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            pass
        carry = window[-len(delimiter) :]
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    With ``length=None`` everything from the start position to the end of
    the file is returned, with or without a delimiter.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # the start moved forward to a boundary; shrink the length to match
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter if seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)
    if length is None:
        # Documented contract: no length means "to the end of the file".
        # Previously this tripped an ``assert`` unless run with ``-O``.
        return f.read()
    return f.read(length)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    The token is the MD5 hex digest of ``str()`` of the positional
    arguments, with the keyword-argument dict appended when any keywords
    are given.

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    hashed = args + (kwargs,) if kwargs else args
    try:
        digest = md5(str(hashed).encode())
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        digest = md5(str(hashed).encode(), usedforsecurity=False)
    return digest.hexdigest()
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Checked in this order: a ``str`` is returned as is; an object with
    ``__fspath__`` is converted by calling that method; an object with a
    ``path`` attribute yields that attribute.

    Any other object is passed through unchanged, which includes bytes,
    buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    if hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    if hasattr(filepath, "path"):
        return filepath.path
    return filepath  # type: ignore[return-value]
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Build ``cls(*args, **kwargs)``, call its ``_determine_worker()``, and return it"""
    instance = cls(*args, **kwargs)
    instance._determine_worker()  # type: ignore[attr-defined]
    return instance
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the longest run of leading "/"-separated
    components shared by all of them, joined back with "/"."""
    split_paths = [p.split("/") for p in paths]
    shortest = min(len(components) for components in split_paths)
    reference = split_paths[0]
    depth = 0
    # walk component by component until the paths diverge or one runs out
    while depth < shortest and all(
        components[depth] == reference[depth] for components in split_paths
    ):
        depth += 1
    return "/".join(reference[:depth])
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, if it already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """
    if not isinstance(path2, str):
        # explicit destinations given: only check the count
        assert len(paths) == len(path2)
        return path2

    root = path2.rstrip("/")
    if flatten:
        # keep only each file's final path component
        return ["/".join((root, p.split("/")[-1])) for p in paths]

    prefix = common_prefix(paths)
    if exists:
        # keep the last shared component so it ends up inside the root
        prefix = prefix.rsplit("/", 1)[0]
    if not prefix and not any(s.startswith("/") for s in paths):
        # relative paths with nothing in common: nest them under the root
        return ["/".join([root, p]) for p in paths]
    # swap the shared prefix for the new root
    return [p.replace(prefix, root, 1) for p in paths]
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def is_exception(obj: Any) -> bool:
    """Whether ``obj`` is an instance of ``BaseException``"""
    result = isinstance(obj, BaseException)
    return result
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    """Whether ``f`` has ``read``, ``close`` and ``tell`` attributes"""
    return hasattr(f, "read") and hasattr(f, "close") and hasattr(f, "tell")
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def get_protocol(url: str) -> str:
    """Return the text before the first "::" or "://" in ``url``, or "file" if neither occurs"""
    url = stringify_path(url)
    head, *remainder = re.split(r"(\:\:|\://)", url, maxsplit=1)
    return head if remainder else "file"
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def get_file_extension(url: str) -> str:
    """Return the text after the last "." in ``url``, or "" if it contains no "." """
    url = stringify_path(url)
    _, dot, extension = url.rpartition(".")
    return extension if dot else ""
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?

    Returns the ``local_file`` attribute of the filesystem class registered
    for the URL's protocol (``False`` if the class has none), or ``False``
    when looking up that class raises ``ValueError`` or ``ImportError``.
    """
    from fsspec import get_filesystem_class

    try:
        fs_class = get_filesystem_class(get_protocol(path))
        return getattr(fs_class, "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Tried in order: ``__version__`` of an already imported module, the
    installed distribution metadata, and finally importing the package and
    reading ``__version__`` - so an import *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except Exception:
        # Best effort: any metadata lookup failure falls through to the
        # import below. KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
def setup_logging(
|
| 480 |
+
logger: logging.Logger | None = None,
|
| 481 |
+
logger_name: str | None = None,
|
| 482 |
+
level: str = "DEBUG",
|
| 483 |
+
clear: bool = True,
|
| 484 |
+
) -> logging.Logger:
|
| 485 |
+
if logger is None and logger_name is None:
|
| 486 |
+
raise ValueError("Provide either logger object or logger name")
|
| 487 |
+
logger = logger or logging.getLogger(logger_name)
|
| 488 |
+
handle = logging.StreamHandler()
|
| 489 |
+
formatter = logging.Formatter(
|
| 490 |
+
"%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
|
| 491 |
+
)
|
| 492 |
+
handle.setFormatter(formatter)
|
| 493 |
+
if clear:
|
| 494 |
+
logger.handlers.clear()
|
| 495 |
+
logger.addHandler(handle)
|
| 496 |
+
logger.setLevel(level)
|
| 497 |
+
return logger
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
|
| 501 |
+
return fs.unstrip_protocol(name)
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Class decorator that mirrors attributes from a wrapped object.

    For every name in ``methods`` a read-only property is installed on the
    decorated class. Reading that property on an instance looks up the
    attribute named ``origin_name`` on the instance and returns the
    same-named attribute from it.
    """

    def fetch_from_origin(attr: str, self: Any) -> Any:
        # Resolved on every access, so a re-assigned origin is picked up.
        return getattr(getattr(self, origin_name), attr)

    def decorate(cls: type[T]) -> type[T]:
        for attr in methods:
            # ``partial`` pins the attribute name; ``property`` supplies self.
            setattr(cls, attr, property(partial(fetch_from_origin, attr)))
        return cls

    return decorate
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    """Context manager that does nothing except hand back ``obj``.

    ``with nullcontext(x) as y`` binds ``y`` to ``x``; no setup or cleanup
    is performed on entry or exit.
    """
    yield obj
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
def merge_offset_ranges(
|
| 530 |
+
paths: list[str],
|
| 531 |
+
starts: list[int] | int,
|
| 532 |
+
ends: list[int] | int,
|
| 533 |
+
max_gap: int = 0,
|
| 534 |
+
max_block: int | None = None,
|
| 535 |
+
sort: bool = True,
|
| 536 |
+
) -> tuple[list[str], list[int], list[int]]:
|
| 537 |
+
"""Merge adjacent byte-offset ranges when the inter-range
|
| 538 |
+
gap is <= `max_gap`, and when the merged byte range does not
|
| 539 |
+
exceed `max_block` (if specified). By default, this function
|
| 540 |
+
will re-order the input paths and byte ranges to ensure sorted
|
| 541 |
+
order. If the user can guarantee that the inputs are already
|
| 542 |
+
sorted, passing `sort=False` will skip the re-ordering.
|
| 543 |
+
"""
|
| 544 |
+
# Check input
|
| 545 |
+
if not isinstance(paths, list):
|
| 546 |
+
raise TypeError
|
| 547 |
+
if not isinstance(starts, list):
|
| 548 |
+
starts = [starts] * len(paths)
|
| 549 |
+
if not isinstance(ends, list):
|
| 550 |
+
ends = [ends] * len(paths)
|
| 551 |
+
if len(starts) != len(paths) or len(ends) != len(paths):
|
| 552 |
+
raise ValueError
|
| 553 |
+
|
| 554 |
+
# Early Return
|
| 555 |
+
if len(starts) <= 1:
|
| 556 |
+
return paths, starts, ends
|
| 557 |
+
|
| 558 |
+
starts = [s or 0 for s in starts]
|
| 559 |
+
# Sort by paths and then ranges if `sort=True`
|
| 560 |
+
if sort:
|
| 561 |
+
paths, starts, ends = (
|
| 562 |
+
list(v)
|
| 563 |
+
for v in zip(
|
| 564 |
+
*sorted(
|
| 565 |
+
zip(paths, starts, ends),
|
| 566 |
+
)
|
| 567 |
+
)
|
| 568 |
+
)
|
| 569 |
+
|
| 570 |
+
if paths:
|
| 571 |
+
# Loop through the coupled `paths`, `starts`, and
|
| 572 |
+
# `ends`, and merge adjacent blocks when appropriate
|
| 573 |
+
new_paths = paths[:1]
|
| 574 |
+
new_starts = starts[:1]
|
| 575 |
+
new_ends = ends[:1]
|
| 576 |
+
for i in range(1, len(paths)):
|
| 577 |
+
if paths[i] == paths[i - 1] and new_ends[-1] is None:
|
| 578 |
+
continue
|
| 579 |
+
elif (
|
| 580 |
+
paths[i] != paths[i - 1]
|
| 581 |
+
or ((starts[i] - new_ends[-1]) > max_gap)
|
| 582 |
+
or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
|
| 583 |
+
):
|
| 584 |
+
# Cannot merge with previous block.
|
| 585 |
+
# Add new `paths`, `starts`, and `ends` elements
|
| 586 |
+
new_paths.append(paths[i])
|
| 587 |
+
new_starts.append(starts[i])
|
| 588 |
+
new_ends.append(ends[i])
|
| 589 |
+
else:
|
| 590 |
+
# Merge with previous block by updating the
|
| 591 |
+
# last element of `ends`
|
| 592 |
+
new_ends[-1] = ends[i]
|
| 593 |
+
return new_paths, new_starts, new_ends
|
| 594 |
+
|
| 595 |
+
# `paths` is empty. Just return input lists
|
| 596 |
+
return paths, starts, ends
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like

    Seeks to the end of ``filelike`` to learn its size and always seeks
    back to the position it had on entry, even if the end-seek raises.
    """
    original_position = filelike.tell()
    try:
        # whence=2: offset is relative to the end of the stream
        size = filelike.seek(0, 2)
    finally:
        filelike.seek(original_position)
    return size
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    A context manager that opens a temporary file next to `path` and, on exit,
    replaces `path` with the temporary file, thereby updating `path`
    atomically.

    The temporary file is created in the directory of ``path`` with the
    base name of ``path`` plus ``"-"`` as prefix, and is opened with
    ``mode``. The open file object is yielded to the ``with`` body.

    If anything raises (while opening the file or inside the ``with``
    body), the temporary file is removed, ``path`` is left untouched and
    the exception propagates.
    """
    fd, fn = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        try:
            fp = open(fd, mode)
        except BaseException:
            # ``open`` did not take ownership of the descriptor (e.g. an
            # invalid ``mode``); close it here so it is not leaked.  It may
            # already be closed if ``open`` failed part-way, hence suppress.
            with contextlib.suppress(OSError):
                os.close(fd)
            raise
        with fp:
            yield fp
    except BaseException:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(fn)
        raise
    else:
        # Only reached when the body finished and the file is closed.
        os.replace(fn, path)
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
def _translate(pat, STAR, QUESTION_MARK):
|
| 630 |
+
# Copied from: https://github.com/python/cpython/pull/106703.
|
| 631 |
+
res: list[str] = []
|
| 632 |
+
add = res.append
|
| 633 |
+
i, n = 0, len(pat)
|
| 634 |
+
while i < n:
|
| 635 |
+
c = pat[i]
|
| 636 |
+
i = i + 1
|
| 637 |
+
if c == "*":
|
| 638 |
+
# compress consecutive `*` into one
|
| 639 |
+
if (not res) or res[-1] is not STAR:
|
| 640 |
+
add(STAR)
|
| 641 |
+
elif c == "?":
|
| 642 |
+
add(QUESTION_MARK)
|
| 643 |
+
elif c == "[":
|
| 644 |
+
j = i
|
| 645 |
+
if j < n and pat[j] == "!":
|
| 646 |
+
j = j + 1
|
| 647 |
+
if j < n and pat[j] == "]":
|
| 648 |
+
j = j + 1
|
| 649 |
+
while j < n and pat[j] != "]":
|
| 650 |
+
j = j + 1
|
| 651 |
+
if j >= n:
|
| 652 |
+
add("\\[")
|
| 653 |
+
else:
|
| 654 |
+
stuff = pat[i:j]
|
| 655 |
+
if "-" not in stuff:
|
| 656 |
+
stuff = stuff.replace("\\", r"\\")
|
| 657 |
+
else:
|
| 658 |
+
chunks = []
|
| 659 |
+
k = i + 2 if pat[i] == "!" else i + 1
|
| 660 |
+
while True:
|
| 661 |
+
k = pat.find("-", k, j)
|
| 662 |
+
if k < 0:
|
| 663 |
+
break
|
| 664 |
+
chunks.append(pat[i:k])
|
| 665 |
+
i = k + 1
|
| 666 |
+
k = k + 3
|
| 667 |
+
chunk = pat[i:j]
|
| 668 |
+
if chunk:
|
| 669 |
+
chunks.append(chunk)
|
| 670 |
+
else:
|
| 671 |
+
chunks[-1] += "-"
|
| 672 |
+
# Remove empty ranges -- invalid in RE.
|
| 673 |
+
for k in range(len(chunks) - 1, 0, -1):
|
| 674 |
+
if chunks[k - 1][-1] > chunks[k][0]:
|
| 675 |
+
chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
|
| 676 |
+
del chunks[k]
|
| 677 |
+
# Escape backslashes and hyphens for set difference (--).
|
| 678 |
+
# Hyphens that create ranges shouldn't be escaped.
|
| 679 |
+
stuff = "-".join(
|
| 680 |
+
s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
|
| 681 |
+
)
|
| 682 |
+
# Escape set operations (&&, ~~ and ||).
|
| 683 |
+
stuff = re.sub(r"([&~|])", r"\\\1", stuff)
|
| 684 |
+
i = j + 1
|
| 685 |
+
if not stuff:
|
| 686 |
+
# Empty range: never match.
|
| 687 |
+
add("(?!)")
|
| 688 |
+
elif stuff == "!":
|
| 689 |
+
# Negated empty range: match any character.
|
| 690 |
+
add(".")
|
| 691 |
+
else:
|
| 692 |
+
if stuff[0] == "!":
|
| 693 |
+
stuff = "^" + stuff[1:]
|
| 694 |
+
elif stuff[0] in ("^", "["):
|
| 695 |
+
stuff = "\\" + stuff
|
| 696 |
+
add(f"[{stuff}]")
|
| 697 |
+
else:
|
| 698 |
+
add(re.escape(c))
|
| 699 |
+
assert i == n
|
| 700 |
+
return res
|
| 701 |
+
|
| 702 |
+
|
| 703 |
+
def glob_translate(pat):
    """Translate a pathname with shell wildcards to a regular expression.

    Copied from: https://github.com/python/cpython/pull/106703.
    The keyword parameters' values are fixed to:
    recursive=True, include_hidden=True, seps=None

    The pattern is split on the platform path separator(s). A component
    equal to ``*`` matches exactly one path segment, a component equal to
    ``**`` matches any number of segments, and every other component is
    translated with ``_translate``.

    Raises ``ValueError`` if ``**`` appears inside a component together
    with other characters.
    """
    seps = os.path.sep + os.path.altsep if os.path.altsep else os.path.sep
    escaped_seps = "".join(re.escape(sep) for sep in seps)

    # Building blocks of the final expression.
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"

    components = re.split(any_sep, pat)
    final_idx = len(components) - 1
    fragments = []
    for idx, component in enumerate(components):
        is_last = idx >= final_idx
        if component == "*":
            fragments.append(one_last_segment if is_last else one_segment)
        elif component == "**":
            fragments.append(any_last_segments if is_last else any_segments)
        elif "**" in component:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        else:
            if component:
                fragments.extend(_translate(component, f"{not_sep}*", not_sep))
            if not is_last:
                fragments.append(any_sep)
    body = "".join(fragments)
    return rf"(?s:{body})\Z"
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/METADATA
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.1
|
| 2 |
+
Name: git-lfs
|
| 3 |
+
Version: 1.6
|
| 4 |
+
Summary: A lightweight Git Large File Storage fetcher
|
| 5 |
+
Home-page: https://github.com/liberapay/git-lfs-fetch.py
|
| 6 |
+
Author: Changaco
|
| 7 |
+
Author-email: changaco@changaco.oy.lc
|
| 8 |
+
License: CC0
|
| 9 |
+
Keywords: git lfs
|
| 10 |
+
Platform: UNKNOWN
|
| 11 |
+
Description-Content-Type: text/x-rst
|
| 12 |
+
|
| 13 |
+
A lightweight Git Large File Storage fetcher written in python.
|
| 14 |
+
|
| 15 |
+
This module cannot fully replace the official git-lfs client; it only knows how
|
| 16 |
+
to download the files, cache them (the same way the official client does), and
|
| 17 |
+
place them in a checkout directory. Uploading files is not implemented at all.
|
| 18 |
+
|
| 19 |
+
Installation
|
| 20 |
+
============
|
| 21 |
+
|
| 22 |
+
pip install git-lfs
|
| 23 |
+
|
| 24 |
+
python-git-lfs is compatible with python 2 and 3.
|
| 25 |
+
|
| 26 |
+
Usage
|
| 27 |
+
=====
|
| 28 |
+
|
| 29 |
+
Basic: simply run ``python -m git_lfs`` in a normal Git repository.
|
| 30 |
+
|
| 31 |
+
Advanced::
|
| 32 |
+
|
| 33 |
+
python -m git_lfs [-h] [-v] [git_repo] [checkout_dir]
|
| 34 |
+
|
| 35 |
+
positional arguments:
|
| 36 |
+
git_repo if it's bare you need to provide a checkout_dir
|
| 37 |
+
checkout_dir
|
| 38 |
+
|
| 39 |
+
optional arguments:
|
| 40 |
+
-h, --help show this help message and exit
|
| 41 |
+
-v, --verbose
|
| 42 |
+
|
| 43 |
+
License
|
| 44 |
+
=======
|
| 45 |
+
|
| 46 |
+
`CC0 Public Domain Dedication <http://creativecommons.org/publicdomain/zero/1.0/>`_
|
| 47 |
+
|
| 48 |
+
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/RECORD
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
git_lfs-1.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
git_lfs-1.6.dist-info/METADATA,sha256=R8JeXdM_tbNNzy23Sw992NKPOV3NXjkrUzCcGMqVT6Y,1174
|
| 3 |
+
git_lfs-1.6.dist-info/RECORD,,
|
| 4 |
+
git_lfs-1.6.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 5 |
+
git_lfs-1.6.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
|
| 6 |
+
git_lfs-1.6.dist-info/top_level.txt,sha256=sfhnq_PjKyFts61t6Ts6ssAYa2uj0KaCnjWWpCc2ElI,8
|
| 7 |
+
git_lfs/__init__.py,sha256=wxf1h0HCxlTYd0Ui_ieYgjVFLVCrl9aVpi-cYqta6kw,8688
|
| 8 |
+
git_lfs/__main__.py,sha256=Imn6ZITlrbp9EyuR_zkN8BN8ZMWDHU6cusJ1_RG5k-0,437
|
| 9 |
+
git_lfs/__pycache__/__init__.cpython-313.pyc,,
|
| 10 |
+
git_lfs/__pycache__/__main__.cpython-313.pyc,,
|
| 11 |
+
git_lfs/__pycache__/utils.cpython-313.pyc,,
|
| 12 |
+
git_lfs/utils.py,sha256=4b11S10mPHITmvKpAriLVHvcZyqYZk4oYORpvKawmM0,1216
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/REQUESTED
ADDED
|
File without changes
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: bdist_wheel (0.34.2)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py2-none-any
|
| 5 |
+
Tag: py3-none-any
|
| 6 |
+
|
env/lib/python3.13/site-packages/git_lfs-1.6.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
git_lfs
|
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: h11
|
| 3 |
+
Version: 0.16.0
|
| 4 |
+
Summary: A pure-Python, bring-your-own-I/O implementation of HTTP/1.1
|
| 5 |
+
Home-page: https://github.com/python-hyper/h11
|
| 6 |
+
Author: Nathaniel J. Smith
|
| 7 |
+
Author-email: njs@pobox.com
|
| 8 |
+
License: MIT
|
| 9 |
+
Classifier: Development Status :: 3 - Alpha
|
| 10 |
+
Classifier: Intended Audience :: Developers
|
| 11 |
+
Classifier: License :: OSI Approved :: MIT License
|
| 12 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
| 13 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
| 14 |
+
Classifier: Programming Language :: Python :: 3
|
| 15 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
| 16 |
+
Classifier: Programming Language :: Python :: 3.8
|
| 17 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 18 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 21 |
+
Classifier: Topic :: Internet :: WWW/HTTP
|
| 22 |
+
Classifier: Topic :: System :: Networking
|
| 23 |
+
Requires-Python: >=3.8
|
| 24 |
+
License-File: LICENSE.txt
|
| 25 |
+
Dynamic: author
|
| 26 |
+
Dynamic: author-email
|
| 27 |
+
Dynamic: classifier
|
| 28 |
+
Dynamic: description
|
| 29 |
+
Dynamic: home-page
|
| 30 |
+
Dynamic: license
|
| 31 |
+
Dynamic: license-file
|
| 32 |
+
Dynamic: requires-python
|
| 33 |
+
Dynamic: summary
|
| 34 |
+
|
| 35 |
+
h11
|
| 36 |
+
===
|
| 37 |
+
|
| 38 |
+
.. image:: https://travis-ci.org/python-hyper/h11.svg?branch=master
|
| 39 |
+
:target: https://travis-ci.org/python-hyper/h11
|
| 40 |
+
:alt: Automated test status
|
| 41 |
+
|
| 42 |
+
.. image:: https://codecov.io/gh/python-hyper/h11/branch/master/graph/badge.svg
|
| 43 |
+
:target: https://codecov.io/gh/python-hyper/h11
|
| 44 |
+
:alt: Test coverage
|
| 45 |
+
|
| 46 |
+
.. image:: https://readthedocs.org/projects/h11/badge/?version=latest
|
| 47 |
+
:target: http://h11.readthedocs.io/en/latest/?badge=latest
|
| 48 |
+
:alt: Documentation Status
|
| 49 |
+
|
| 50 |
+
This is a little HTTP/1.1 library written from scratch in Python,
|
| 51 |
+
heavily inspired by `hyper-h2 <https://hyper-h2.readthedocs.io/>`_.
|
| 52 |
+
|
| 53 |
+
It's a "bring-your-own-I/O" library; h11 contains no IO code
|
| 54 |
+
whatsoever. This means you can hook h11 up to your favorite network
|
| 55 |
+
API, and that could be anything you want: synchronous, threaded,
|
| 56 |
+
asynchronous, or your own implementation of `RFC 6214
|
| 57 |
+
<https://tools.ietf.org/html/rfc6214>`_ -- h11 won't judge you.
|
| 58 |
+
(Compare this to the current state of the art, where every time a `new
|
| 59 |
+
network API <https://trio.readthedocs.io/>`_ comes along then someone
|
| 60 |
+
gets to start over reimplementing the entire HTTP protocol from
|
| 61 |
+
scratch.) Cory Benfield made an `excellent blog post describing the
|
| 62 |
+
benefits of this approach
|
| 63 |
+
<https://lukasa.co.uk/2015/10/The_New_Hyper/>`_, or if you like video
|
| 64 |
+
then here's his `PyCon 2016 talk on the same theme
|
| 65 |
+
<https://www.youtube.com/watch?v=7cC3_jGwl_U>`_.
|
| 66 |
+
|
| 67 |
+
This also means that h11 is not immediately useful out of the box:
|
| 68 |
+
it's a toolkit for building programs that speak HTTP, not something
|
| 69 |
+
that could directly replace ``requests`` or ``twisted.web`` or
|
| 70 |
+
whatever. But h11 makes it much easier to implement something like
|
| 71 |
+
``requests`` or ``twisted.web``.
|
| 72 |
+
|
| 73 |
+
At a high level, working with h11 goes like this:
|
| 74 |
+
|
| 75 |
+
1) First, create an ``h11.Connection`` object to track the state of a
|
| 76 |
+
single HTTP/1.1 connection.
|
| 77 |
+
|
| 78 |
+
2) When you read data off the network, pass it to
|
| 79 |
+
``conn.receive_data(...)``; you'll get back a list of objects
|
| 80 |
+
representing high-level HTTP "events".
|
| 81 |
+
|
| 82 |
+
3) When you want to send a high-level HTTP event, create the
|
| 83 |
+
corresponding "event" object and pass it to ``conn.send(...)``;
|
| 84 |
+
this will give you back some bytes that you can then push out
|
| 85 |
+
through the network.
|
| 86 |
+
|
| 87 |
+
For example, a client might instantiate and then send a
|
| 88 |
+
``h11.Request`` object, then zero or more ``h11.Data`` objects for the
|
| 89 |
+
request body (e.g., if this is a POST), and then a
|
| 90 |
+
``h11.EndOfMessage`` to indicate the end of the message. Then the
|
| 91 |
+
server would send back a ``h11.Response``, some ``h11.Data``, and
|
| 92 |
+
its own ``h11.EndOfMessage``. If either side violates the protocol,
|
| 93 |
+
you'll get a ``h11.ProtocolError`` exception.
|
| 94 |
+
|
| 95 |
+
h11 is suitable for implementing both servers and clients, and has a
|
| 96 |
+
pleasantly symmetric API: the events you send as a client are exactly
|
| 97 |
+
the ones that you receive as a server and vice-versa.
|
| 98 |
+
|
| 99 |
+
`Here's an example of a tiny HTTP client
|
| 100 |
+
<https://github.com/python-hyper/h11/blob/master/examples/basic-client.py>`_
|
| 101 |
+
|
| 102 |
+
It also has `a fine manual <https://h11.readthedocs.io/>`_.
|
| 103 |
+
|
| 104 |
+
FAQ
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
*Whyyyyy?*
|
| 108 |
+
|
| 109 |
+
I wanted to play with HTTP in `Curio
|
| 110 |
+
<https://curio.readthedocs.io/en/latest/tutorial.html>`__ and `Trio
|
| 111 |
+
<https://trio.readthedocs.io>`__, which at the time didn't have any
|
| 112 |
+
HTTP libraries. So I thought, no big deal, Python has, like, a dozen
|
| 113 |
+
different implementations of HTTP, surely I can find one that's
|
| 114 |
+
reusable. I didn't find one, but I did find Cory's call-to-arms
|
| 115 |
+
blog-post. So I figured, well, fine, if I have to implement HTTP from
|
| 116 |
+
scratch, at least I can make sure no-one *else* has to ever again.
|
| 117 |
+
|
| 118 |
+
*Should I use it?*
|
| 119 |
+
|
| 120 |
+
Maybe. You should be aware that it's a very young project. But, it's
|
| 121 |
+
feature complete and has an exhaustive test-suite and complete docs,
|
| 122 |
+
so the next step is for people to try using it and see how it goes
|
| 123 |
+
:-). If you do then please let us know -- if nothing else we'll want
|
| 124 |
+
to talk to you before making any incompatible changes!
|
| 125 |
+
|
| 126 |
+
*What are the features/limitations?*
|
| 127 |
+
|
| 128 |
+
Roughly speaking, it's trying to be a robust, complete, and non-hacky
|
| 129 |
+
implementation of the first "chapter" of the HTTP/1.1 spec: `RFC 7230:
|
| 130 |
+
HTTP/1.1 Message Syntax and Routing
|
| 131 |
+
<https://tools.ietf.org/html/rfc7230>`_. That is, it mostly focuses on
|
| 132 |
+
implementing HTTP at the level of taking bytes on and off the wire,
|
| 133 |
+
and the headers related to that, and tries to be anal about spec
|
| 134 |
+
conformance. It doesn't know about higher-level concerns like URL
|
| 135 |
+
routing, conditional GETs, cross-origin cookie policies, or content
|
| 136 |
+
negotiation. But it does know how to take care of framing,
|
| 137 |
+
cross-version differences in keep-alive handling, and the "obsolete
|
| 138 |
+
line folding" rule, so you can focus your energies on the hard /
|
| 139 |
+
interesting parts for your application, and it tries to support the
|
| 140 |
+
full specification in the sense that any useful HTTP/1.1 conformant
|
| 141 |
+
application should be able to use h11.
|
| 142 |
+
|
| 143 |
+
It's pure Python, and has no dependencies outside of the standard
|
| 144 |
+
library.
|
| 145 |
+
|
| 146 |
+
It has a test suite with 100.0% coverage for both statements and
|
| 147 |
+
branches.
|
| 148 |
+
|
| 149 |
+
Currently it supports Python 3 (testing on 3.8-3.12) and PyPy 3.
|
| 150 |
+
The last Python 2-compatible version was h11 0.11.x.
|
| 151 |
+
(Originally it had a Cython wrapper for `http-parser
|
| 152 |
+
<https://github.com/nodejs/http-parser>`_ and a beautiful nested state
|
| 153 |
+
machine implemented with ``yield from`` to postprocess the output. But
|
| 154 |
+
I had to take these out -- the new *parser* needs fewer lines-of-code
|
| 155 |
+
than the old *parser wrapper*, is written in pure Python, uses no
|
| 156 |
+
exotic language syntax, and has more features. It's sad, really; that
|
| 157 |
+
old state machine was really slick. I just need a few sentences here
|
| 158 |
+
to mourn that.)
|
| 159 |
+
|
| 160 |
+
I don't know how fast it is. I haven't benchmarked or profiled it yet,
|
| 161 |
+
so it's probably got a few pointless hot spots, and I've been trying
|
| 162 |
+
to err on the side of simplicity and robustness instead of
|
| 163 |
+
micro-optimization. But at the architectural level I tried hard to
|
| 164 |
+
avoid fundamentally bad decisions, e.g., I believe that all the
|
| 165 |
+
parsing algorithms remain linear-time even in the face of pathological
|
| 166 |
+
input like slowloris, and there are no byte-by-byte loops. (I also
|
| 167 |
+
believe that it maintains bounded memory usage in the face of
|
| 168 |
+
arbitrary/pathological input.)
|
| 169 |
+
|
| 170 |
+
The whole library is ~800 lines-of-code. You can read and understand
|
| 171 |
+
the whole thing in less than an hour. Most of the energy invested in
|
| 172 |
+
this so far has been spent on trying to keep things simple by
|
| 173 |
+
minimizing special-cases and ad hoc state manipulation; even though it
|
| 174 |
+
is now quite small and simple, I'm still annoyed that I haven't
|
| 175 |
+
figured out how to make it even smaller and simpler. (Unfortunately,
|
| 176 |
+
HTTP does not lend itself to simplicity.)
|
| 177 |
+
|
| 178 |
+
The API is ~feature complete and I don't expect the general outlines
|
| 179 |
+
to change much, but you can't judge an API's ergonomics until you
|
| 180 |
+
actually document and use it, so I'd expect some changes in the
|
| 181 |
+
details.
|
| 182 |
+
|
| 183 |
+
*How do I try it?*
|
| 184 |
+
|
| 185 |
+
.. code-block:: sh
|
| 186 |
+
|
| 187 |
+
$ pip install h11
|
| 188 |
+
$ git clone git@github.com:python-hyper/h11
|
| 189 |
+
$ cd h11/examples
|
| 190 |
+
$ python basic-client.py
|
| 191 |
+
|
| 192 |
+
and go from there.
|
| 193 |
+
|
| 194 |
+
*License?*
|
| 195 |
+
|
| 196 |
+
MIT
|
| 197 |
+
|
| 198 |
+
*Code of conduct?*
|
| 199 |
+
|
| 200 |
+
Contributors are requested to follow our `code of conduct
|
| 201 |
+
<https://github.com/python-hyper/h11/blob/master/CODE_OF_CONDUCT.md>`_ in
|
| 202 |
+
all project spaces.
|
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
h11-0.16.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
h11-0.16.0.dist-info/METADATA,sha256=KPMmCYrAn8unm48YD5YIfIQf4kViFct7hyqcfVzRnWQ,8348
|
| 3 |
+
h11-0.16.0.dist-info/RECORD,,
|
| 4 |
+
h11-0.16.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
| 5 |
+
h11-0.16.0.dist-info/licenses/LICENSE.txt,sha256=N9tbuFkm2yikJ6JYZ_ELEjIAOuob5pzLhRE4rbjm82E,1124
|
| 6 |
+
h11-0.16.0.dist-info/top_level.txt,sha256=F7dC4jl3zeh8TGHEPaWJrMbeuoWbS379Gwdi-Yvdcis,4
|
| 7 |
+
h11/__init__.py,sha256=iO1KzkSO42yZ6ffg-VMgbx_ZVTWGUY00nRYEWn-s3kY,1507
|
| 8 |
+
h11/__pycache__/__init__.cpython-313.pyc,,
|
| 9 |
+
h11/__pycache__/_abnf.cpython-313.pyc,,
|
| 10 |
+
h11/__pycache__/_connection.cpython-313.pyc,,
|
| 11 |
+
h11/__pycache__/_events.cpython-313.pyc,,
|
| 12 |
+
h11/__pycache__/_headers.cpython-313.pyc,,
|
| 13 |
+
h11/__pycache__/_readers.cpython-313.pyc,,
|
| 14 |
+
h11/__pycache__/_receivebuffer.cpython-313.pyc,,
|
| 15 |
+
h11/__pycache__/_state.cpython-313.pyc,,
|
| 16 |
+
h11/__pycache__/_util.cpython-313.pyc,,
|
| 17 |
+
h11/__pycache__/_version.cpython-313.pyc,,
|
| 18 |
+
h11/__pycache__/_writers.cpython-313.pyc,,
|
| 19 |
+
h11/_abnf.py,sha256=ybixr0xsupnkA6GFAyMubuXF6Tc1lb_hF890NgCsfNc,4815
|
| 20 |
+
h11/_connection.py,sha256=k9YRVf6koZqbttBW36xSWaJpWdZwa-xQVU9AHEo9DuI,26863
|
| 21 |
+
h11/_events.py,sha256=I97aXoal1Wu7dkL548BANBUCkOIbe-x5CioYA9IBY14,11792
|
| 22 |
+
h11/_headers.py,sha256=P7D-lBNxHwdLZPLimmYwrPG-9ZkjElvvJZJdZAgSP-4,10412
|
| 23 |
+
h11/_readers.py,sha256=a4RypORUCC3d0q_kxPuBIM7jTD8iLt5X91TH0FsduN4,8590
|
| 24 |
+
h11/_receivebuffer.py,sha256=xrspsdsNgWFxRfQcTXxR8RrdjRXXTK0Io5cQYWpJ1Ws,5252
|
| 25 |
+
h11/_state.py,sha256=_5LG_BGR8FCcFQeBPH-TMHgm_-B-EUcWCnQof_9XjFE,13231
|
| 26 |
+
h11/_util.py,sha256=LWkkjXyJaFlAy6Lt39w73UStklFT5ovcvo0TkY7RYuk,4888
|
| 27 |
+
h11/_version.py,sha256=GVSsbPSPDcOuF6ptfIiXnVJoaEm3ygXbMnqlr_Giahw,686
|
| 28 |
+
h11/_writers.py,sha256=oFKm6PtjeHfbj4RLX7VB7KDc1gIY53gXG3_HR9ltmTA,5081
|
| 29 |
+
h11/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7
|
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: setuptools (78.1.0)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
| 5 |
+
|
env/lib/python3.13/site-packages/h11-0.16.0.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
h11
|
env/lib/python3.13/site-packages/httpcore/__init__.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ._api import request, stream
|
| 2 |
+
from ._async import (
|
| 3 |
+
AsyncConnectionInterface,
|
| 4 |
+
AsyncConnectionPool,
|
| 5 |
+
AsyncHTTP2Connection,
|
| 6 |
+
AsyncHTTP11Connection,
|
| 7 |
+
AsyncHTTPConnection,
|
| 8 |
+
AsyncHTTPProxy,
|
| 9 |
+
AsyncSOCKSProxy,
|
| 10 |
+
)
|
| 11 |
+
from ._backends.base import (
|
| 12 |
+
SOCKET_OPTION,
|
| 13 |
+
AsyncNetworkBackend,
|
| 14 |
+
AsyncNetworkStream,
|
| 15 |
+
NetworkBackend,
|
| 16 |
+
NetworkStream,
|
| 17 |
+
)
|
| 18 |
+
from ._backends.mock import AsyncMockBackend, AsyncMockStream, MockBackend, MockStream
|
| 19 |
+
from ._backends.sync import SyncBackend
|
| 20 |
+
from ._exceptions import (
|
| 21 |
+
ConnectError,
|
| 22 |
+
ConnectionNotAvailable,
|
| 23 |
+
ConnectTimeout,
|
| 24 |
+
LocalProtocolError,
|
| 25 |
+
NetworkError,
|
| 26 |
+
PoolTimeout,
|
| 27 |
+
ProtocolError,
|
| 28 |
+
ProxyError,
|
| 29 |
+
ReadError,
|
| 30 |
+
ReadTimeout,
|
| 31 |
+
RemoteProtocolError,
|
| 32 |
+
TimeoutException,
|
| 33 |
+
UnsupportedProtocol,
|
| 34 |
+
WriteError,
|
| 35 |
+
WriteTimeout,
|
| 36 |
+
)
|
| 37 |
+
from ._models import URL, Origin, Proxy, Request, Response
|
| 38 |
+
from ._ssl import default_ssl_context
|
| 39 |
+
from ._sync import (
|
| 40 |
+
ConnectionInterface,
|
| 41 |
+
ConnectionPool,
|
| 42 |
+
HTTP2Connection,
|
| 43 |
+
HTTP11Connection,
|
| 44 |
+
HTTPConnection,
|
| 45 |
+
HTTPProxy,
|
| 46 |
+
SOCKSProxy,
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
# The 'httpcore.AnyIOBackend' class is conditional on 'anyio' being installed.
|
| 50 |
+
try:
    from ._backends.anyio import AnyIOBackend
except ImportError:  # pragma: nocover

    class AnyIOBackend:  # type: ignore
        """Placeholder used when the real AnyIO backend cannot be imported.

        Instantiating it always raises ``RuntimeError``.
        """

        def __init__(self, *args, **kwargs):  # type: ignore
            raise RuntimeError(
                "Attempted to use 'httpcore.AnyIOBackend' but 'anyio' is not installed."
            )
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# The 'httpcore.TrioBackend' class is conditional on 'trio' being installed.
|
| 63 |
+
try:
    from ._backends.trio import TrioBackend
except ImportError:  # pragma: nocover

    class TrioBackend:  # type: ignore
        """Placeholder used when the real Trio backend cannot be imported.

        Instantiating it always raises ``RuntimeError``.
        """

        def __init__(self, *args, **kwargs):  # type: ignore
            raise RuntimeError(
                "Attempted to use 'httpcore.TrioBackend' but 'trio' is not installed."
            )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
__all__ = [
|
| 74 |
+
# top-level requests
|
| 75 |
+
"request",
|
| 76 |
+
"stream",
|
| 77 |
+
# models
|
| 78 |
+
"Origin",
|
| 79 |
+
"URL",
|
| 80 |
+
"Request",
|
| 81 |
+
"Response",
|
| 82 |
+
"Proxy",
|
| 83 |
+
# async
|
| 84 |
+
"AsyncHTTPConnection",
|
| 85 |
+
"AsyncConnectionPool",
|
| 86 |
+
"AsyncHTTPProxy",
|
| 87 |
+
"AsyncHTTP11Connection",
|
| 88 |
+
"AsyncHTTP2Connection",
|
| 89 |
+
"AsyncConnectionInterface",
|
| 90 |
+
"AsyncSOCKSProxy",
|
| 91 |
+
# sync
|
| 92 |
+
"HTTPConnection",
|
| 93 |
+
"ConnectionPool",
|
| 94 |
+
"HTTPProxy",
|
| 95 |
+
"HTTP11Connection",
|
| 96 |
+
"HTTP2Connection",
|
| 97 |
+
"ConnectionInterface",
|
| 98 |
+
"SOCKSProxy",
|
| 99 |
+
# network backends, implementations
|
| 100 |
+
"SyncBackend",
|
| 101 |
+
"AnyIOBackend",
|
| 102 |
+
"TrioBackend",
|
| 103 |
+
# network backends, mock implementations
|
| 104 |
+
"AsyncMockBackend",
|
| 105 |
+
"AsyncMockStream",
|
| 106 |
+
"MockBackend",
|
| 107 |
+
"MockStream",
|
| 108 |
+
# network backends, interface
|
| 109 |
+
"AsyncNetworkStream",
|
| 110 |
+
"AsyncNetworkBackend",
|
| 111 |
+
"NetworkStream",
|
| 112 |
+
"NetworkBackend",
|
| 113 |
+
# util
|
| 114 |
+
"default_ssl_context",
|
| 115 |
+
"SOCKET_OPTION",
|
| 116 |
+
# exceptions
|
| 117 |
+
"ConnectionNotAvailable",
|
| 118 |
+
"ProxyError",
|
| 119 |
+
"ProtocolError",
|
| 120 |
+
"LocalProtocolError",
|
| 121 |
+
"RemoteProtocolError",
|
| 122 |
+
"UnsupportedProtocol",
|
| 123 |
+
"TimeoutException",
|
| 124 |
+
"PoolTimeout",
|
| 125 |
+
"ConnectTimeout",
|
| 126 |
+
"ReadTimeout",
|
| 127 |
+
"WriteTimeout",
|
| 128 |
+
"NetworkError",
|
| 129 |
+
"ConnectError",
|
| 130 |
+
"ReadError",
|
| 131 |
+
"WriteError",
|
| 132 |
+
]
|
| 133 |
+
|
| 134 |
+
__version__ = "1.0.9"
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Rewrite `__module__` on every exported object so that it reports 'httpcore'
# instead of the private submodule it was imported from.
__locals = locals()
for __name in __all__:
    # Exclude SOCKET_OPTION, it causes AttributeError on Python 3.14
    if __name.startswith(("__", "SOCKET_OPTION")):
        continue
    __locals[__name].__module__ = "httpcore"  # noqa
|
env/lib/python3.13/site-packages/httpcore/_api.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import typing
|
| 5 |
+
|
| 6 |
+
from ._models import URL, Extensions, HeaderTypes, Response
|
| 7 |
+
from ._sync.connection_pool import ConnectionPool
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def request(
    method: bytes | str,
    url: URL | bytes | str,
    *,
    headers: HeaderTypes = None,
    content: bytes | typing.Iterator[bytes] | None = None,
    extensions: Extensions | None = None,
) -> Response:
    """
    Send a single HTTP request and return its response.

    ```
    response = httpcore.request("GET", "https://www.example.com/")
    ```

    Arguments:
        method: The HTTP method for the request. Typically one of `"GET"`,
            `"OPTIONS"`, `"HEAD"`, `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`.
        url: The URL of the HTTP request. Either as an instance of `httpcore.URL`,
            or as str/bytes.
        headers: The HTTP request headers. Either as a dictionary of str/bytes,
            or as a list of two-tuples of str/bytes.
        content: The content of the request body. Either as bytes,
            or as a bytes iterator.
        extensions: A dictionary of optional extra information included on the
            request. Possible keys include `"timeout"`.

    Returns:
        An instance of `httpcore.Response`.
    """
    call_args = {
        "method": method,
        "url": url,
        "headers": headers,
        "content": content,
        "extensions": extensions,
    }
    # A fresh pool is used as a context manager for the duration of this call.
    with ConnectionPool() as pool:
        return pool.request(**call_args)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@contextlib.contextmanager
def stream(
    method: bytes | str,
    url: URL | bytes | str,
    *,
    headers: HeaderTypes = None,
    content: bytes | typing.Iterator[bytes] | None = None,
    extensions: Extensions | None = None,
) -> typing.Iterator[Response]:
    """
    Sends an HTTP request, providing the response within a context manager.

    ```
    with httpcore.stream("GET", "https://www.example.com/") as response:
        ...
    ```

    When using the `stream()` function, the body of the response will not be
    automatically read. If you want to access the response body you should
    either use `content = response.read()`, or
    `for chunk in response.iter_stream()`.

    Arguments:
        method: The HTTP method for the request. Typically one of `"GET"`,
            `"OPTIONS"`, `"HEAD"`, `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`.
        url: The URL of the HTTP request. Either as an instance of `httpcore.URL`,
            or as str/bytes.
        headers: The HTTP request headers. Either as a dictionary of str/bytes,
            or as a list of two-tuples of str/bytes.
        content: The content of the request body. Either as bytes,
            or as a bytes iterator.
        extensions: A dictionary of optional extra information included on the
            request. Possible keys include `"timeout"`.

    Yields:
        An instance of `httpcore.Response`, usable inside the `with` block.
    """
    # Both the pool and the streamed response are context managers; they are
    # exited (response first, then pool) once the caller's `with` block ends.
    with ConnectionPool() as pool, pool.stream(
        method=method,
        url=url,
        headers=headers,
        content=content,
        extensions=extensions,
    ) as response:
        yield response
|
env/lib/python3.13/site-packages/httpcore/_exceptions.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import contextlib
|
| 2 |
+
import typing
|
| 3 |
+
|
| 4 |
+
# Maps a caught exception type to the exception type that should replace it.
ExceptionMapping = typing.Mapping[typing.Type[Exception], typing.Type[Exception]]


@contextlib.contextmanager
def map_exceptions(map: ExceptionMapping) -> typing.Iterator[None]:
    """
    Context manager that translates exceptions raised inside its body.

    The first entry of `map` (in iteration order) whose key matches the raised
    exception via `isinstance` decides the replacement: the mapped type is
    instantiated with the original exception as its only argument and raised
    with the original attached as `__cause__`. Exceptions that match no entry
    propagate unchanged.
    """
    try:
        yield
    except Exception as exc:  # noqa: PIE786
        replacement = next(
            (target for source, target in map.items() if isinstance(exc, source)),
            None,
        )
        if replacement is None:
            raise  # pragma: nocover
        raise replacement(exc) from exc
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ConnectionNotAvailable(Exception):
    """
    Standalone exception type derived directly from `Exception`.

    NOTE(review): presumably signals that a connection could not accept a
    request - confirm against the raise sites, which are outside this module.
    """


class ProxyError(Exception):
    """
    Standalone exception type derived directly from `Exception`.

    NOTE(review): presumably raised for proxy failures - confirm at raise sites.
    """


class UnsupportedProtocol(Exception):
    """
    Standalone exception type derived directly from `Exception`.

    NOTE(review): presumably raised for URL schemes that cannot be handled -
    confirm at raise sites.
    """


class ProtocolError(Exception):
    """Base class shared by `RemoteProtocolError` and `LocalProtocolError`."""


class RemoteProtocolError(ProtocolError):
    """
    `ProtocolError` subtype.

    NOTE(review): presumably a violation by the remote peer - confirm at raise sites.
    """


class LocalProtocolError(ProtocolError):
    """
    `ProtocolError` subtype.

    NOTE(review): presumably a violation by the local side - confirm at raise sites.
    """
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Timeout errors


class TimeoutException(Exception):
    """Base class for the pool, connect, read and write timeout exceptions."""


class PoolTimeout(TimeoutException):
    """
    `TimeoutException` subtype.

    NOTE(review): presumably a timeout while waiting on the connection pool -
    confirm at raise sites.
    """


class ConnectTimeout(TimeoutException):
    """
    `TimeoutException` subtype.

    NOTE(review): presumably a timeout while connecting - confirm at raise sites.
    """


class ReadTimeout(TimeoutException):
    """
    `TimeoutException` subtype.

    NOTE(review): presumably a timeout while reading - confirm at raise sites.
    """


class WriteTimeout(TimeoutException):
    """
    `TimeoutException` subtype.

    NOTE(review): presumably a timeout while writing - confirm at raise sites.
    """
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Network errors


class NetworkError(Exception):
    """Base class for the connect, read and write network exceptions."""


class ConnectError(NetworkError):
    """
    `NetworkError` subtype.

    NOTE(review): presumably a failure while connecting - confirm at raise sites.
    """


class ReadError(NetworkError):
    """
    `NetworkError` subtype.

    NOTE(review): presumably a failure while reading - confirm at raise sites.
    """


class WriteError(NetworkError):
    """
    `NetworkError` subtype.

    NOTE(review): presumably a failure while writing - confirm at raise sites.
    """
|
env/lib/python3.13/site-packages/httpcore/_models.py
ADDED
|
@@ -0,0 +1,516 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import ssl
|
| 5 |
+
import typing
|
| 6 |
+
import urllib.parse
|
| 7 |
+
|
| 8 |
+
# Functions for typechecking...
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
ByteOrStr = typing.Union[bytes, str]
|
| 12 |
+
HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]]
|
| 13 |
+
HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr]
|
| 14 |
+
HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None]
|
| 15 |
+
|
| 16 |
+
Extensions = typing.MutableMapping[str, typing.Any]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def enforce_bytes(value: bytes | str, *, name: str) -> bytes:
|
| 20 |
+
"""
|
| 21 |
+
Any arguments that are ultimately represented as bytes can be specified
|
| 22 |
+
either as bytes or as strings.
|
| 23 |
+
|
| 24 |
+
However we enforce that any string arguments must only contain characters in
|
| 25 |
+
the plain ASCII range. chr(0)...chr(127). If you need to use characters
|
| 26 |
+
outside that range then be precise, and use a byte-wise argument.
|
| 27 |
+
"""
|
| 28 |
+
if isinstance(value, str):
|
| 29 |
+
try:
|
| 30 |
+
return value.encode("ascii")
|
| 31 |
+
except UnicodeEncodeError:
|
| 32 |
+
raise TypeError(f"{name} strings may not include unicode characters.")
|
| 33 |
+
elif isinstance(value, bytes):
|
| 34 |
+
return value
|
| 35 |
+
|
| 36 |
+
seen_type = type(value).__name__
|
| 37 |
+
raise TypeError(f"{name} must be bytes or str, but got {seen_type}.")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def enforce_url(value: URL | bytes | str, *, name: str) -> URL:
    """
    Coerce a URL argument into a `URL` instance.

    An existing `URL` is returned as-is; bytes or str are passed to the `URL`
    constructor. `name` is only used to label the argument in the error message.

    Raises:
        TypeError: if `value` is not a `URL`, bytes, or str.
    """
    if isinstance(value, URL):
        return value
    if isinstance(value, (bytes, str)):
        return URL(value)

    seen_type = type(value).__name__
    raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.")
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def enforce_headers(
    value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str
) -> list[tuple[bytes, bytes]]:
    """
    Convenience function that normalises request or response headers into a
    list of `(name, value)` byte pairs.

    `None` yields an empty list. A mapping contributes its items, a sequence
    contributes its two-tuples; every name and value is passed through
    `enforce_bytes`, so strings must be plain ASCII.

    Raises:
        TypeError: if `value` is neither a mapping nor a sequence (`name`
            labels the argument in that message), or if an individual header
            name or value is rejected by `enforce_bytes`.
    """
    if value is None:
        return []

    if isinstance(value, typing.Mapping):
        pairs = value.items()
    elif isinstance(value, typing.Sequence):
        pairs = value
    else:
        seen_type = type(value).__name__
        raise TypeError(
            f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}."
        )

    return [
        (
            enforce_bytes(key, name="header name"),
            enforce_bytes(val, name="header value"),
        )
        for key, val in pairs
    ]
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def enforce_stream(
|
| 86 |
+
value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None,
|
| 87 |
+
*,
|
| 88 |
+
name: str,
|
| 89 |
+
) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]:
|
| 90 |
+
if value is None:
|
| 91 |
+
return ByteStream(b"")
|
| 92 |
+
elif isinstance(value, bytes):
|
| 93 |
+
return ByteStream(value)
|
| 94 |
+
return value
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# * https://tools.ietf.org/html/rfc3986#section-3.2.3
|
| 98 |
+
# * https://url.spec.whatwg.org/#url-miscellaneous
|
| 99 |
+
# * https://url.spec.whatwg.org/#scheme-state
|
| 100 |
+
DEFAULT_PORTS = {
|
| 101 |
+
b"ftp": 21,
|
| 102 |
+
b"http": 80,
|
| 103 |
+
b"https": 443,
|
| 104 |
+
b"ws": 80,
|
| 105 |
+
b"wss": 443,
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def include_request_headers(
|
| 110 |
+
headers: list[tuple[bytes, bytes]],
|
| 111 |
+
*,
|
| 112 |
+
url: "URL",
|
| 113 |
+
content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes],
|
| 114 |
+
) -> list[tuple[bytes, bytes]]:
|
| 115 |
+
headers_set = set(k.lower() for k, v in headers)
|
| 116 |
+
|
| 117 |
+
if b"host" not in headers_set:
|
| 118 |
+
default_port = DEFAULT_PORTS.get(url.scheme)
|
| 119 |
+
if url.port is None or url.port == default_port:
|
| 120 |
+
header_value = url.host
|
| 121 |
+
else:
|
| 122 |
+
header_value = b"%b:%d" % (url.host, url.port)
|
| 123 |
+
headers = [(b"Host", header_value)] + headers
|
| 124 |
+
|
| 125 |
+
if (
|
| 126 |
+
content is not None
|
| 127 |
+
and b"content-length" not in headers_set
|
| 128 |
+
and b"transfer-encoding" not in headers_set
|
| 129 |
+
):
|
| 130 |
+
if isinstance(content, bytes):
|
| 131 |
+
content_length = str(len(content)).encode("ascii")
|
| 132 |
+
headers += [(b"Content-Length", content_length)]
|
| 133 |
+
else:
|
| 134 |
+
headers += [(b"Transfer-Encoding", b"chunked")] # pragma: nocover
|
| 135 |
+
|
| 136 |
+
return headers
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# Interfaces for byte streams...
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class ByteStream:
    """
    Holds a complete, in-memory body and exposes it through both the sync
    and the async iteration protocols.
    """

    def __init__(self, content: bytes) -> None:
        self._content = content

    def __iter__(self) -> typing.Iterator[bytes]:
        # The whole payload is delivered as one single chunk.
        yield self._content

    async def __aiter__(self) -> typing.AsyncIterator[bytes]:
        # Async counterpart of __iter__: one single chunk.
        yield self._content

    def __repr__(self) -> str:
        size = len(self._content)
        return f"<{type(self).__name__} [{size} bytes]>"
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
class Origin:
    """
    The (scheme, host, port) triple of a URL.

    Two origins compare equal when all three parts are equal. Comparing with
    any other type gives `False`.
    """

    def __init__(self, scheme: bytes, host: bytes, port: int) -> None:
        self.scheme = scheme
        self.host = host
        self.port = port

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, Origin):
            return False
        return (
            self.scheme == other.scheme
            and self.host == other.host
            and self.port == other.port
        )

    def __str__(self) -> str:
        # scheme and host are bytes and are decoded as ASCII for display.
        return (
            f"{self.scheme.decode('ascii')}://"
            f"{self.host.decode('ascii')}:{self.port!s}"
        )
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
class URL:
    """
    Represents the URL against which an HTTP request may be made.

    The URL may either be specified as a plain string, for convenience:

    ```python
    url = httpcore.URL("https://www.example.com/")
    ```

    Or be constructed with explicitly pre-parsed components:

    ```python
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/')
    ```

    The second, more explicit style lets integrations pass through URLs they
    have already parsed (for instance with a library such as `rfc-3986`)
    instead of relying on the stdlib parser, so that URL parsing is treated
    identically at the networking level and at any higher layer.

    The four components also make it possible to express requests that a URL
    string cannot describe.

    For example, an HTTP request to `https://www.example.com/` forwarded via a
    proxy at `http://localhost:8080`...

    ```python
    # Constructs an HTTP request with a complete URL as the target:
    # GET https://www.example.com/ HTTP/1.1
    url = httpcore.URL(
        scheme=b'http',
        host=b'localhost',
        port=8080,
        target=b'https://www.example.com/'
    )
    request = httpcore.Request(
        method="GET",
        url=url
    )
    ```

    Another example is constructing an `OPTIONS *` request...

    ```python
    # Constructs an 'OPTIONS *' HTTP request:
    # OPTIONS * HTTP/1.1
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*')
    request = httpcore.Request(method="OPTIONS", url=url)
    ```

    This kind of request is not possible to formulate with a URL string,
    because the `/` delimiter is always used to demarcate the target from the
    host/port portion of the URL.

    For convenience, string-like arguments may be specified either as strings
    or as bytes. However, once a request is being issued over-the-wire, the
    URL components are always ultimately required to be a bytewise
    representation.

    In order to avoid any ambiguity over character encodings, when strings are
    used as arguments they must be strictly limited to the ASCII range
    `chr(0)`-`chr(127)`. If you require a bytewise representation that is
    outside this range you must handle the character encoding directly, and
    pass a bytes instance.
    """

    def __init__(
        self,
        url: bytes | str = "",
        *,
        scheme: bytes | str = b"",
        host: bytes | str = b"",
        port: int | None = None,
        target: bytes | str = b"",
    ) -> None:
        """
        Parameters:
            url: The complete URL as a string or bytes. When given (non-empty),
                the keyword components below are ignored.
            scheme: The URL scheme as a string or bytes.
                Typically either `"http"` or `"https"`.
            host: The URL host as a string or bytes. Such as `"www.example.com"`.
            port: The port to connect to. Either an integer or `None`.
            target: The target of the HTTP request. Such as `"/items?search=red"`.
        """
        if not url:
            # Pre-parsed form: take the components exactly as supplied.
            self.scheme = enforce_bytes(scheme, name="scheme")
            self.host = enforce_bytes(host, name="host")
            self.port = port
            self.target = enforce_bytes(target, name="target")
            return

        parts = urllib.parse.urlparse(enforce_bytes(url, name="url"))
        self.scheme = parts.scheme
        self.host = parts.hostname or b""
        self.port = parts.port
        # The target is the path (defaulting to "/") plus any query string;
        # the fragment is not included.
        request_target = parts.path or b"/"
        if parts.query:
            request_target = request_target + (b"?" + parts.query)
        self.target = request_target

    @property
    def origin(self) -> Origin:
        """
        The `Origin` for this URL, with the scheme's default port substituted
        when no port is set. Raises `KeyError` for a scheme not in the table.
        """
        well_known_ports = {
            b"http": 80,
            b"https": 443,
            b"ws": 80,
            b"wss": 443,
            b"socks5": 1080,
            b"socks5h": 1080,
        }
        default_port = well_known_ports[self.scheme]
        return Origin(
            scheme=self.scheme, host=self.host, port=self.port or default_port
        )

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, URL):
            return False
        return (
            other.scheme == self.scheme
            and other.host == self.host
            and other.port == self.port
            and other.target == self.target
        )

    def __bytes__(self) -> bytes:
        # The port is only rendered when one was explicitly set.
        if self.port is not None:
            return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target)
        return b"%b://%b%b" % (self.scheme, self.host, self.target)

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        fields = (
            f"scheme={self.scheme!r}, host={self.host!r}, "
            f"port={self.port!r}, target={self.target!r}"
        )
        return f"{cls_name}({fields})"
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
class Request:
    """
    An HTTP request.
    """

    def __init__(
        self,
        method: bytes | str,
        url: URL | bytes | str,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            method: The HTTP request method, either as a string or bytes.
                For example: `GET`.
            url: The request URL, either as a `URL` instance, or as a string
                or bytes. For example: `"https://www.example.com"`.
            headers: The HTTP request headers.
            content: The content of the request body.
            extensions: A dictionary of optional extra information included on
                the request. Possible keys include `"timeout"`, and `"trace"`.
                A `"target"` key, when present, replaces the target of `url`.
        """
        self.method: bytes = enforce_bytes(method, name="method")
        self.url: URL = enforce_url(url, name="url")
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )
        self.extensions = extensions if extensions is not None else {}

        if "target" not in self.extensions:
            return

        # Rebuild the URL around the overriding target, keeping the
        # scheme, host and port that were parsed above.
        parsed = self.url
        self.url = URL(
            scheme=parsed.scheme,
            host=parsed.host,
            port=parsed.port,
            target=self.extensions["target"],
        )

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"<{cls_name} [{self.method!r}]>"
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
class Response:
    """
    An HTTP response.
    """

    def __init__(
        self,
        status: int,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            status: The HTTP status code of the response. For example `200`.
            headers: The HTTP response headers.
            content: The content of the response body.
            extensions: A dictionary of optional extra information included on
                the response. Possible keys include `"http_version"`,
                `"reason_phrase"`, and `"network_stream"`.
        """
        self.status: int = status
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )
        self.extensions = extensions if extensions is not None else {}

        # Set once iteration over the body has started; the body can only
        # be iterated a single time.
        self._stream_consumed = False

    @property
    def content(self) -> bytes:
        """
        The body bytes, available only after `read()` / `aread()` has stored
        them. Raises `RuntimeError` if the body has not been read yet.
        """
        if hasattr(self, "_content"):
            return self._content

        # The hint points at the read method that matches the stream type.
        if isinstance(self.stream, typing.Iterable):
            raise RuntimeError(
                "Attempted to access 'response.content' on a streaming response. "
                "Call 'response.read()' first."
            )
        raise RuntimeError(
            "Attempted to access 'response.content' on a streaming response. "
            "Call 'await response.aread()' first."
        )

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"<{cls_name} [{self.status}]>"

    # Sync interface...

    def read(self) -> bytes:
        """Read the whole body once, store it, and return it."""
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an asynchronous response using 'response.read()'. "
                "You should use 'await response.aread()' instead."
            )
        if not hasattr(self, "_content"):
            self._content = b"".join(self.iter_stream())
        return self._content

    def iter_stream(self) -> typing.Iterator[bytes]:
        """
        Iterate over the body chunks. May only be iterated once; the checks
        run when iteration starts, not when this method is called.
        """
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an asynchronous response using 'for ... in "
                "response.iter_stream()'. "
                "You should use 'async for ... in response.aiter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'for ... in response.iter_stream()' more than once."
            )
        self._stream_consumed = True
        for piece in self.stream:
            yield piece

    def close(self) -> None:
        """Close the underlying stream if it provides a `close()` method."""
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close an asynchronous response using 'response.close()'. "
                "You should use 'await response.aclose()' instead."
            )
        if hasattr(self.stream, "close"):
            self.stream.close()

    # Async interface...

    async def aread(self) -> bytes:
        """Async counterpart of `read()`."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an synchronous response using "
                "'await response.aread()'. "
                "You should use 'response.read()' instead."
            )
        if not hasattr(self, "_content"):
            pieces = []
            async for piece in self.aiter_stream():
                pieces.append(piece)
            self._content = b"".join(pieces)
        return self._content

    async def aiter_stream(self) -> typing.AsyncIterator[bytes]:
        """Async counterpart of `iter_stream()`; may only be iterated once."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an synchronous response using 'async for ... in "
                "response.aiter_stream()'. "
                "You should use 'for ... in response.iter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'async for ... in response.aiter_stream()' "
                "more than once."
            )
        self._stream_consumed = True
        async for piece in self.stream:
            yield piece

    async def aclose(self) -> None:
        """Async counterpart of `close()`; awaits the stream's `aclose()` if present."""
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close a synchronous response using "
                "'await response.aclose()'. "
                "You should use 'response.close()' instead."
            )
        if hasattr(self.stream, "aclose"):
            await self.stream.aclose()
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
class Proxy:
    """
    Configuration for a proxy: its URL, optional credentials, extra headers
    and an optional SSL context.

    When `auth` is supplied, a Basic `Proxy-Authorization` header built from
    the username and password is placed in front of the given headers.
    """

    def __init__(
        self,
        url: URL | bytes | str,
        auth: tuple[bytes | str, bytes | str] | None = None,
        headers: HeadersAsMapping | HeadersAsSequence | None = None,
        ssl_context: ssl.SSLContext | None = None,
    ):
        self.url = enforce_url(url, name="url")
        self.headers = enforce_headers(headers, name="headers")
        self.ssl_context = ssl_context

        if auth is None:
            self.auth = None
            return

        username = enforce_bytes(auth[0], name="auth")
        password = enforce_bytes(auth[1], name="auth")
        # Basic credentials are base64("username:password").
        token = base64.b64encode(username + b":" + password)
        self.auth: tuple[bytes, bytes] | None = (username, password)
        self.headers = [(b"Proxy-Authorization", b"Basic " + token)] + self.headers
|
env/lib/python3.13/site-packages/httpcore/_ssl.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ssl
|
| 2 |
+
|
| 3 |
+
import certifi
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def default_ssl_context() -> ssl.SSLContext:
    """
    Return a default SSL context that has additionally loaded the CA
    bundle located at `certifi.where()`.
    """
    ca_bundle = certifi.where()
    ssl_ctx = ssl.create_default_context()
    ssl_ctx.load_verify_locations(ca_bundle)
    return ssl_ctx
|
env/lib/python3.13/site-packages/httpcore/_synchronization.py
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import threading
|
| 4 |
+
import types
|
| 5 |
+
|
| 6 |
+
from ._exceptions import ExceptionMapping, PoolTimeout, map_exceptions
|
| 7 |
+
|
| 8 |
+
# Our async synchronization primitives use either 'anyio' or 'trio' depending
|
| 9 |
+
# on if they're running under asyncio or trio.
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import trio
|
| 13 |
+
except (ImportError, NotImplementedError): # pragma: nocover
|
| 14 |
+
trio = None # type: ignore
|
| 15 |
+
|
| 16 |
+
try:
|
| 17 |
+
import anyio
|
| 18 |
+
except ImportError: # pragma: nocover
|
| 19 |
+
anyio = None # type: ignore
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def current_async_library() -> str:
    """
    Return the name of the async environment in use: "asyncio" or "trio".

    Detection is delegated to `sniffio` when it can be imported; otherwise
    "asyncio" is assumed. See https://sniffio.readthedocs.io/en/latest/

    Raises `RuntimeError` if the detected environment is neither of the two,
    or if the support package for the detected environment (`anyio` for
    asyncio, `trio` for trio) was not importable at module load.
    """
    try:
        import sniffio
    except ImportError:  # pragma: nocover
        backend = "asyncio"
    else:
        backend = sniffio.current_async_library()

    if backend == "asyncio":
        if anyio is None:  # pragma: nocover
            raise RuntimeError(
                "Running with asyncio requires installation of 'httpcore[asyncio]'."
            )
    elif backend == "trio":
        if trio is None:  # pragma: nocover
            raise RuntimeError(
                "Running with trio requires installation of 'httpcore[trio]'."
            )
    else:  # pragma: nocover
        raise RuntimeError("Running under an unsupported async environment.")

    return backend
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class AsyncLock:
    """
    This is a standard lock.

    In the sync case `Lock` provides thread locking.
    In the async case `AsyncLock` provides async locking.

    The backend lock is created lazily, the first time the lock is entered,
    because the async library in use is only detected at that point.
    """

    def __init__(self) -> None:
        # An empty string means "backend not detected yet".
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a lock with the correct implementation.
        """
        backend = current_async_library()
        self._backend = backend
        if backend == "trio":
            self._trio_lock = trio.Lock()
        elif backend == "asyncio":
            self._anyio_lock = anyio.Lock()

    async def __aenter__(self) -> AsyncLock:
        """Acquire the backend lock, creating it first if necessary."""
        if not self._backend:
            self.setup()

        backend = self._backend
        if backend == "trio":
            await self._trio_lock.acquire()
        elif backend == "asyncio":
            await self._anyio_lock.acquire()

        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """Release the backend lock; exception details are not inspected."""
        backend = self._backend
        if backend == "trio":
            self._trio_lock.release()
        elif backend == "asyncio":
            self._anyio_lock.release()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class AsyncThreadLock:
    """
    This is a threading-only lock for no-I/O contexts.

    In the sync case `ThreadLock` provides thread locking.
    In the async case `AsyncThreadLock` is a no-op.
    """

    def __enter__(self) -> AsyncThreadLock:
        """Do nothing and hand back the instance."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """Do nothing; any exception in the body propagates unchanged."""
        return None
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class AsyncEvent:
    """
    An event usable under either asyncio (via anyio) or trio.

    The backend event is created lazily on first use of `set()` or `wait()`.
    """

    def __init__(self) -> None:
        # An empty string means "backend not detected yet".
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        an event with the correct implementation.
        """
        backend = current_async_library()
        self._backend = backend
        if backend == "trio":
            self._trio_event = trio.Event()
        elif backend == "asyncio":
            self._anyio_event = anyio.Event()

    def set(self) -> None:
        """Set the backend event, creating it first if necessary."""
        if not self._backend:
            self.setup()

        backend = self._backend
        if backend == "trio":
            self._trio_event.set()
        elif backend == "asyncio":
            self._anyio_event.set()

    async def wait(self, timeout: float | None = None) -> None:
        """
        Wait until the event is set, for at most `timeout`.

        `None` means no time limit. The backend's timeout exception is
        mapped to `PoolTimeout` through `map_exceptions`.
        """
        if not self._backend:
            self.setup()

        backend = self._backend
        if backend == "trio":
            trio_exc_map: ExceptionMapping = {trio.TooSlowError: PoolTimeout}
            # trio.fail_after is given infinity to express "no limit".
            if timeout is None:
                limit = float("inf")
            else:
                limit = timeout
            with map_exceptions(trio_exc_map), trio.fail_after(limit):
                await self._trio_event.wait()
        elif backend == "asyncio":
            anyio_exc_map: ExceptionMapping = {TimeoutError: PoolTimeout}
            with map_exceptions(anyio_exc_map), anyio.fail_after(timeout):
                await self._anyio_event.wait()
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class AsyncSemaphore:
    """
    A semaphore usable under either asyncio (via anyio) or trio.

    `bound` is used as both the initial and the maximum value. The backend
    semaphore is created lazily on the first `acquire()`.
    """

    def __init__(self, bound: int) -> None:
        self._bound = bound
        # An empty string means "backend not detected yet".
        self._backend = ""

    def setup(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a semaphore with the correct implementation.
        """
        backend = current_async_library()
        self._backend = backend
        limits = {"initial_value": self._bound, "max_value": self._bound}
        if backend == "trio":
            self._trio_semaphore = trio.Semaphore(**limits)
        elif backend == "asyncio":
            self._anyio_semaphore = anyio.Semaphore(**limits)

    async def acquire(self) -> None:
        """Acquire the backend semaphore, creating it first if necessary."""
        if not self._backend:
            self.setup()

        backend = self._backend
        if backend == "trio":
            await self._trio_semaphore.acquire()
        elif backend == "asyncio":
            await self._anyio_semaphore.acquire()

    async def release(self) -> None:
        """
        Release the backend semaphore.

        Does nothing if no backend has been set up yet.
        """
        backend = self._backend
        if backend == "trio":
            self._trio_semaphore.release()
        elif backend == "asyncio":
            self._anyio_semaphore.release()
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
class AsyncShieldCancellation:
    # For certain portions of our codebase where we're dealing with
    # closing connections during exception handling we want to shield
    # the operation from being cancelled.
    #
    # with AsyncShieldCancellation():
    #     ... # clean-up operations, shielded from cancellation.

    def __init__(self) -> None:
        """
        Detect if we're running under 'asyncio' or 'trio' and create
        a shielded scope with the correct implementation.
        """
        backend = current_async_library()
        self._backend = backend

        if backend == "trio":
            self._trio_shield = trio.CancelScope(shield=True)
        elif backend == "asyncio":
            self._anyio_shield = anyio.CancelScope(shield=True)

    def _active_scope(self) -> typing.Any:
        # Return the cancel scope for the detected backend, or None.
        if self._backend == "trio":
            return self._trio_shield
        if self._backend == "asyncio":
            return self._anyio_shield
        return None

    def __enter__(self) -> AsyncShieldCancellation:
        """Enter the backend's shielded cancel scope."""
        scope = self._active_scope()
        if scope is not None:
            scope.__enter__()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """
        Exit the backend's shielded cancel scope, forwarding the exception
        details. The scope's own return value is discarded.
        """
        scope = self._active_scope()
        if scope is not None:
            scope.__exit__(exc_type, exc_value, traceback)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# Our thread-based synchronization primitives...
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
class Lock:
    """
    This is a standard lock.

    In the sync case `Lock` provides thread locking.
    In the async case `AsyncLock` provides async locking.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()

    def __enter__(self) -> Lock:
        """Block until the underlying `threading.Lock` is acquired."""
        underlying = self._lock
        underlying.acquire()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """Release the underlying lock; exception details are not inspected."""
        underlying = self._lock
        underlying.release()
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
class ThreadLock:
    """
    This is a threading-only lock for no-I/O contexts.

    In the sync case `ThreadLock` provides thread locking.
    In the async case `AsyncThreadLock` is a no-op.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()

    def __enter__(self) -> ThreadLock:
        """Block until the underlying `threading.Lock` is acquired."""
        underlying = self._lock
        underlying.acquire()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """Release the underlying lock; exception details are not inspected."""
        underlying = self._lock
        underlying.release()
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
class Event:
    """Thread-based event backed by `threading.Event`."""

    def __init__(self) -> None:
        self._event = threading.Event()

    def set(self) -> None:
        """Set the underlying event."""
        self._event.set()

    def wait(self, timeout: float | None = None) -> None:
        """
        Block until the event is set, for at most `timeout` seconds.

        `None` and `float("inf")` both mean "wait without a time limit".
        Raises `PoolTimeout` if the wait ends without the event being set.
        """
        # An infinite timeout is handed to threading as "no timeout".
        wait_for = None if timeout == float("inf") else timeout
        was_set = self._event.wait(timeout=wait_for)
        if was_set:
            return
        raise PoolTimeout()  # pragma: nocover
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
class Semaphore:
    """Thread-based semaphore backed by `threading.Semaphore`."""

    def __init__(self, bound: int) -> None:
        # `bound` is the initial counter value of the underlying semaphore.
        self._semaphore = threading.Semaphore(bound)

    def acquire(self) -> None:
        """Block until the underlying semaphore is acquired."""
        underlying = self._semaphore
        underlying.acquire()

    def release(self) -> None:
        """Release the underlying semaphore once."""
        underlying = self._semaphore
        underlying.release()
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
class ShieldCancellation:
    # Thread-synchronous codebases don't support cancellation semantics.
    # We have this class because we need to mirror the async and sync
    # cases within our package, but it's just a no-op.

    def __enter__(self) -> ShieldCancellation:
        """Do nothing and hand back the instance."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        """Do nothing; any exception in the body propagates unchanged."""
        return None
|
env/lib/python3.13/site-packages/httpcore/_trace.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import logging
|
| 5 |
+
import types
|
| 6 |
+
import typing
|
| 7 |
+
|
| 8 |
+
from ._models import Request
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Trace:
    """
    Context manager that reports the lifecycle of a single named operation.

    On entry a `<name>.started` event is emitted; on exit either
    `<name>.complete` (carrying `return_value`) or `<name>.failed`
    (carrying the exception). Events are delivered to:

    * the callable stored under the "trace" key of `request.extensions`,
      when a request is supplied and has one; and
    * `logger.debug(...)`, when the logger is enabled for DEBUG.

    Use `with Trace(...)` together with a plain-function callback, and
    `async with Trace(...)` together with an async callback. Set
    `return_value` inside the block to have it reported on completion.
    """

    def __init__(
        self,
        name: str,
        logger: logging.Logger,
        request: Request | None = None,
        kwargs: dict[str, typing.Any] | None = None,
    ) -> None:
        self.name = name
        self.logger = logger
        self.trace_extension = (
            None if request is None else request.extensions.get("trace")
        )
        # Evaluated once, at construction time.
        self.debug = self.logger.isEnabledFor(logging.DEBUG)
        self.kwargs = kwargs or {}
        self.return_value: typing.Any = None
        self.should_trace = self.debug or self.trace_extension is not None
        # Last dotted component of the logger name, used to prefix the
        # event names passed to the trace callback.
        self.prefix = self.logger.name.split(".")[-1]

    @staticmethod
    def _format_message(name: str, info: dict[str, typing.Any]) -> str:
        # Build the debug log line: just the event name when there is nothing
        # worth showing (no info, or a `return_value` of None), otherwise the
        # name followed by space-separated `key=repr(value)` pairs.
        if not info or ("return_value" in info and info["return_value"] is None):
            return name
        args = " ".join([f"{key}={value!r}" for key, value in info.items()])
        return f"{name} {args}"

    def trace(self, name: str, info: dict[str, typing.Any]) -> None:
        """
        Emit one event synchronously.

        Raises `TypeError` if the trace callback returned a coroutine, i.e.
        an async callback was used with the synchronous interface.
        """
        if self.trace_extension is not None:
            prefix_and_name = f"{self.prefix}.{name}"
            ret = self.trace_extension(prefix_and_name, info)
            if inspect.iscoroutine(ret):  # pragma: no cover
                # We will never await this coroutine; close it so it does not
                # also trigger a "coroutine ... was never awaited" warning.
                ret.close()
                raise TypeError(
                    "If you are using a synchronous interface, "
                    "the callback of the `trace` extension should "
                    "be a normal function instead of an asynchronous function."
                )

        if self.debug:
            self.logger.debug(self._format_message(name, info))

    def __enter__(self) -> Trace:
        if self.should_trace:
            info = self.kwargs
            self.trace(f"{self.name}.started", info)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        # Returns None, so an exception raised in the body keeps propagating.
        if self.should_trace:
            if exc_value is None:
                info = {"return_value": self.return_value}
                self.trace(f"{self.name}.complete", info)
            else:
                info = {"exception": exc_value}
                self.trace(f"{self.name}.failed", info)

    async def atrace(self, name: str, info: dict[str, typing.Any]) -> None:
        """
        Emit one event asynchronously.

        Raises `TypeError` if the trace callback did not return a coroutine,
        i.e. a plain function was used with the asynchronous interface.
        """
        if self.trace_extension is not None:
            prefix_and_name = f"{self.prefix}.{name}"
            coro = self.trace_extension(prefix_and_name, info)
            if not inspect.iscoroutine(coro):  # pragma: no cover
                raise TypeError(
                    "If you're using an asynchronous interface, "
                    "the callback of the `trace` extension should "
                    "be an asynchronous function rather than a normal function."
                )
            await coro

        if self.debug:
            self.logger.debug(self._format_message(name, info))

    async def __aenter__(self) -> Trace:
        if self.should_trace:
            info = self.kwargs
            await self.atrace(f"{self.name}.started", info)
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: types.TracebackType | None = None,
    ) -> None:
        # Returns None, so an exception raised in the body keeps propagating.
        if self.should_trace:
            if exc_value is None:
                info = {"return_value": self.return_value}
                await self.atrace(f"{self.name}.complete", info)
            else:
                info = {"exception": exc_value}
                await self.atrace(f"{self.name}.failed", info)
|
env/lib/python3.13/site-packages/httpcore/_utils.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import select
|
| 4 |
+
import socket
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def is_socket_readable(sock: socket.socket | None) -> bool:
    """
    Return whether a socket, as identified by its file descriptor, is readable.

    "A socket is readable" means that the read buffer isn't empty, i.e. that
    calling .recv() on it would immediately return some data.
    """
    # NOTE: we want to check for readability without actually attempting to
    # read, because we don't want to block forever if it's not readable.

    # In the case that the socket no longer exists, or cannot return a file
    # descriptor, we treat it as being readable, as if the next read operation
    # on it is ready to return the terminating `b""`.
    if sock is None:  # pragma: nocover
        return True
    fd = sock.fileno()
    if fd is None or fd < 0:  # pragma: nocover
        return True

    # The implementation below was stolen from:
    # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478
    # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316

    # Use select.select on Windows and wherever select.poll is unavailable
    # (e.g. when eventlet is in use, see #327); use select.poll everywhere else.
    poll_missing = getattr(select, "poll", None) is None
    if sys.platform == "win32" or poll_missing:  # pragma: nocover
        readable, _, _ = select.select([fd], [], [], 0)
        return bool(readable)

    poller = select.poll()
    poller.register(fd, select.POLLIN)
    # A timeout of 0 makes this a non-blocking check.
    events = poller.poll(0)
    return bool(events)
|
env/lib/python3.13/site-packages/httpcore/py.typed
ADDED
|
File without changes
|
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/METADATA
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.3
|
| 2 |
+
Name: httpx
|
| 3 |
+
Version: 0.28.1
|
| 4 |
+
Summary: The next generation HTTP client.
|
| 5 |
+
Project-URL: Changelog, https://github.com/encode/httpx/blob/master/CHANGELOG.md
|
| 6 |
+
Project-URL: Documentation, https://www.python-httpx.org
|
| 7 |
+
Project-URL: Homepage, https://github.com/encode/httpx
|
| 8 |
+
Project-URL: Source, https://github.com/encode/httpx
|
| 9 |
+
Author-email: Tom Christie <tom@tomchristie.com>
|
| 10 |
+
License: BSD-3-Clause
|
| 11 |
+
Classifier: Development Status :: 4 - Beta
|
| 12 |
+
Classifier: Environment :: Web Environment
|
| 13 |
+
Classifier: Framework :: AsyncIO
|
| 14 |
+
Classifier: Framework :: Trio
|
| 15 |
+
Classifier: Intended Audience :: Developers
|
| 16 |
+
Classifier: License :: OSI Approved :: BSD License
|
| 17 |
+
Classifier: Operating System :: OS Independent
|
| 18 |
+
Classifier: Programming Language :: Python :: 3
|
| 19 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.8
|
| 21 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 22 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 23 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 24 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 25 |
+
Classifier: Topic :: Internet :: WWW/HTTP
|
| 26 |
+
Requires-Python: >=3.8
|
| 27 |
+
Requires-Dist: anyio
|
| 28 |
+
Requires-Dist: certifi
|
| 29 |
+
Requires-Dist: httpcore==1.*
|
| 30 |
+
Requires-Dist: idna
|
| 31 |
+
Provides-Extra: brotli
|
| 32 |
+
Requires-Dist: brotli; (platform_python_implementation == 'CPython') and extra == 'brotli'
|
| 33 |
+
Requires-Dist: brotlicffi; (platform_python_implementation != 'CPython') and extra == 'brotli'
|
| 34 |
+
Provides-Extra: cli
|
| 35 |
+
Requires-Dist: click==8.*; extra == 'cli'
|
| 36 |
+
Requires-Dist: pygments==2.*; extra == 'cli'
|
| 37 |
+
Requires-Dist: rich<14,>=10; extra == 'cli'
|
| 38 |
+
Provides-Extra: http2
|
| 39 |
+
Requires-Dist: h2<5,>=3; extra == 'http2'
|
| 40 |
+
Provides-Extra: socks
|
| 41 |
+
Requires-Dist: socksio==1.*; extra == 'socks'
|
| 42 |
+
Provides-Extra: zstd
|
| 43 |
+
Requires-Dist: zstandard>=0.18.0; extra == 'zstd'
|
| 44 |
+
Description-Content-Type: text/markdown
|
| 45 |
+
|
| 46 |
+
<p align="center">
|
| 47 |
+
<a href="https://www.python-httpx.org/"><img width="350" height="208" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/butterfly.png" alt='HTTPX'></a>
|
| 48 |
+
</p>
|
| 49 |
+
|
| 50 |
+
<p align="center"><strong>HTTPX</strong> <em>- A next-generation HTTP client for Python.</em></p>
|
| 51 |
+
|
| 52 |
+
<p align="center">
|
| 53 |
+
<a href="https://github.com/encode/httpx/actions">
|
| 54 |
+
<img src="https://github.com/encode/httpx/workflows/Test%20Suite/badge.svg" alt="Test Suite">
|
| 55 |
+
</a>
|
| 56 |
+
<a href="https://pypi.org/project/httpx/">
|
| 57 |
+
<img src="https://badge.fury.io/py/httpx.svg" alt="Package version">
|
| 58 |
+
</a>
|
| 59 |
+
</p>
|
| 60 |
+
|
| 61 |
+
HTTPX is a fully featured HTTP client library for Python 3. It includes **an integrated command line client**, has support for both **HTTP/1.1 and HTTP/2**, and provides both **sync and async APIs**.
|
| 62 |
+
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
Install HTTPX using pip:
|
| 66 |
+
|
| 67 |
+
```shell
|
| 68 |
+
$ pip install httpx
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Now, let's get started:
|
| 72 |
+
|
| 73 |
+
```pycon
|
| 74 |
+
>>> import httpx
|
| 75 |
+
>>> r = httpx.get('https://www.example.org/')
|
| 76 |
+
>>> r
|
| 77 |
+
<Response [200 OK]>
|
| 78 |
+
>>> r.status_code
|
| 79 |
+
200
|
| 80 |
+
>>> r.headers['content-type']
|
| 81 |
+
'text/html; charset=UTF-8'
|
| 82 |
+
>>> r.text
|
| 83 |
+
'<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>...'
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
Or, using the command-line client.
|
| 87 |
+
|
| 88 |
+
```shell
|
| 89 |
+
$ pip install 'httpx[cli]' # The command line client is an optional dependency.
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
Which now allows us to use HTTPX directly from the command-line...
|
| 93 |
+
|
| 94 |
+
<p align="center">
|
| 95 |
+
<img width="700" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/httpx-help.png" alt='httpx --help'>
|
| 96 |
+
</p>
|
| 97 |
+
|
| 98 |
+
Sending a request...
|
| 99 |
+
|
| 100 |
+
<p align="center">
|
| 101 |
+
<img width="700" src="https://raw.githubusercontent.com/encode/httpx/master/docs/img/httpx-request.png" alt='httpx http://httpbin.org/json'>
|
| 102 |
+
</p>
|
| 103 |
+
|
| 104 |
+
## Features
|
| 105 |
+
|
| 106 |
+
HTTPX builds on the well-established usability of `requests`, and gives you:
|
| 107 |
+
|
| 108 |
+
* A broadly [requests-compatible API](https://www.python-httpx.org/compatibility/).
|
| 109 |
+
* An integrated command-line client.
|
| 110 |
+
* HTTP/1.1 [and HTTP/2 support](https://www.python-httpx.org/http2/).
|
| 111 |
+
* Standard synchronous interface, but with [async support if you need it](https://www.python-httpx.org/async/).
|
| 112 |
+
* Ability to make requests directly to [WSGI applications](https://www.python-httpx.org/advanced/transports/#wsgi-transport) or [ASGI applications](https://www.python-httpx.org/advanced/transports/#asgi-transport).
|
| 113 |
+
* Strict timeouts everywhere.
|
| 114 |
+
* Fully type annotated.
|
| 115 |
+
* 100% test coverage.
|
| 116 |
+
|
| 117 |
+
Plus all the standard features of `requests`...
|
| 118 |
+
|
| 119 |
+
* International Domains and URLs
|
| 120 |
+
* Keep-Alive & Connection Pooling
|
| 121 |
+
* Sessions with Cookie Persistence
|
| 122 |
+
* Browser-style SSL Verification
|
| 123 |
+
* Basic/Digest Authentication
|
| 124 |
+
* Elegant Key/Value Cookies
|
| 125 |
+
* Automatic Decompression
|
| 126 |
+
* Automatic Content Decoding
|
| 127 |
+
* Unicode Response Bodies
|
| 128 |
+
* Multipart File Uploads
|
| 129 |
+
* HTTP(S) Proxy Support
|
| 130 |
+
* Connection Timeouts
|
| 131 |
+
* Streaming Downloads
|
| 132 |
+
* .netrc Support
|
| 133 |
+
* Chunked Requests
|
| 134 |
+
|
| 135 |
+
## Installation
|
| 136 |
+
|
| 137 |
+
Install with pip:
|
| 138 |
+
|
| 139 |
+
```shell
|
| 140 |
+
$ pip install httpx
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
Or, to include the optional HTTP/2 support, use:
|
| 144 |
+
|
| 145 |
+
```shell
|
| 146 |
+
$ pip install httpx[http2]
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
HTTPX requires Python 3.8+.
|
| 150 |
+
|
| 151 |
+
## Documentation
|
| 152 |
+
|
| 153 |
+
Project documentation is available at [https://www.python-httpx.org/](https://www.python-httpx.org/).
|
| 154 |
+
|
| 155 |
+
For a run-through of all the basics, head over to the [QuickStart](https://www.python-httpx.org/quickstart/).
|
| 156 |
+
|
| 157 |
+
For more advanced topics, see the [Advanced Usage](https://www.python-httpx.org/advanced/) section, the [async support](https://www.python-httpx.org/async/) section, or the [HTTP/2](https://www.python-httpx.org/http2/) section.
|
| 158 |
+
|
| 159 |
+
The [Developer Interface](https://www.python-httpx.org/api/) provides a comprehensive API reference.
|
| 160 |
+
|
| 161 |
+
To find out about tools that integrate with HTTPX, see [Third Party Packages](https://www.python-httpx.org/third_party_packages/).
|
| 162 |
+
|
| 163 |
+
## Contribute
|
| 164 |
+
|
| 165 |
+
If you want to contribute to HTTPX, check out the [Contributing Guide](https://www.python-httpx.org/contributing/) to learn how to start.
|
| 166 |
+
|
| 167 |
+
## Dependencies
|
| 168 |
+
|
| 169 |
+
The HTTPX project relies on these excellent libraries:
|
| 170 |
+
|
| 171 |
+
* `httpcore` - The underlying transport implementation for `httpx`.
|
| 172 |
+
* `h11` - HTTP/1.1 support.
|
| 173 |
+
* `certifi` - SSL certificates.
|
| 174 |
+
* `idna` - Internationalized domain name support.
|
| 175 |
+
* `sniffio` - Async library autodetection.
|
| 176 |
+
|
| 177 |
+
As well as these optional installs:
|
| 178 |
+
|
| 179 |
+
* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
|
| 180 |
+
* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)*
|
| 181 |
+
* `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
|
| 182 |
+
* `click` - Command line client support. *(Optional, with `httpx[cli]`)*
|
| 183 |
+
* `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
|
| 184 |
+
* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
|
| 185 |
+
|
| 186 |
+
A huge amount of credit is due to `requests` for the API layout that
|
| 187 |
+
much of this work follows, as well as to `urllib3` for plenty of design
|
| 188 |
+
inspiration around the lower-level networking details.
|
| 189 |
+
|
| 190 |
+
---
|
| 191 |
+
|
| 192 |
+
<p align="center"><i>HTTPX is <a href="https://github.com/encode/httpx/blob/master/LICENSE.md">BSD licensed</a> code.<br/>Designed & crafted with care.</i><br/>— 🦋 —</p>
|
| 193 |
+
|
| 194 |
+
## Release Information
|
| 195 |
+
|
| 196 |
+
### Fixed
|
| 197 |
+
|
| 198 |
+
* Reintroduced supposedly-private `URLTypes` shortcut. (#2673)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
[Full changelog](https://github.com/encode/httpx/blob/master/CHANGELOG.md)
|
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/RECORD
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
../../../bin/httpx,sha256=Rdk5LW602TkuZHquotWRLLc4TnaaXhrPLmhrYBsr8js,253
|
| 2 |
+
httpx-0.28.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 3 |
+
httpx-0.28.1.dist-info/METADATA,sha256=_rubD48-gNV8gZnDBPNcQzboWB0dGNeYPJJ2a4J5OyU,7052
|
| 4 |
+
httpx-0.28.1.dist-info/RECORD,,
|
| 5 |
+
httpx-0.28.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
| 6 |
+
httpx-0.28.1.dist-info/entry_points.txt,sha256=2lVkdQmxLA1pNMgSN2eV89o90HCZezhmNwsy6ryKDSA,37
|
| 7 |
+
httpx-0.28.1.dist-info/licenses/LICENSE.md,sha256=TsWdVE8StfU5o6cW_TIaxYzNgDC0ZSIfLIgCAM3yjY0,1508
|
| 8 |
+
httpx/__init__.py,sha256=CsaZe6yZj0rHg6322AWKWHGTMVr9txgEfD5P3_Rrz60,2171
|
| 9 |
+
httpx/__pycache__/__init__.cpython-313.pyc,,
|
| 10 |
+
httpx/__pycache__/__version__.cpython-313.pyc,,
|
| 11 |
+
httpx/__pycache__/_api.cpython-313.pyc,,
|
| 12 |
+
httpx/__pycache__/_auth.cpython-313.pyc,,
|
| 13 |
+
httpx/__pycache__/_client.cpython-313.pyc,,
|
| 14 |
+
httpx/__pycache__/_config.cpython-313.pyc,,
|
| 15 |
+
httpx/__pycache__/_content.cpython-313.pyc,,
|
| 16 |
+
httpx/__pycache__/_decoders.cpython-313.pyc,,
|
| 17 |
+
httpx/__pycache__/_exceptions.cpython-313.pyc,,
|
| 18 |
+
httpx/__pycache__/_main.cpython-313.pyc,,
|
| 19 |
+
httpx/__pycache__/_models.cpython-313.pyc,,
|
| 20 |
+
httpx/__pycache__/_multipart.cpython-313.pyc,,
|
| 21 |
+
httpx/__pycache__/_status_codes.cpython-313.pyc,,
|
| 22 |
+
httpx/__pycache__/_types.cpython-313.pyc,,
|
| 23 |
+
httpx/__pycache__/_urlparse.cpython-313.pyc,,
|
| 24 |
+
httpx/__pycache__/_urls.cpython-313.pyc,,
|
| 25 |
+
httpx/__pycache__/_utils.cpython-313.pyc,,
|
| 26 |
+
httpx/__version__.py,sha256=LoUyYeOXTieGzuP_64UL0wxdtxjuu_QbOvE7NOg-IqU,108
|
| 27 |
+
httpx/_api.py,sha256=r_Zgs4jIpcPJLqK5dbbSayqo_iVMKFaxZCd-oOHxLEs,11743
|
| 28 |
+
httpx/_auth.py,sha256=Yr3QwaUSK17rGYx-7j-FdicFIzz4Y9FFV-1F4-7RXX4,11891
|
| 29 |
+
httpx/_client.py,sha256=xD-UG67-WMkeltAAOeGGj-cZ2RRTAm19sWRxlFY7_40,65714
|
| 30 |
+
httpx/_config.py,sha256=pPp2U-wicfcKsF-KYRE1LYdt3e6ERGeIoXZ8Gjo3LWc,8547
|
| 31 |
+
httpx/_content.py,sha256=LGGzrJTR3OvN4Mb1GVVNLXkXJH-6oKlwAttO9p5w_yg,8161
|
| 32 |
+
httpx/_decoders.py,sha256=p0dX8I0NEHexs3UGp4SsZutiMhsXrrWl6-GnqVb0iKM,12041
|
| 33 |
+
httpx/_exceptions.py,sha256=bxW7fxzgVMAdNTbwT0Vnq04gJDW1_gI_GFiQPuMyjL0,8527
|
| 34 |
+
httpx/_main.py,sha256=Cg9GMabiTT_swaDfUgIRitSwxLRMSwUDOm7LdSGqlA4,15626
|
| 35 |
+
httpx/_models.py,sha256=4__Guyv1gLxuZChwim8kfQNiIOcJ9acreFOSurvZfms,44700
|
| 36 |
+
httpx/_multipart.py,sha256=KOHEZZl6oohg9mPaKyyu345qq1rJLg35TUG3YAzXB3Y,9843
|
| 37 |
+
httpx/_status_codes.py,sha256=DYn-2ufBgMeXy5s8x3_TB7wjAuAAMewTakPrm5rXEsc,5639
|
| 38 |
+
httpx/_transports/__init__.py,sha256=GbUoBSAOp7z-l-9j5YhMhR3DMIcn6FVLhj072O3Nnno,275
|
| 39 |
+
httpx/_transports/__pycache__/__init__.cpython-313.pyc,,
|
| 40 |
+
httpx/_transports/__pycache__/asgi.cpython-313.pyc,,
|
| 41 |
+
httpx/_transports/__pycache__/base.cpython-313.pyc,,
|
| 42 |
+
httpx/_transports/__pycache__/default.cpython-313.pyc,,
|
| 43 |
+
httpx/_transports/__pycache__/mock.cpython-313.pyc,,
|
| 44 |
+
httpx/_transports/__pycache__/wsgi.cpython-313.pyc,,
|
| 45 |
+
httpx/_transports/asgi.py,sha256=HRfiDYMPt4wQH2gFgHZg4c-i3sblo6bL5GTqcET-xz8,5501
|
| 46 |
+
httpx/_transports/base.py,sha256=kZS_VMbViYfF570pogUCJ1bulz-ybfL51Pqs9yktebU,2523
|
| 47 |
+
httpx/_transports/default.py,sha256=AzeaRUyVwCccTyyNJexDf0n1dFfzzydpdIQgvw7PLnk,13983
|
| 48 |
+
httpx/_transports/mock.py,sha256=PTo0d567RITXxGrki6kN7_67wwAxfwiMDcuXJiZCjEo,1232
|
| 49 |
+
httpx/_transports/wsgi.py,sha256=NcPX3Xap_EwCFZWO_OaSyQNuInCYx1QMNbO8GAei6jY,4825
|
| 50 |
+
httpx/_types.py,sha256=Jyh41GQq7AOev8IOWKDAg7zCbvHAfufmW5g_PiTtErY,2965
|
| 51 |
+
httpx/_urlparse.py,sha256=ZAmH47ONfkxrrj-PPYhGeiHjb6AjKCS-ANWIN4OL_KY,18546
|
| 52 |
+
httpx/_urls.py,sha256=dX99VR1DSOHpgo9Aq7PzYO4FKdxqKjwyNp8grf8dHN0,21550
|
| 53 |
+
httpx/_utils.py,sha256=_TVeqAKvxJkKHdz7dFeb4s0LZqQXgeFkXSgfiHBK_1o,8285
|
| 54 |
+
httpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
env/lib/python3.13/site-packages/httpx-0.28.1.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: hatchling 1.26.3
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|