Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes; see the raw diff for the rest.
- venv/lib/python3.13/site-packages/_yaml/__init__.py +33 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER +1 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA +764 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD +35 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL +7 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt +2 -0
- venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt +1 -0
- venv/lib/python3.13/site-packages/filelock/__init__.py +70 -0
- venv/lib/python3.13/site-packages/filelock/_api.py +403 -0
- venv/lib/python3.13/site-packages/filelock/_error.py +30 -0
- venv/lib/python3.13/site-packages/filelock/_soft.py +47 -0
- venv/lib/python3.13/site-packages/filelock/_unix.py +70 -0
- venv/lib/python3.13/site-packages/filelock/_util.py +52 -0
- venv/lib/python3.13/site-packages/filelock/_windows.py +65 -0
- venv/lib/python3.13/site-packages/filelock/asyncio.py +344 -0
- venv/lib/python3.13/site-packages/filelock/py.typed +0 -0
- venv/lib/python3.13/site-packages/filelock/version.py +34 -0
- venv/lib/python3.13/site-packages/fsspec/__init__.py +71 -0
- venv/lib/python3.13/site-packages/fsspec/_version.py +34 -0
- venv/lib/python3.13/site-packages/fsspec/caching.py +1004 -0
- venv/lib/python3.13/site-packages/fsspec/compression.py +182 -0
- venv/lib/python3.13/site-packages/fsspec/config.py +131 -0
- venv/lib/python3.13/site-packages/fsspec/conftest.py +125 -0
- venv/lib/python3.13/site-packages/fsspec/core.py +743 -0
- venv/lib/python3.13/site-packages/fsspec/dircache.py +98 -0
- venv/lib/python3.13/site-packages/fsspec/fuse.py +324 -0
- venv/lib/python3.13/site-packages/fsspec/generic.py +396 -0
- venv/lib/python3.13/site-packages/fsspec/gui.py +417 -0
- venv/lib/python3.13/site-packages/fsspec/json.py +117 -0
- venv/lib/python3.13/site-packages/fsspec/mapping.py +251 -0
- venv/lib/python3.13/site-packages/fsspec/parquet.py +541 -0
- venv/lib/python3.13/site-packages/fsspec/registry.py +330 -0
- venv/lib/python3.13/site-packages/fsspec/spec.py +2281 -0
- venv/lib/python3.13/site-packages/fsspec/transaction.py +90 -0
- venv/lib/python3.13/site-packages/hf_xet/__init__.py +5 -0
- venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER +1 -0
- venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA +209 -0
- venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD +22 -0
- venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL +4 -0
- venv/lib/python3.13/site-packages/packaging/__init__.py +15 -0
- venv/lib/python3.13/site-packages/packaging/_elffile.py +109 -0
- venv/lib/python3.13/site-packages/packaging/_manylinux.py +262 -0
- venv/lib/python3.13/site-packages/packaging/_musllinux.py +85 -0
- venv/lib/python3.13/site-packages/packaging/_parser.py +353 -0
- venv/lib/python3.13/site-packages/packaging/_structures.py +61 -0
- venv/lib/python3.13/site-packages/packaging/_tokenizer.py +195 -0
- venv/lib/python3.13/site-packages/packaging/markers.py +362 -0
- venv/lib/python3.13/site-packages/packaging/metadata.py +862 -0
- venv/lib/python3.13/site-packages/packaging/py.typed +0 -0
- venv/lib/python3.13/site-packages/packaging/requirements.py +91 -0
venv/lib/python3.13/site-packages/_yaml/__init__.py
ADDED
@@ -0,0 +1,33 @@
+# This is a stub package designed to roughly emulate the _yaml
+# extension module, which previously existed as a standalone module
+# and has been moved into the `yaml` package namespace.
+# It does not perfectly mimic its old counterpart, but should get
+# close enough for anyone who's relying on it even when they shouldn't.
+import yaml
+
+# in some circumstances, the yaml module we imported may be from a different version, so we need
+# to tread carefully when poking at it here (it may not have the attributes we expect)
+if not getattr(yaml, '__with_libyaml__', False):
+    from sys import version_info
+
+    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
+    raise exc("No module named '_yaml'")
+else:
+    from yaml._yaml import *
+    import warnings
+    warnings.warn(
+        'The _yaml extension module is now located at yaml._yaml'
+        ' and its location is subject to change. To use the'
+        ' LibYAML-based parser and emitter, import from `yaml`:'
+        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
+        DeprecationWarning
+    )
+    del warnings
+    # Don't `del yaml` here because yaml is actually an existing
+    # namespace member of _yaml.
+
+__name__ = '_yaml'
+# If the module is top-level (i.e. not a part of any specific package)
+# then the attribute should be set to ''.
+# https://docs.python.org/3.8/library/types.html
+__package__ = ''
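For readers hitting the deprecation warning above: the import path it recommends looks like this (a minimal sketch, assuming PyYAML was built with LibYAML so `yaml.__with_libyaml__` is true):

```python
# Sketch of the import the _yaml stub's warning recommends, assuming a
# LibYAML-enabled PyYAML build (yaml.__with_libyaml__ is True).
import yaml
from yaml import CLoader as Loader, CDumper as Dumper

data = yaml.load("key: value", Loader=Loader)  # parse with the C loader
text = yaml.dump(data, Dumper=Dumper)          # emit with the C dumper
print(data, text)
```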
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA
ADDED
@@ -0,0 +1,764 @@
+Metadata-Version: 2.4
+Name: charset-normalizer
+Version: 3.4.4
+Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
+Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+License: MIT
+Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
+Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
+Project-URL: Code, https://github.com/jawah/charset_normalizer
+Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
+Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: Topic :: Utilities
+Classifier: Typing :: Typed
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: unicode-backport
+Dynamic: license-file
+
+<h1 align="center">Charset Detection, for Everyone 👋</h1>
+
+<p align="center">
+  <sup>The Real First Universal Charset Detector</sup><br>
+  <a href="https://pypi.org/project/charset-normalizer">
+    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
+  </a>
+  <a href="https://pepy.tech/project/charset-normalizer/">
+    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
+  </a>
+  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
+    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
+  </a>
+</p>
+<p align="center">
+  <sup><i>Featured Packages</i></sup><br>
+  <a href="https://github.com/jawah/niquests">
+    <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
+  </a>
+  <a href="https://github.com/jawah/wassima">
+    <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
+  </a>
+</p>
+<p align="center">
+  <sup><i>In other language (unofficial port - by the community)</i></sup><br>
+  <a href="https://github.com/nickspring/charset-normalizer-rs">
+    <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
+  </a>
+</p>
+
+> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
+> I'm trying to resolve the issue by taking a new approach.
+> All IANA character set names for which the Python core library provides codecs are supported.
+
+<p align="center">
+  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
+</p>
+
+This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
+
+| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
+|--------------------------------------------------|:---:|:---:|:---:|
+| `Fast` | ❌ | ✅ | ✅ |
+| `Universal**` | ❌ | ✅ | ❌ |
+| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
+| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
+| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
+| `Native Python` | ✅ | ✅ | ❌ |
+| `Detect spoken language` | ❌ | ✅ | N/A |
+| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
+| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
+| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
+
+<p align="center">
+<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
+</p>
+
+*\*\* : They are clearly using specific code for a specific encoding even if covering most of the used ones*<br>
+
+## ⚡ Performance
+
+This package offers better performance than its counterpart Chardet. Here are some numbers.
+
+| Package | Accuracy | Mean per file (ms) | File per sec (est) |
+|-----------------------------------------------|:--------:|:------------------:|:------------------:|
+| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
+| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
+
+| Package | 99th percentile | 95th percentile | 50th percentile |
+|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
+| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
+| charset-normalizer | 100 ms | 50 ms | 5 ms |
+
+_updated as of December 2024 using CPython 3.12_
+
+Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
+
+> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
+> And yes, these results might change at any time. The dataset can be updated to include more files.
+> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
+> Keep in mind that the stats are generous and that Chardet accuracy vs. ours is measured using Chardet's initial capability
+> (e.g. supported encodings). Challenge them if you want.
+
+## ✨ Installation
+
+Using pip:
+
+```sh
+pip install charset-normalizer -U
+```
+
+## 🚀 Basic Usage
+
+### CLI
+This package comes with a CLI.
+
+```
+usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
+                  file [file ...]
+
+The Real First Universal Charset Detector. Discover originating encoding used
+on text file. Normalize text to unicode.
+
+positional arguments:
+  files                 File(s) to be analysed
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -v, --verbose         Display complementary information about file if any.
+                        Stdout will contain logs about the detection process.
+  -a, --with-alternative
+                        Output complementary possibilities if any. Top-level
+                        JSON WILL be a list.
+  -n, --normalize       Permit to normalize input file. If not set, program
+                        does not write anything.
+  -m, --minimal         Only output the charset detected to STDOUT. Disabling
+                        JSON output.
+  -r, --replace         Replace file when trying to normalize it instead of
+                        creating a new one.
+  -f, --force           Replace file without asking if you are sure, use this
+                        flag with caution.
+  -t THRESHOLD, --threshold THRESHOLD
+                        Define a custom maximum amount of chaos allowed in
+                        decoded content. 0. <= chaos <= 1.
+  --version             Show version information and exit.
+```
+
+```bash
+normalizer ./data/sample.1.fr.srt
+```
+
+or
+
+```bash
+python -m charset_normalizer ./data/sample.1.fr.srt
+```
+
+🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
+
+```json
+{
+    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
+    "encoding": "cp1252",
+    "encoding_aliases": [
+        "1252",
+        "windows_1252"
+    ],
+    "alternative_encodings": [
+        "cp1254",
+        "cp1256",
+        "cp1258",
+        "iso8859_14",
+        "iso8859_15",
+        "iso8859_16",
+        "iso8859_3",
+        "iso8859_9",
+        "latin_1",
+        "mbcs"
+    ],
+    "language": "French",
+    "alphabets": [
+        "Basic Latin",
+        "Latin-1 Supplement"
+    ],
+    "has_sig_or_bom": false,
+    "chaos": 0.149,
+    "coherence": 97.152,
+    "unicode_path": null,
+    "is_preferred": true
+}
+```
+
+### Python
+*Just print out normalized text*
+```python
+from charset_normalizer import from_path
+
+results = from_path('./my_subtitle.srt')
+
+print(str(results.best()))
+```
+
+*Upgrade your code without effort*
+```python
+from charset_normalizer import detect
+```
+
+The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
+
+See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
+
+## 😇 Why
+
+When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
+reliable alternative using a completely different method. Also! I never back down on a good challenge!
+
+I **don't care** about the **originating charset** encoding, because **two different tables** can
+produce **two identical rendered strings.**
+What I want is to get readable text, the best I can.
+
+In a way, **I'm brute forcing text decoding.** How cool is that? 😎
+
+Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.
+
+## 🍰 How
+
+- Discard all charset encoding tables that could not fit the binary content.
+- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
+- Extract matches with the lowest mess detected.
+- Additionally, we measure coherence / probe for a language.
+
+**Wait a minute**, what is noise/mess and coherence according to **YOU?**
+
+*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
+**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
+I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
+improve or rewrite it.
+
+*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
+that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
+
+## ⚡ Known limitations
+
+- Language detection is unreliable when text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
+- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
+
+## ⚠️ About Python EOLs
+
+**If you are running:**
+
+- Python >=2.7,<3.5: Unsupported
+- Python 3.5: charset-normalizer < 2.1
+- Python 3.6: charset-normalizer < 3.1
+- Python 3.7: charset-normalizer < 4.0
+
+Upgrade your Python interpreter as soon as possible.
+
+## 👤 Contributing
+
+Contributions, issues and feature requests are very much welcome.<br />
+Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
+
+## 📝 License
+
+Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
+This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
+
+Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
+
+## 💼 For Enterprise
+
+Professional support for charset-normalizer is available as part of the [Tidelift
+Subscription][1]. Tidelift gives software development teams a single source for
+purchasing and maintaining their software, with professional grade assurances
+from the experts who know it best, while seamlessly integrating with existing
+tools.
+
+[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
+
+[](https://www.bestpractices.dev/projects/7297)
+
+# Changelog
+All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+
+## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
+
+### Changed
+- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
+- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
+
+### Removed
+- `setuptools-scm` as a build dependency.
+
+### Misc
+- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
+- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
+- Restore `multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
+
+## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
+
+### Changed
+- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
+- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
+
+### Added
+- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
+- Support for Python 3.14
+
+### Fixed
+- sdist archive contained useless directories.
+- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
+
+### Misc
+- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
+  Each published wheel comes with its SBOM. We choose CycloneDX as the format.
+- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
+
+## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
+
+### Fixed
+- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
+- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
+
+### Changed
+- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
+
+## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
+
+### Changed
+- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
+- Enforce annotation delayed loading for a simpler and consistent types in the project.
+- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
+
+### Added
+- pre-commit configuration.
+- noxfile.
+
+### Removed
+- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
+- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
+- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
+- Unused `utils.range_scan` function.
+
+### Fixed
+- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
+- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
+
+## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
+
+### Added
+- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
+- Support for Python 3.13 (#512)
+
+### Fixed
+- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
+- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
+- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
+
+## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
+
+### Fixed
+- Unintentional memory usage regression when using large payload that match several encoding (#376)
+- Regression on some detection case showcased in the documentation (#371)
+
+### Added
+- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
+
+## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
+
+### Changed
+- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
+- Improved the general detection reliability based on reports from the community
+
+## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
+
+### Added
+- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
+- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
+
+### Removed
+- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
+- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
+
+### Changed
+- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
+- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
+
+### Fixed
+- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
+
+## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
+
+### Changed
+- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
+- Minor improvement over the global detection reliability
+
+### Added
+- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
+- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
+- Explicit support for Python 3.12
+
+### Fixed
+- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
+
+## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
+
+### Added
+- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
+
+### Removed
+- Support for Python 3.6 (PR #260)
+
+### Changed
+- Optional speedup provided by mypy/c 1.0.1
+
+## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
+
+### Fixed
+- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
+
+### Changed
+- Speedup provided by mypy/c 0.990 on Python >= 3.7
+
+## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
+
+### Added
+- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
+- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
+- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
+
+### Changed
+- Build with static metadata using 'build' frontend
+- Make the language detection stricter
+- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
+
+### Fixed
+- CLI with opt --normalize fail when using full path for files
+- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
+- Sphinx warnings when generating the documentation
+
+### Removed
+- Coherence detector no longer return 'Simple English' instead return 'English'
+- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
+- Breaking: Method `first()` and `best()` from CharsetMatch
+- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
+- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+- Breaking: Top-level function `normalize`
+- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+- Support for the backport `unicodedata2`
+
+## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
+
+### Added
+- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
+- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
+- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+
+### Changed
+- Build with static metadata using 'build' frontend
+- Make the language detection stricter
+
+### Fixed
+- CLI with opt --normalize fail when using full path for files
+- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
+
+### Removed
+- Coherence detector no longer return 'Simple English' instead return 'English'
+- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
+
+## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
+
+### Added
+- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
+
+### Removed
+- Breaking: Method `first()` and `best()` from CharsetMatch
+- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
+
+### Fixed
+- Sphinx warnings when generating the documentation
+
+## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+
+### Changed
+- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
+
+### Removed
+- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+- Breaking: Top-level function `normalize`
+- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+- Support for the backport `unicodedata2`
+
+## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
+
+### Deprecated
+- Function `normalize` scheduled for removal in 3.0
+
+### Changed
+- Removed useless call to decode in fn is_unprintable (#206)
+
+### Fixed
+- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
+
+## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
+
+### Added
+- Output the Unicode table version when running the CLI with `--version` (PR #194)
+
+### Changed
+- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
+- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
+
+### Fixed
+- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
+- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
+
+### Removed
+- Support for Python 3.5 (PR #192)
+
+### Deprecated
+- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
+
+## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
+
+### Fixed
+- ASCII miss-detection on rare cases (PR #170)
+
+## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
+
+### Added
+- Explicit support for Python 3.11 (PR #164)
+
+### Changed
+- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
+
+## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
+
+### Fixed
+- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
+
+### Changed
+- Skipping the language-detection (CD) on ASCII (PR #155)
+
+## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
+
+### Changed
+- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
+
+### Fixed
+- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
+
+## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
+### Changed
+- Improvement over Vietnamese detection (PR #126)
+- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
+- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
+- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
+- Code style as refactored by Sourcery-AI (PR #131)
+- Minor adjustment on the MD around european words (PR #133)
+- Remove and replace SRTs from assets / tests (PR #139)
+- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
+- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
+
+### Fixed
+- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
+- Avoid using too insignificant chunk (PR #137)
+
+### Added
+- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
+- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
+
+## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
+### Added
+- Add support for Kazakh (Cyrillic) language detection (PR #109)
+
+### Changed
+- Further, improve inferring the language from a given single-byte code page (PR #112)
+- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
+- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
+- Various detection improvement (MD+CD) (PR #117)
+
+### Removed
+- Remove redundant logging entry about detected language(s) (PR #115)
+
+### Fixed
+- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
+
+## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
+### Fixed
+- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
+- Fix CLI crash when using --minimal output in certain cases (PR #103)
+
+### Changed
+- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
+
+## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
+### Changed
+- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
+- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
+- The Unicode detection is slightly improved (PR #93)
+- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
+
+### Removed
+- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
+
+### Fixed
+- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
+- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
+- The MANIFEST.in was not exhaustive (PR #78)
+
+## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
+### Fixed
+- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
+- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
+- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
+- Submatch factoring could be wrong in rare edge cases (PR #72)
+- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
+- Fix line endings from CRLF to LF for certain project files (PR #67)
+
+### Changed
+- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
+- Allow fallback on specified encoding if any (PR #71)
+
+## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
+### Changed
+- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
+- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
+
+## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
+### Fixed
+- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
+
+### Changed
+- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
+
+## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
+### Fixed
+- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
+- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
+- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
+- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
+
+### Changed
+- Public function normalize default args values were not aligned with from_bytes (PR #53)
+
+### Added
+- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
+
+## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
+### Changed
+- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
+- Accent has been made on UTF-8 detection, should perform rather instantaneous.
+- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
+- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
+- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
+- utf_7 detection has been reinstated.
+
+### Removed
+- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
+- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
+- The exception hook on UnicodeDecodeError has been removed.
+
+### Deprecated
+- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
+
+### Fixed
+- The CLI output used the relative path of the file(s). Should be absolute.
+
+## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
+### Fixed
+- Logger configuration/usage no longer conflict with others (PR #44)
+
+## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
+### Removed
+- Using standard logging instead of using the package loguru.
+- Dropping nose test framework in favor of the maintained pytest.
+- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
+- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
+- Stop support for UTF-7 that does not contain a SIG.
+- Dropping PrettyTable, replaced with pure JSON output in CLI.
+
+### Fixed
+- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
+- Not searching properly for the BOM when trying utf32/16 parent codec.
+
+### Changed
+- Improving the package final size by compressing frequencies.json.
+- Huge improvement over the larges payload.
+
+### Added
+- CLI now produces JSON consumable output.
+- Return ASCII if given sequences fit. Given reasonable confidence.
+
+## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
+
+### Fixed
+- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
+
+## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
+
+### Fixed
+- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
+
+## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
+
+### Fixed
+- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
+
+## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
+
+### Changed
+- Amend the previous release to allow prettytable 2.0 (PR #35)
+
+## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
+
+### Fixed
+- Fix error while using the package with a python pre-release interpreter (PR #33)
+
+### Changed
+- Dependencies refactoring, constraints revised.
+
+### Added
+- Add python 3.9 and 3.10 to the supported interpreters
+
+MIT License
+
+Copyright (c) 2025 TAHRI Ahmed R.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
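The README's examples cover `from_path` and the chardet-compatible `detect`; as a companion, a minimal sketch using `from_bytes` on an in-memory payload (the payload below is an illustrative stand-in):

```python
# Sketch: detect and decode bytes of unknown encoding with charset_normalizer.
from charset_normalizer import from_bytes

payload = "Bonjour, où êtes-vous ?".encode("cp1252")  # illustrative input
best = from_bytes(payload).best()  # best-ranked CharsetMatch, or None
if best is not None:
    print(best.encoding)  # e.g. "cp1252" (short inputs may match an alias)
    print(str(best))      # the decoded, Unicode text
```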
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD
ADDED
@@ -0,0 +1,35 @@
+../../../bin/normalizer,sha256=0NCCWHGXwNJFGXe9vG0dHrG67nHnzOFp4ZWd0RQ0qoI,225
+charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303
+charset_normalizer-3.4.4.dist-info/RECORD,,
+charset_normalizer-3.4.4.dist-info/WHEEL,sha256=2iHh9e2o6T3nHtu_NVT7Cs7pebIqF94rZK8zrQfgoJI,190
+charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
+charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
+charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
+charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
+charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
+charset_normalizer/__pycache__/__init__.cpython-313.pyc,,
+charset_normalizer/__pycache__/__main__.cpython-313.pyc,,
+charset_normalizer/__pycache__/api.cpython-313.pyc,,
+charset_normalizer/__pycache__/cd.cpython-313.pyc,,
+charset_normalizer/__pycache__/constant.cpython-313.pyc,,
+charset_normalizer/__pycache__/legacy.cpython-313.pyc,,
+charset_normalizer/__pycache__/md.cpython-313.pyc,,
+charset_normalizer/__pycache__/models.cpython-313.pyc,,
+charset_normalizer/__pycache__/utils.cpython-313.pyc,,
+charset_normalizer/__pycache__/version.cpython-313.pyc,,
+charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668
+charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
+charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
+charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646
+charset_normalizer/cli/__pycache__/__init__.cpython-313.pyc,,
+charset_normalizer/cli/__pycache__/__main__.cpython-313.pyc,,
+charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713
+charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731
+charset_normalizer/md.cpython-313-x86_64-linux-gnu.so,sha256=sZ7umtJLjKfA83NFJ7npkiDyr06zDT8cWtl6uIx2MsM,15912
+charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145
+charset_normalizer/md__mypyc.cpython-313-x86_64-linux-gnu.so,sha256=i-yavqPJtZwjTKvP9hBLZ8CLZD88rVtguaSoLHso_Oc,291056
+charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
+charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170
+charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115
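Each RECORD row is plain CSV: installed path, `sha256=<urlsafe-base64 digest>`, and size in bytes, with the last two fields left empty for files that are not hashed (RECORD itself, compiled `.pyc` files). A hedged sketch of reading it:

```python
# Sketch: iterate a dist-info RECORD file; fields per PEP 376-style layout.
import csv

with open("charset_normalizer-3.4.4.dist-info/RECORD", newline="") as f:
    for path, digest, size in csv.reader(f):
        print(path, digest or "<no hash>", size or "<no size>")
```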
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL
ADDED
@@ -0,0 +1,7 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: false
+Tag: cp313-cp313-manylinux_2_17_x86_64
+Tag: cp313-cp313-manylinux2014_x86_64
+Tag: cp313-cp313-manylinux_2_28_x86_64
+
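
The three Tag lines above are what an installer matches against the running interpreter. A minimal sketch (not part of the wheel itself) of that compatibility check, assuming the `packaging` library is available:

import sys
from packaging.tags import parse_tag, sys_tags

# the wheel's tags, as listed in the WHEEL file above
wheel_tags = set()
for line in (
    "cp313-cp313-manylinux_2_17_x86_64",
    "cp313-cp313-manylinux2014_x86_64",
    "cp313-cp313-manylinux_2_28_x86_64",
):
    wheel_tags |= set(parse_tag(line))

supported = set(sys_tags())  # tags this interpreter/platform accepts
print(sys.version_info[:2], "installable:", not wheel_tags.isdisjoint(supported))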
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,2 @@
+[console_scripts]
+normalizer = charset_normalizer.cli:cli_detect
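
The console_scripts entry above is what produces the `bin/normalizer` executable recorded in the RECORD file. A sketch of the shim an installer generates for it (assumed equivalent, not the literal generated file):

# `normalizer <args>` ends up calling cli_detect(), per the entry point above
import sys
from charset_normalizer.cli import cli_detect

if __name__ == "__main__":
    sys.exit(cli_detect())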
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+charset_normalizer
venv/lib/python3.13/site-packages/filelock/__init__.py
ADDED
@@ -0,0 +1,70 @@
+"""
+A platform independent file lock that supports the with-statement.
+
+.. autodata:: filelock.__version__
+   :no-value:
+
+"""
+
+from __future__ import annotations
+
+import sys
+import warnings
+from typing import TYPE_CHECKING
+
+from ._api import AcquireReturnProxy, BaseFileLock
+from ._error import Timeout
+from ._soft import SoftFileLock
+from ._unix import UnixFileLock, has_fcntl
+from ._windows import WindowsFileLock
+from .asyncio import (
+    AsyncAcquireReturnProxy,
+    AsyncSoftFileLock,
+    AsyncUnixFileLock,
+    AsyncWindowsFileLock,
+    BaseAsyncFileLock,
+)
+from .version import version
+
+#: version of the project as a string
+__version__: str = version
+
+
+if sys.platform == "win32":  # pragma: win32 cover
+    _FileLock: type[BaseFileLock] = WindowsFileLock
+    _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock
+else:  # pragma: win32 no cover # noqa: PLR5501
+    if has_fcntl:
+        _FileLock: type[BaseFileLock] = UnixFileLock
+        _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock
+    else:
+        _FileLock = SoftFileLock
+        _AsyncFileLock = AsyncSoftFileLock
+        if warnings is not None:
+            warnings.warn("only soft file lock is available", stacklevel=2)
+
+if TYPE_CHECKING:
+    FileLock = SoftFileLock
+    AsyncFileLock = AsyncSoftFileLock
+else:
+    #: Alias for the lock, which should be used for the current platform.
+    FileLock = _FileLock
+    AsyncFileLock = _AsyncFileLock
+
+
+__all__ = [
+    "AcquireReturnProxy",
+    "AsyncAcquireReturnProxy",
+    "AsyncFileLock",
+    "AsyncSoftFileLock",
+    "AsyncUnixFileLock",
+    "AsyncWindowsFileLock",
+    "BaseAsyncFileLock",
+    "BaseFileLock",
+    "FileLock",
+    "SoftFileLock",
+    "Timeout",
+    "UnixFileLock",
+    "WindowsFileLock",
+    "__version__",
+]
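
The `FileLock` alias above resolves at import time to the strongest lock the platform supports (msvcrt on Windows, flock where fcntl exists, otherwise the soft lock). A minimal usage sketch; the lock path "app.lock" is a placeholder:

from filelock import FileLock, Timeout

lock = FileLock("app.lock", timeout=5)  # placeholder path
try:
    with lock:  # blocks up to 5 seconds, then raises Timeout
        print("exclusive section")
except Timeout:
    print("another process holds app.lock")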
venv/lib/python3.13/site-packages/filelock/_api.py
ADDED
@@ -0,0 +1,403 @@
+from __future__ import annotations
+
+import contextlib
+import inspect
+import logging
+import os
+import time
+import warnings
+from abc import ABCMeta, abstractmethod
+from dataclasses import dataclass
+from threading import local
+from typing import TYPE_CHECKING, Any, cast
+from weakref import WeakValueDictionary
+
+from ._error import Timeout
+
+if TYPE_CHECKING:
+    import sys
+    from types import TracebackType
+
+    if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+        from typing import Self
+    else:  # pragma: no cover (<py311)
+        from typing_extensions import Self
+
+
+_LOGGER = logging.getLogger("filelock")
+
+
+# This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+# is not called twice when entering the with statement. If we would simply return *self*, the lock would be acquired
+# again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
+class AcquireReturnProxy:
+    """A context-aware object that will release the lock file when exiting."""
+
+    def __init__(self, lock: BaseFileLock) -> None:
+        self.lock = lock
+
+    def __enter__(self) -> BaseFileLock:
+        return self.lock
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        self.lock.release()
+
+
+@dataclass
+class FileLockContext:
+    """A dataclass which holds the context for a ``BaseFileLock`` object."""
+
+    # The context is held in a separate class to allow optional use of thread local storage via the
+    # ThreadLocalFileContext class.
+
+    #: The path to the lock file.
+    lock_file: str
+
+    #: The default timeout value.
+    timeout: float
+
+    #: The mode for the lock files
+    mode: int
+
+    #: Whether the lock should be blocking or not
+    blocking: bool
+
+    #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
+    lock_file_fd: int | None = None
+
+    #: The lock counter is used for implementing the nested locking mechanism.
+    lock_counter: int = 0  # When the lock is acquired is increased and the lock is only released, when this value is 0
+
+
+class ThreadLocalFileContext(FileLockContext, local):
+    """A thread local version of the ``FileLockContext`` class."""
+
+
+class FileLockMeta(ABCMeta):
+    def __call__(  # noqa: PLR0913
+        cls,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = True,  # noqa: FBT001, FBT002
+        *,
+        blocking: bool = True,
+        is_singleton: bool = False,
+        **kwargs: Any,  # capture remaining kwargs for subclasses # noqa: ANN401
+    ) -> BaseFileLock:
+        if is_singleton:
+            instance = cls._instances.get(str(lock_file))  # type: ignore[attr-defined]
+            if instance:
+                params_to_check = {
+                    "thread_local": (thread_local, instance.is_thread_local()),
+                    "timeout": (timeout, instance.timeout),
+                    "mode": (mode, instance.mode),
+                    "blocking": (blocking, instance.blocking),
+                }
+
+                non_matching_params = {
+                    name: (passed_param, set_param)
+                    for name, (passed_param, set_param) in params_to_check.items()
+                    if passed_param != set_param
+                }
+                if not non_matching_params:
+                    return cast("BaseFileLock", instance)
+
+                # parameters do not match; raise error
+                msg = "Singleton lock instances cannot be initialized with differing arguments"
+                msg += "\nNon-matching arguments: "
+                for param_name, (passed_param, set_param) in non_matching_params.items():
+                    msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)"
+                raise ValueError(msg)
+
+        # Workaround to make `__init__`'s params optional in subclasses
+        # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant
+        # (https://github.com/tox-dev/filelock/pull/340)
+
+        all_params = {
+            "timeout": timeout,
+            "mode": mode,
+            "thread_local": thread_local,
+            "blocking": blocking,
+            "is_singleton": is_singleton,
+            **kwargs,
+        }
+
+        present_params = inspect.signature(cls.__init__).parameters  # type: ignore[misc]
+        init_params = {key: value for key, value in all_params.items() if key in present_params}
+
+        instance = super().__call__(lock_file, **init_params)
+
+        if is_singleton:
+            cls._instances[str(lock_file)] = instance  # type: ignore[attr-defined]
+
+        return cast("BaseFileLock", instance)
+
+
+class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta):
+    """Abstract base class for a file lock object."""
+
+    _instances: WeakValueDictionary[str, BaseFileLock]
+
+    def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None:
+        """Setup unique state for lock subclasses."""
+        super().__init_subclass__(**kwargs)
+        cls._instances = WeakValueDictionary()
+
+    def __init__(  # noqa: PLR0913
+        self,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = True,  # noqa: FBT001, FBT002
+        *,
+        blocking: bool = True,
+        is_singleton: bool = False,
+    ) -> None:
+        """
+        Create a new lock object.
+
+        :param lock_file: path to the file
+        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+            to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
+        :param mode: file permissions for the lockfile
+        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+            ``False`` then the lock will be reentrant across threads.
+        :param blocking: whether the lock should be blocking or not
+        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+            to pass the same object around.
+
+        """
+        self._is_thread_local = thread_local
+        self._is_singleton = is_singleton
+
+        # Create the context. Note that external code should not work with the context directly and should instead use
+        # properties of this class.
+        kwargs: dict[str, Any] = {
+            "lock_file": os.fspath(lock_file),
+            "timeout": timeout,
+            "mode": mode,
+            "blocking": blocking,
+        }
+        self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
+
+    def is_thread_local(self) -> bool:
+        """:return: a flag indicating if this lock is thread local or not"""
+        return self._is_thread_local
+
+    @property
+    def is_singleton(self) -> bool:
+        """:return: a flag indicating if this lock is singleton or not"""
+        return self._is_singleton
+
+    @property
+    def lock_file(self) -> str:
+        """:return: path to the lock file"""
+        return self._context.lock_file
+
+    @property
+    def timeout(self) -> float:
+        """
+        :return: the default timeout value, in seconds
+
+        .. versionadded:: 2.0.0
+        """
+        return self._context.timeout
+
+    @timeout.setter
+    def timeout(self, value: float | str) -> None:
+        """
+        Change the default timeout value.
+
+        :param value: the new value, in seconds
+
+        """
+        self._context.timeout = float(value)
+
+    @property
+    def blocking(self) -> bool:
+        """:return: whether the locking is blocking or not"""
+        return self._context.blocking
+
+    @blocking.setter
+    def blocking(self, value: bool) -> None:
+        """
+        Change the default blocking value.
+
+        :param value: the new value as bool
+
+        """
+        self._context.blocking = value
+
+    @property
+    def mode(self) -> int:
+        """:return: the file permissions for the lockfile"""
+        return self._context.mode
+
+    @abstractmethod
+    def _acquire(self) -> None:
+        """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _release(self) -> None:
+        """Releases the lock and sets self._context.lock_file_fd to None."""
+        raise NotImplementedError
+
+    @property
+    def is_locked(self) -> bool:
+        """
+
+        :return: A boolean indicating if the lock file is holding the lock currently.
+
+        .. versionchanged:: 2.0.0
+
+            This was previously a method and is now a property.
+        """
+        return self._context.lock_file_fd is not None
+
+    @property
+    def lock_counter(self) -> int:
+        """:return: The number of times this lock has been acquired (but not yet released)."""
+        return self._context.lock_counter
+
+    def acquire(
+        self,
+        timeout: float | None = None,
+        poll_interval: float = 0.05,
+        *,
+        poll_intervall: float | None = None,
+        blocking: bool | None = None,
+    ) -> AcquireReturnProxy:
+        """
+        Try to acquire the file lock.
+
+        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
+            if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
+        :param poll_interval: interval of trying to acquire the lock file
+        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
+        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
+            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
+        :raises Timeout: if fails to acquire lock within the timeout period
+        :return: a context object that will unlock the file when the context is exited
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        .. versionchanged:: 2.0.0
+
+            This method returns now a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+
+        """
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self._context.timeout
+
+        if blocking is None:
+            blocking = self._context.blocking
+
+        if poll_intervall is not None:
+            msg = "use poll_interval instead of poll_intervall"
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            poll_interval = poll_intervall
+
+        # Increment the number right at the beginning. We can still undo it, if something fails.
+        self._context.lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self.lock_file
+        start_time = time.perf_counter()
+        try:
+            while True:
+                if not self.is_locked:
+                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
+                    self._acquire()
+                if self.is_locked:
+                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
+                    break
+                if blocking is False:
+                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                if 0 <= timeout < time.perf_counter() - start_time:
+                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
+                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
+                time.sleep(poll_interval)
+        except BaseException:  # Something did go wrong, so decrement the counter.
+            self._context.lock_counter = max(0, self._context.lock_counter - 1)
+            raise
+        return AcquireReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:  # noqa: FBT001, FBT002
+        """
+        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0.
+        Also note, that the lock file itself is not automatically deleted.
+
+        :param force: If true, the lock counter is ignored and the lock is released in every case/
+
+        """
+        if self.is_locked:
+            self._context.lock_counter -= 1
+
+            if self._context.lock_counter == 0 or force:
+                lock_id, lock_filename = id(self), self.lock_file
+
+                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
+                self._release()
+                self._context.lock_counter = 0
+                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
+
+    def __enter__(self) -> Self:
+        """
+        Acquire the lock.
+
+        :return: the lock object
+
+        """
+        self.acquire()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """
+        Release the lock.
+
+        :param exc_type: the exception type if raised
+        :param exc_value: the exception value if raised
+        :param traceback: the exception traceback if raised
+
+        """
+        self.release()
+
+    def __del__(self) -> None:
+        """Called when the lock object is deleted."""
+        self.release(force=True)
+
+
+__all__ = [
+    "AcquireReturnProxy",
+    "BaseFileLock",
+]
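
Because the lock state lives in the (optionally thread-local) context and the counter only reaches zero on the final release, acquire/release pairs nest, and `is_singleton=True` makes the metaclass hand back the cached instance per path. A small sketch of that behavior; "demo.lock" is a placeholder path:

from filelock import FileLock

lock = FileLock("demo.lock", is_singleton=True)  # placeholder path
same = FileLock("demo.lock", is_singleton=True)  # returns the cached instance
assert lock is same

with lock:
    with same:                 # re-entering only bumps the counter
        assert lock.lock_counter == 2
    assert lock.is_locked      # still held: counter is 1
assert not lock.is_locked      # released once the counter reaches 0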
venv/lib/python3.13/site-packages/filelock/_error.py
ADDED
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class Timeout(TimeoutError):  # noqa: N818
+    """Raised when the lock could not be acquired in *timeout* seconds."""
+
+    def __init__(self, lock_file: str) -> None:
+        super().__init__()
+        self._lock_file = lock_file
+
+    def __reduce__(self) -> str | tuple[Any, ...]:
+        return self.__class__, (self._lock_file,)  # Properly pickle the exception
+
+    def __str__(self) -> str:
+        return f"The file lock '{self._lock_file}' could not be acquired."
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({self.lock_file!r})"
+
+    @property
+    def lock_file(self) -> str:
+        """:return: The path of the file lock."""
+        return self._lock_file
+
+
+__all__ = [
+    "Timeout",
+]
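
A short sketch of the two behaviors above worth knowing: non-blocking acquisition raises Timeout, and `__reduce__` lets the exception round-trip through pickle (useful when it is raised inside a multiprocessing worker). The lock path "busy.lock" is a placeholder:

import pickle
from filelock import FileLock, Timeout

lock = FileLock("busy.lock")  # placeholder path
try:
    lock.acquire(blocking=False)  # raises Timeout if another process holds it
    lock.release()
except Timeout as err:
    clone = pickle.loads(pickle.dumps(err))  # survives pickling via __reduce__
    assert clone.lock_file == err.lock_file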
venv/lib/python3.13/site-packages/filelock/_soft.py
ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES, EEXIST
+from pathlib import Path
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+
+class SoftFileLock(BaseFileLock):
+    """Simply watches the existence of the lock file."""
+
+    def _acquire(self) -> None:
+        raise_on_not_writable_file(self.lock_file)
+        ensure_directory_exists(self.lock_file)
+        # first check for exists and read-only mode as the open will mask this case as EEXIST
+        flags = (
+            os.O_WRONLY  # open for writing only
+            | os.O_CREAT
+            | os.O_EXCL  # together with above raise EEXIST if the file specified by filename exists
+            | os.O_TRUNC  # truncate the file to zero byte
+        )
+        try:
+            file_handler = os.open(self.lock_file, flags, self._context.mode)
+        except OSError as exception:  # re-raise unless expected exception
+            if not (
+                exception.errno == EEXIST  # lock already exist
+                or (exception.errno == EACCES and sys.platform == "win32")  # has no access to this lock
+            ):  # pragma: win32 no cover
+                raise
+        else:
+            self._context.lock_file_fd = file_handler
+
+    def _release(self) -> None:
+        assert self._context.lock_file_fd is not None  # noqa: S101
+        os.close(self._context.lock_file_fd)  # the lock file is definitely not None
+        self._context.lock_file_fd = None
+        with suppress(OSError):  # the file is already deleted and that's what we want
+            Path(self.lock_file).unlink()
+
+
+__all__ = [
+    "SoftFileLock",
+]
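
The whole soft-lock protocol rests on `os.open` with `O_CREAT | O_EXCL` being atomic: exactly one process can create the file. A standalone sketch of that bare primitive; the path and helper name are placeholders:

import os

def try_create_lock(path: str) -> int | None:
    """Return an fd if we won the race to create `path`, else None."""
    try:
        return os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC, 0o644)
    except FileExistsError:  # EEXIST: some other process holds the soft lock
        return None

fd = try_create_lock("/tmp/demo.softlock")  # placeholder path
if fd is not None:
    os.close(fd)
    os.unlink("/tmp/demo.softlock")  # releasing == deleting the file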
venv/lib/python3.13/site-packages/filelock/_unix.py
ADDED
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import ENOSYS
+from pathlib import Path
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists
+
+#: a flag to indicate if the fcntl API is available
+has_fcntl = False
+if sys.platform == "win32":  # pragma: win32 cover
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+else:  # pragma: win32 no cover
+    try:
+        import fcntl
+
+        _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN)
+    except (ImportError, AttributeError):
+        pass
+    else:
+        has_fcntl = True
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            ensure_directory_exists(self.lock_file)
+            open_flags = os.O_RDWR | os.O_TRUNC
+            if not Path(self.lock_file).exists():
+                open_flags |= os.O_CREAT
+            fd = os.open(self.lock_file, open_flags, self._context.mode)
+            with suppress(PermissionError):  # This locked is not owned by this UID
+                os.fchmod(fd, self._context.mode)
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError as exception:
+                os.close(fd)
+                if exception.errno == ENOSYS:  # NotImplemented error
+                    msg = "FileSystem does not appear to support flock; use SoftFileLock instead"
+                    raise NotImplementedError(msg) from exception
+            else:
+                self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            # Do not remove the lockfile:
+            # https://github.com/tox-dev/py-filelock/issues/31
+            # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+            fd = cast("int", self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            fcntl.flock(fd, fcntl.LOCK_UN)
+            os.close(fd)
+
+
+__all__ = [
+    "UnixFileLock",
+    "has_fcntl",
+]
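
Stripped of the class machinery, the Unix path is just `flock(LOCK_EX | LOCK_NB)` on an fd. A bare sketch of that primitive (Unix-only; the path is a placeholder):

import fcntl
import os

fd = os.open("/tmp/demo.flock", os.O_RDWR | os.O_CREAT, 0o644)  # placeholder path
try:
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)  # raises BlockingIOError if held
    print("lock held")
    fcntl.flock(fd, fcntl.LOCK_UN)
except BlockingIOError:
    print("already locked by another process")
finally:
    os.close(fd)  # the lock file itself is deliberately left in place, as above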
venv/lib/python3.13/site-packages/filelock/_util.py
ADDED
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+import stat
+import sys
+from errno import EACCES, EISDIR
+from pathlib import Path
+
+
+def raise_on_not_writable_file(filename: str) -> None:
+    """
+    Raise an exception if attempting to open the file for writing would fail.
+
+    This is done so files that will never be writable can be separated from files that are writable but currently
+    locked.
+
+    :param filename: file to check
+    :raises OSError: as if the file was opened for writing.
+
+    """
+    try:  # use stat to do exists + can write to check without race condition
+        file_stat = os.stat(filename)  # noqa: PTH116
+    except OSError:
+        return  # swallow does not exist or other errors
+
+    if file_stat.st_mtime != 0:  # if os.stat returns but modification is zero that's an invalid os.stat - ignore it
+        if not (file_stat.st_mode & stat.S_IWUSR):
+            raise PermissionError(EACCES, "Permission denied", filename)
+
+        if stat.S_ISDIR(file_stat.st_mode):
+            if sys.platform == "win32":  # pragma: win32 cover
+                # On Windows, this is PermissionError
+                raise PermissionError(EACCES, "Permission denied", filename)
+            else:  # pragma: win32 no cover # noqa: RET506
+                # On linux / macOS, this is IsADirectoryError
+                raise IsADirectoryError(EISDIR, "Is a directory", filename)
+
+
+def ensure_directory_exists(filename: Path | str) -> None:
+    """
+    Ensure the directory containing the file exists (create it if necessary).
+
+    :param filename: file.
+
+    """
+    Path(filename).parent.mkdir(parents=True, exist_ok=True)
+
+
+__all__ = [
+    "ensure_directory_exists",
+    "raise_on_not_writable_file",
+]
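
A quick demonstration of the stat-based writability probe used above: a single `os.stat` call stands in for an open()/close() attempt, so there is no check-then-use race window. All paths here come from tempfile and are placeholders:

import os
import stat
import tempfile

with tempfile.NamedTemporaryFile(delete=False) as tmp:
    path = tmp.name
os.chmod(path, stat.S_IRUSR)  # drop the owner write bit
st = os.stat(path)
print("owner-writable:", bool(st.st_mode & stat.S_IWUSR))  # False
os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
os.remove(path)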
venv/lib/python3.13/site-packages/filelock/_windows.py
ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES
+from pathlib import Path
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+if sys.platform == "win32":  # pragma: win32 cover
+    import msvcrt
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise_on_not_writable_file(self.lock_file)
+            ensure_directory_exists(self.lock_file)
+            flags = (
+                os.O_RDWR  # open for read and write
+                | os.O_CREAT  # create file if not exists
+                | os.O_TRUNC  # truncate file if not empty
+            )
+            try:
+                fd = os.open(self.lock_file, flags, self._context.mode)
+            except OSError as exception:
+                if exception.errno != EACCES:  # has no access to this lock
+                    raise
+            else:
+                try:
+                    msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
+                except OSError as exception:
+                    os.close(fd)  # close file first
+                    if exception.errno != EACCES:  # file is already locked
+                        raise
+                else:
+                    self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            fd = cast("int", self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
+            os.close(fd)
+
+            with suppress(OSError):  # Probably another instance of the application hat acquired the file lock.
+                Path(self.lock_file).unlink()
+
+else:  # pragma: win32 no cover
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+
+__all__ = [
+    "WindowsFileLock",
+]
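
The Windows branch above non-blockingly locks one byte at the start of the file. A bare sketch of that primitive (Windows-only, hence the platform guard; the path is a placeholder):

import os
import sys

if sys.platform == "win32":
    import msvcrt

    fd = os.open(r"C:\Temp\demo.lock", os.O_RDWR | os.O_CREAT)  # placeholder path
    try:
        msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)  # lock 1 byte, fail fast if taken
        print("lock held")
        msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
    except OSError:
        print("already locked")
    finally:
        os.close(fd)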
venv/lib/python3.13/site-packages/filelock/asyncio.py
ADDED
@@ -0,0 +1,344 @@
+"""An asyncio-based implementation of the file lock."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import logging
+import os
+import time
+from dataclasses import dataclass
+from inspect import iscoroutinefunction
+from threading import local
+from typing import TYPE_CHECKING, Any, NoReturn, cast
+
+from ._api import BaseFileLock, FileLockContext, FileLockMeta
+from ._error import Timeout
+from ._soft import SoftFileLock
+from ._unix import UnixFileLock
+from ._windows import WindowsFileLock
+
+if TYPE_CHECKING:
+    import sys
+    from collections.abc import Callable
+    from concurrent import futures
+    from types import TracebackType
+
+    if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+        from typing import Self
+    else:  # pragma: no cover (<py311)
+        from typing_extensions import Self
+
+
+_LOGGER = logging.getLogger("filelock")
+
+
+@dataclass
+class AsyncFileLockContext(FileLockContext):
+    """A dataclass which holds the context for a ``BaseAsyncFileLock`` object."""
+
+    #: Whether run in executor
+    run_in_executor: bool = True
+
+    #: The executor
+    executor: futures.Executor | None = None
+
+    #: The loop
+    loop: asyncio.AbstractEventLoop | None = None
+
+
+class AsyncThreadLocalFileContext(AsyncFileLockContext, local):
+    """A thread local version of the ``FileLockContext`` class."""
+
+
+class AsyncAcquireReturnProxy:
+    """A context-aware object that will release the lock file when exiting."""
+
+    def __init__(self, lock: BaseAsyncFileLock) -> None:  # noqa: D107
+        self.lock = lock
+
+    async def __aenter__(self) -> BaseAsyncFileLock:  # noqa: D105
+        return self.lock
+
+    async def __aexit__(  # noqa: D105
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        await self.lock.release()
+
+
+class AsyncFileLockMeta(FileLockMeta):
+    def __call__(  # type: ignore[override] # noqa: PLR0913
+        cls,  # noqa: N805
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = False,  # noqa: FBT001, FBT002
+        *,
+        blocking: bool = True,
+        is_singleton: bool = False,
+        loop: asyncio.AbstractEventLoop | None = None,
+        run_in_executor: bool = True,
+        executor: futures.Executor | None = None,
+    ) -> BaseAsyncFileLock:
+        if thread_local and run_in_executor:
+            msg = "run_in_executor is not supported when thread_local is True"
+            raise ValueError(msg)
+        instance = super().__call__(
+            lock_file=lock_file,
+            timeout=timeout,
+            mode=mode,
+            thread_local=thread_local,
+            blocking=blocking,
+            is_singleton=is_singleton,
+            loop=loop,
+            run_in_executor=run_in_executor,
+            executor=executor,
+        )
+        return cast("BaseAsyncFileLock", instance)
+
+
+class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta):
+    """Base class for asynchronous file locks."""
+
+    def __init__(  # noqa: PLR0913
+        self,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = False,  # noqa: FBT001, FBT002
+        *,
+        blocking: bool = True,
+        is_singleton: bool = False,
+        loop: asyncio.AbstractEventLoop | None = None,
+        run_in_executor: bool = True,
+        executor: futures.Executor | None = None,
+    ) -> None:
+        """
+        Create a new lock object.
+
+        :param lock_file: path to the file
+        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+            to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
+        :param mode: file permissions for the lockfile
+        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+            ``False`` then the lock will be reentrant across threads.
+        :param blocking: whether the lock should be blocking or not
+        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+            to pass the same object around.
+        :param loop: The event loop to use. If not specified, the running event loop will be used.
+        :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor.
+        :param executor: The executor to use. If not specified, the default executor will be used.
+
+        """
+        self._is_thread_local = thread_local
+        self._is_singleton = is_singleton
+
+        # Create the context. Note that external code should not work with the context directly and should instead use
+        # properties of this class.
+        kwargs: dict[str, Any] = {
+            "lock_file": os.fspath(lock_file),
+            "timeout": timeout,
+            "mode": mode,
+            "blocking": blocking,
+            "loop": loop,
+            "run_in_executor": run_in_executor,
+            "executor": executor,
+        }
+        self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)(
+            **kwargs
+        )
+
+    @property
+    def run_in_executor(self) -> bool:
+        """::return: whether run in executor."""
+        return self._context.run_in_executor
+
+    @property
+    def executor(self) -> futures.Executor | None:
+        """::return: the executor."""
+        return self._context.executor
+
+    @executor.setter
+    def executor(self, value: futures.Executor | None) -> None:  # pragma: no cover
+        """
+        Change the executor.
+
+        :param value: the new executor or ``None``
+        :type value: futures.Executor | None
+
+        """
+        self._context.executor = value
+
+    @property
+    def loop(self) -> asyncio.AbstractEventLoop | None:
+        """::return: the event loop."""
+        return self._context.loop
+
+    async def acquire(  # type: ignore[override]
+        self,
+        timeout: float | None = None,
+        poll_interval: float = 0.05,
+        *,
+        blocking: bool | None = None,
+    ) -> AsyncAcquireReturnProxy:
+        """
+        Try to acquire the file lock.
+
+        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default
+            :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and
+            this method will block until the lock could be acquired
+        :param poll_interval: interval of trying to acquire the lock file
+        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
+            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
+        :raises Timeout: if fails to acquire lock within the timeout period
+        :return: a context object that will unlock the file when the context is exited
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        """
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self._context.timeout
+
+        if blocking is None:
+            blocking = self._context.blocking
+
+        # Increment the number right at the beginning. We can still undo it, if something fails.
+        self._context.lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self.lock_file
+        start_time = time.perf_counter()
+        try:
+            while True:
+                if not self.is_locked:
+                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
+                    await self._run_internal_method(self._acquire)
+                if self.is_locked:
+                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
+                    break
+                if blocking is False:
+                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                if 0 <= timeout < time.perf_counter() - start_time:
+                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
+                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
+                await asyncio.sleep(poll_interval)
+        except BaseException:  # Something did go wrong, so decrement the counter.
+            self._context.lock_counter = max(0, self._context.lock_counter - 1)
+            raise
+        return AsyncAcquireReturnProxy(lock=self)
+
+    async def release(self, force: bool = False) -> None:  # type: ignore[override] # noqa: FBT001, FBT002
+        """
+        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0.
+        Also note, that the lock file itself is not automatically deleted.
+
+        :param force: If true, the lock counter is ignored and the lock is released in every case/
+
+        """
+        if self.is_locked:
+            self._context.lock_counter -= 1
+
+            if self._context.lock_counter == 0 or force:
+                lock_id, lock_filename = id(self), self.lock_file
+
+                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
+                await self._run_internal_method(self._release)
+                self._context.lock_counter = 0
+                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
+
+    async def _run_internal_method(self, method: Callable[[], Any]) -> None:
+        if iscoroutinefunction(method):
+            await method()
+        elif self.run_in_executor:
+            loop = self.loop or asyncio.get_running_loop()
+            await loop.run_in_executor(self.executor, method)
+        else:
+            method()
+
+    def __enter__(self) -> NoReturn:
+        """
+        Replace old __enter__ method to avoid using it.
+
+        NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD.
+
+        :return: none
+        :rtype: NoReturn
+        """
+        msg = "Do not use `with` for asyncio locks, use `async with` instead."
+        raise NotImplementedError(msg)
+
+    async def __aenter__(self) -> Self:
+        """
+        Acquire the lock.
+
+        :return: the lock object
+
+        """
+        await self.acquire()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """
+        Release the lock.
+
+        :param exc_type: the exception type if raised
+        :param exc_value: the exception value if raised
+        :param traceback: the exception traceback if raised
+
+        """
+        await self.release()
+
+    def __del__(self) -> None:
+        """Called when the lock object is deleted."""
+        with contextlib.suppress(RuntimeError):
+            loop = self.loop or asyncio.get_running_loop()
+            if not loop.is_running():  # pragma: no cover
+                loop.run_until_complete(self.release(force=True))
+            else:
+                loop.create_task(self.release(force=True))
+
+
+class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock):
+    """Simply watches the existence of the lock file."""
+
+
+class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock):
+    """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+
+class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock):
+    """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems."""
+
+
+__all__ = [
+    "AsyncAcquireReturnProxy",
+    "AsyncSoftFileLock",
+    "AsyncUnixFileLock",
+    "AsyncWindowsFileLock",
+    "BaseAsyncFileLock",
+]
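
The async variants keep the synchronous `_acquire`/`_release` primitives and, by default, push them onto the loop's executor so the event loop never blocks on os.open or flock. A minimal usage sketch; "async.lock" is a placeholder path:

import asyncio
from filelock import AsyncFileLock

async def main() -> None:
    lock = AsyncFileLock("async.lock")  # placeholder path
    async with lock:  # the blocking syscalls run in the default executor
        print("exclusive async section")

asyncio.run(main())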
venv/lib/python3.13/site-packages/filelock/py.typed
ADDED
File without changes
venv/lib/python3.13/site-packages/filelock/version.py
ADDED
@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+
+__version__ = version = '3.20.0'
+__version_tuple__ = version_tuple = (3, 20, 0)
+
+__commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/__init__.py
ADDED
@@ -0,0 +1,71 @@
+from . import caching
+from ._version import __version__  # noqa: F401
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+    "url_to_fs",
+]
+
+
+def process_entries():
+    try:
+        from importlib.metadata import entry_points
+    except ImportError:
+        return
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # We take our implementations as the ones to overload with if
+                    # for some reason we encounter some, may be the same, already
+                    # registered
+                    clobber=True,
+                )
+
+
+process_entries()
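
`process_entries()` above lazily registers third-party filesystems advertised under the `fsspec.specs` entry-point group, alongside the built-in protocols. A minimal usage sketch with the built-in local ("file") protocol; the path is a placeholder:

import fsspec

fs = fsspec.filesystem("file")  # resolved through the registry populated above
with fsspec.open("/tmp/fsspec-demo.txt", "w") as f:  # placeholder path
    f.write("hello")
print(fs.cat_file("/tmp/fsspec-demo.txt"))  # b'hello'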
venv/lib/python3.13/site-packages/fsspec/_version.py
ADDED
@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+
+__version__ = version = '2025.10.0'
+__version_tuple__ = version_tuple = (2025, 10, 0)
+
+__commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/caching.py
ADDED
@@ -0,0 +1,1004 @@
from __future__ import annotations

import collections
import functools
import logging
import math
import os
import threading
import warnings
from collections import OrderedDict
from concurrent.futures import Future, ThreadPoolExecutor
from itertools import groupby
from operator import itemgetter
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Generic,
    NamedTuple,
    TypeVar,
)

if TYPE_CHECKING:
    import mmap

    from typing_extensions import ParamSpec

    P = ParamSpec("P")
else:
    P = TypeVar("P")

T = TypeVar("T")


logger = logging.getLogger("fsspec")

Fetcher = Callable[[int, int], bytes]  # Maps (start, end) to bytes
MultiFetcher = Callable[[list[int, int]], bytes]  # Maps [(start, end)] to bytes


class BaseCache:
    """Pass-through cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.nblocks = 0
        self.fetcher = fetcher
        self.size = size
        self.hit_count = 0
        self.miss_count = 0
        # the bytes that we actually requested
        self.total_requested_bytes = 0

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

    def _reset_stats(self) -> None:
        """Reset hit and miss counts for a more granular report, e.g. by file."""
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _log_stats(self) -> str:
        """Return a formatted string of the cache statistics."""
        if self.hit_count == 0 and self.miss_count == 0:
            # a cache that does nothing, this is for logs only
            return ""
        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"

    def __repr__(self) -> str:
        # TODO: use rich for better formatting
        return f"""
        <{self.__class__.__name__}:
            block size  :   {self.blocksize}
            block count :   {self.nblocks}
            file size   :   {self.size}
            cache hits  :   {self.hit_count}
            cache misses:   {self.miss_count}
            total requested bytes: {self.total_requested_bytes}>
        """
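Every cache below is parameterised by the `Fetcher` contract defined at the top of this module: any callable `f(start, stop) -> bytes` will do. A minimal sketch exercising the `BaseCache` pass-through against an in-memory blob (the blob and fetcher are invented for illustration):

# Minimal sketch of the Fetcher contract using an in-memory blob.
blob = bytes(range(256)) * 16  # 4096 bytes standing in for remote data

def fetcher(start: int, stop: int) -> bytes:
    # A real fetcher would issue a ranged GET; here we just slice.
    return blob[start:stop]

cache = BaseCache(blocksize=512, fetcher=fetcher, size=len(blob))
assert cache._fetch(10, 20) == blob[10:20]  # pass-through, no caching
assert cache._fetch(5000, 6000) == b""      # reads past EOF yield b""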
+
class MMapCache(BaseCache):
|
| 106 |
+
"""memory-mapped sparse file cache
|
| 107 |
+
|
| 108 |
+
Opens temporary file, which is filled blocks-wise when data is requested.
|
| 109 |
+
Ensure there is enough disc space in the temporary location.
|
| 110 |
+
|
| 111 |
+
This cache method might only work on posix
|
| 112 |
+
|
| 113 |
+
Parameters
|
| 114 |
+
----------
|
| 115 |
+
blocksize: int
|
| 116 |
+
How far to read ahead in numbers of bytes
|
| 117 |
+
fetcher: Fetcher
|
| 118 |
+
Function of the form f(start, end) which gets bytes from remote as
|
| 119 |
+
specified
|
| 120 |
+
size: int
|
| 121 |
+
How big this file is
|
| 122 |
+
location: str
|
| 123 |
+
Where to create the temporary file. If None, a temporary file is
|
| 124 |
+
created using tempfile.TemporaryFile().
|
| 125 |
+
blocks: set[int]
|
| 126 |
+
Set of block numbers that have already been fetched. If None, an empty
|
| 127 |
+
set is created.
|
| 128 |
+
multi_fetcher: MultiFetcher
|
| 129 |
+
Function of the form f([(start, end)]) which gets bytes from remote
|
| 130 |
+
as specified. This function is used to fetch multiple blocks at once.
|
| 131 |
+
If not specified, the fetcher function is used instead.
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
name = "mmap"
|
| 135 |
+
|
| 136 |
+
def __init__(
|
| 137 |
+
self,
|
| 138 |
+
blocksize: int,
|
| 139 |
+
fetcher: Fetcher,
|
| 140 |
+
size: int,
|
| 141 |
+
location: str | None = None,
|
| 142 |
+
blocks: set[int] | None = None,
|
| 143 |
+
multi_fetcher: MultiFetcher | None = None,
|
| 144 |
+
) -> None:
|
| 145 |
+
super().__init__(blocksize, fetcher, size)
|
| 146 |
+
self.blocks = set() if blocks is None else blocks
|
| 147 |
+
self.location = location
|
| 148 |
+
self.multi_fetcher = multi_fetcher
|
| 149 |
+
self.cache = self._makefile()
|
| 150 |
+
|
| 151 |
+
def _makefile(self) -> mmap.mmap | bytearray:
|
| 152 |
+
import mmap
|
| 153 |
+
import tempfile
|
| 154 |
+
|
| 155 |
+
if self.size == 0:
|
| 156 |
+
return bytearray()
|
| 157 |
+
|
| 158 |
+
# posix version
|
| 159 |
+
if self.location is None or not os.path.exists(self.location):
|
| 160 |
+
if self.location is None:
|
| 161 |
+
fd = tempfile.TemporaryFile()
|
| 162 |
+
self.blocks = set()
|
| 163 |
+
else:
|
| 164 |
+
fd = open(self.location, "wb+")
|
| 165 |
+
fd.seek(self.size - 1)
|
| 166 |
+
fd.write(b"1")
|
| 167 |
+
fd.flush()
|
| 168 |
+
else:
|
| 169 |
+
fd = open(self.location, "r+b")
|
| 170 |
+
|
| 171 |
+
return mmap.mmap(fd.fileno(), self.size)
|
| 172 |
+
|
| 173 |
+
def _fetch(self, start: int | None, end: int | None) -> bytes:
|
| 174 |
+
logger.debug(f"MMap cache fetching {start}-{end}")
|
| 175 |
+
if start is None:
|
| 176 |
+
start = 0
|
| 177 |
+
if end is None:
|
| 178 |
+
end = self.size
|
| 179 |
+
if start >= self.size or start >= end:
|
| 180 |
+
return b""
|
| 181 |
+
start_block = start // self.blocksize
|
| 182 |
+
end_block = end // self.blocksize
|
| 183 |
+
block_range = range(start_block, end_block + 1)
|
| 184 |
+
# Determine which blocks need to be fetched. This sequence is sorted by construction.
|
| 185 |
+
need = (i for i in block_range if i not in self.blocks)
|
| 186 |
+
# Count the number of blocks already cached
|
| 187 |
+
self.hit_count += sum(1 for i in block_range if i in self.blocks)
|
| 188 |
+
|
| 189 |
+
ranges = []
|
| 190 |
+
|
| 191 |
+
# Consolidate needed blocks.
|
| 192 |
+
# Algorithm adapted from Python 2.x itertools documentation.
|
| 193 |
+
# We are grouping an enumerated sequence of blocks. By comparing when the difference
|
| 194 |
+
# between an ascending range (provided by enumerate) and the needed block numbers
|
| 195 |
+
# we can detect when the block number skips values. The key computes this difference.
|
| 196 |
+
# Whenever the difference changes, we know that we have previously cached block(s),
|
| 197 |
+
# and a new group is started. In other words, this algorithm neatly groups
|
| 198 |
+
# runs of consecutive block numbers so they can be fetched together.
|
| 199 |
+
for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
|
| 200 |
+
# Extract the blocks from the enumerated sequence
|
| 201 |
+
_blocks = tuple(map(itemgetter(1), _blocks))
|
| 202 |
+
# Compute start of first block
|
| 203 |
+
sstart = _blocks[0] * self.blocksize
|
| 204 |
+
# Compute the end of the last block. Last block may not be full size.
|
| 205 |
+
send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
|
| 206 |
+
|
| 207 |
+
# Fetch bytes (could be multiple consecutive blocks)
|
| 208 |
+
self.total_requested_bytes += send - sstart
|
| 209 |
+
logger.debug(
|
| 210 |
+
f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
|
| 211 |
+
)
|
| 212 |
+
ranges.append((sstart, send))
|
| 213 |
+
|
| 214 |
+
# Update set of cached blocks
|
| 215 |
+
self.blocks.update(_blocks)
|
| 216 |
+
# Update cache statistics with number of blocks we had to cache
|
| 217 |
+
self.miss_count += len(_blocks)
|
| 218 |
+
|
| 219 |
+
if not ranges:
|
| 220 |
+
return self.cache[start:end]
|
| 221 |
+
|
| 222 |
+
if self.multi_fetcher:
|
| 223 |
+
logger.debug(f"MMap get blocks {ranges}")
|
| 224 |
+
for idx, r in enumerate(self.multi_fetcher(ranges)):
|
| 225 |
+
(sstart, send) = ranges[idx]
|
| 226 |
+
logger.debug(f"MMap copy block ({sstart}-{send}")
|
| 227 |
+
self.cache[sstart:send] = r
|
| 228 |
+
else:
|
| 229 |
+
for sstart, send in ranges:
|
| 230 |
+
logger.debug(f"MMap get block ({sstart}-{send}")
|
| 231 |
+
self.cache[sstart:send] = self.fetcher(sstart, send)
|
| 232 |
+
|
| 233 |
+
return self.cache[start:end]
|
| 234 |
+
|
| 235 |
+
def __getstate__(self) -> dict[str, Any]:
|
| 236 |
+
state = self.__dict__.copy()
|
| 237 |
+
# Remove the unpicklable entries.
|
| 238 |
+
del state["cache"]
|
| 239 |
+
return state
|
| 240 |
+
|
| 241 |
+
def __setstate__(self, state: dict[str, Any]) -> None:
|
| 242 |
+
# Restore instance attributes
|
| 243 |
+
self.__dict__.update(state)
|
| 244 |
+
self.cache = self._makefile()
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class ReadAheadCache(BaseCache):
|
| 248 |
+
"""Cache which reads only when we get beyond a block of data
|
| 249 |
+
|
| 250 |
+
This is a much simpler version of BytesCache, and does not attempt to
|
| 251 |
+
fill holes in the cache or keep fragments alive. It is best suited to
|
| 252 |
+
many small reads in a sequential order (e.g., reading lines from a file).
|
| 253 |
+
"""
|
| 254 |
+
|
| 255 |
+
name = "readahead"
|
| 256 |
+
|
| 257 |
+
def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
|
| 258 |
+
super().__init__(blocksize, fetcher, size)
|
| 259 |
+
self.cache = b""
|
| 260 |
+
self.start = 0
|
| 261 |
+
self.end = 0
|
| 262 |
+
|
| 263 |
+
def _fetch(self, start: int | None, end: int | None) -> bytes:
|
| 264 |
+
if start is None:
|
| 265 |
+
start = 0
|
| 266 |
+
if end is None or end > self.size:
|
| 267 |
+
end = self.size
|
| 268 |
+
if start >= self.size or start >= end:
|
| 269 |
+
return b""
|
| 270 |
+
l = end - start
|
| 271 |
+
if start >= self.start and end <= self.end:
|
| 272 |
+
# cache hit
|
| 273 |
+
self.hit_count += 1
|
| 274 |
+
return self.cache[start - self.start : end - self.start]
|
| 275 |
+
elif self.start <= start < self.end:
|
| 276 |
+
# partial hit
|
| 277 |
+
self.miss_count += 1
|
| 278 |
+
part = self.cache[start - self.start :]
|
| 279 |
+
l -= len(part)
|
| 280 |
+
start = self.end
|
| 281 |
+
else:
|
| 282 |
+
# miss
|
| 283 |
+
self.miss_count += 1
|
| 284 |
+
part = b""
|
| 285 |
+
end = min(self.size, end + self.blocksize)
|
| 286 |
+
self.total_requested_bytes += end - start
|
| 287 |
+
self.cache = self.fetcher(start, end) # new block replaces old
|
| 288 |
+
self.start = start
|
| 289 |
+
self.end = self.start + len(self.cache)
|
| 290 |
+
return part + self.cache[:l]
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
class FirstChunkCache(BaseCache):
|
| 294 |
+
"""Caches the first block of a file only
|
| 295 |
+
|
| 296 |
+
This may be useful for file types where the metadata is stored in the header,
|
| 297 |
+
but is randomly accessed.
|
| 298 |
+
"""
|
| 299 |
+
|
| 300 |
+
name = "first"
|
| 301 |
+
|
| 302 |
+
def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
|
| 303 |
+
if blocksize > size:
|
| 304 |
+
# this will buffer the whole thing
|
| 305 |
+
blocksize = size
|
| 306 |
+
super().__init__(blocksize, fetcher, size)
|
| 307 |
+
self.cache: bytes | None = None
|
| 308 |
+
|
| 309 |
+
def _fetch(self, start: int | None, end: int | None) -> bytes:
|
| 310 |
+
start = start or 0
|
| 311 |
+
if start > self.size:
|
| 312 |
+
logger.debug("FirstChunkCache: requested start > file size")
|
| 313 |
+
return b""
|
| 314 |
+
|
| 315 |
+
end = min(end, self.size)
|
| 316 |
+
|
| 317 |
+
if start < self.blocksize:
|
| 318 |
+
if self.cache is None:
|
| 319 |
+
self.miss_count += 1
|
| 320 |
+
if end > self.blocksize:
|
| 321 |
+
self.total_requested_bytes += end
|
| 322 |
+
data = self.fetcher(0, end)
|
| 323 |
+
self.cache = data[: self.blocksize]
|
| 324 |
+
return data[start:]
|
| 325 |
+
self.cache = self.fetcher(0, self.blocksize)
|
| 326 |
+
self.total_requested_bytes += self.blocksize
|
| 327 |
+
part = self.cache[start:end]
|
| 328 |
+
if end > self.blocksize:
|
| 329 |
+
self.total_requested_bytes += end - self.blocksize
|
| 330 |
+
part += self.fetcher(self.blocksize, end)
|
| 331 |
+
self.hit_count += 1
|
| 332 |
+
return part
|
| 333 |
+
else:
|
| 334 |
+
self.miss_count += 1
|
| 335 |
+
self.total_requested_bytes += end - start
|
| 336 |
+
return self.fetcher(start, end)
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
class BlockCache(BaseCache):
|
| 340 |
+
"""
|
| 341 |
+
Cache holding memory as a set of blocks.
|
| 342 |
+
|
| 343 |
+
Requests are only ever made ``blocksize`` at a time, and are
|
| 344 |
+
stored in an LRU cache. The least recently accessed block is
|
| 345 |
+
discarded when more than ``maxblocks`` are stored.
|
| 346 |
+
|
| 347 |
+
Parameters
|
| 348 |
+
----------
|
| 349 |
+
blocksize : int
|
| 350 |
+
The number of bytes to store in each block.
|
| 351 |
+
Requests are only ever made for ``blocksize``, so this
|
| 352 |
+
should balance the overhead of making a request against
|
| 353 |
+
the granularity of the blocks.
|
| 354 |
+
fetcher : Callable
|
| 355 |
+
size : int
|
| 356 |
+
The total size of the file being cached.
|
| 357 |
+
maxblocks : int
|
| 358 |
+
The maximum number of blocks to cache for. The maximum memory
|
| 359 |
+
use for this cache is then ``blocksize * maxblocks``.
|
| 360 |
+
"""
|
| 361 |
+
|
| 362 |
+
name = "blockcache"
|
| 363 |
+
|
| 364 |
+
def __init__(
|
| 365 |
+
self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
|
| 366 |
+
) -> None:
|
| 367 |
+
super().__init__(blocksize, fetcher, size)
|
| 368 |
+
self.nblocks = math.ceil(size / blocksize)
|
| 369 |
+
self.maxblocks = maxblocks
|
| 370 |
+
self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
|
| 371 |
+
|
| 372 |
+
def cache_info(self):
|
| 373 |
+
"""
|
| 374 |
+
The statistics on the block cache.
|
| 375 |
+
|
| 376 |
+
Returns
|
| 377 |
+
-------
|
| 378 |
+
NamedTuple
|
| 379 |
+
Returned directly from the LRU Cache used internally.
|
| 380 |
+
"""
|
| 381 |
+
return self._fetch_block_cached.cache_info()
|
| 382 |
+
|
| 383 |
+
def __getstate__(self) -> dict[str, Any]:
|
| 384 |
+
state = self.__dict__
|
| 385 |
+
del state["_fetch_block_cached"]
|
| 386 |
+
return state
|
| 387 |
+
|
| 388 |
+
def __setstate__(self, state: dict[str, Any]) -> None:
|
| 389 |
+
self.__dict__.update(state)
|
| 390 |
+
self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
|
| 391 |
+
self._fetch_block
|
| 392 |
+
)
|
| 393 |
+
|
| 394 |
+
def _fetch(self, start: int | None, end: int | None) -> bytes:
|
| 395 |
+
if start is None:
|
| 396 |
+
start = 0
|
| 397 |
+
if end is None:
|
| 398 |
+
end = self.size
|
| 399 |
+
if start >= self.size or start >= end:
|
| 400 |
+
return b""
|
| 401 |
+
|
| 402 |
+
# byte position -> block numbers
|
| 403 |
+
start_block_number = start // self.blocksize
|
| 404 |
+
end_block_number = end // self.blocksize
|
| 405 |
+
|
| 406 |
+
# these are cached, so safe to do multiple calls for the same start and end.
|
| 407 |
+
for block_number in range(start_block_number, end_block_number + 1):
|
| 408 |
+
self._fetch_block_cached(block_number)
|
| 409 |
+
|
| 410 |
+
return self._read_cache(
|
| 411 |
+
start,
|
| 412 |
+
end,
|
| 413 |
+
start_block_number=start_block_number,
|
| 414 |
+
end_block_number=end_block_number,
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
def _fetch_block(self, block_number: int) -> bytes:
|
| 418 |
+
"""
|
| 419 |
+
Fetch the block of data for `block_number`.
|
| 420 |
+
"""
|
| 421 |
+
if block_number > self.nblocks:
|
| 422 |
+
raise ValueError(
|
| 423 |
+
f"'block_number={block_number}' is greater than "
|
| 424 |
+
f"the number of blocks ({self.nblocks})"
|
| 425 |
+
)
|
| 426 |
+
|
| 427 |
+
start = block_number * self.blocksize
|
| 428 |
+
end = start + self.blocksize
|
| 429 |
+
self.total_requested_bytes += end - start
|
| 430 |
+
self.miss_count += 1
|
| 431 |
+
logger.info("BlockCache fetching block %d", block_number)
|
| 432 |
+
block_contents = super()._fetch(start, end)
|
| 433 |
+
return block_contents
|
| 434 |
+
|
| 435 |
+
def _read_cache(
|
| 436 |
+
self, start: int, end: int, start_block_number: int, end_block_number: int
|
| 437 |
+
) -> bytes:
|
| 438 |
+
"""
|
| 439 |
+
Read from our block cache.
|
| 440 |
+
|
| 441 |
+
Parameters
|
| 442 |
+
----------
|
| 443 |
+
start, end : int
|
| 444 |
+
The start and end byte positions.
|
| 445 |
+
start_block_number, end_block_number : int
|
| 446 |
+
The start and end block numbers.
|
| 447 |
+
"""
|
| 448 |
+
start_pos = start % self.blocksize
|
| 449 |
+
end_pos = end % self.blocksize
|
| 450 |
+
|
| 451 |
+
self.hit_count += 1
|
| 452 |
+
if start_block_number == end_block_number:
|
| 453 |
+
block: bytes = self._fetch_block_cached(start_block_number)
|
| 454 |
+
return block[start_pos:end_pos]
|
| 455 |
+
|
| 456 |
+
else:
|
| 457 |
+
# read from the initial
|
| 458 |
+
out = [self._fetch_block_cached(start_block_number)[start_pos:]]
|
| 459 |
+
|
| 460 |
+
# intermediate blocks
|
| 461 |
+
# Note: it'd be nice to combine these into one big request. However
|
| 462 |
+
# that doesn't play nicely with our LRU cache.
|
| 463 |
+
out.extend(
|
| 464 |
+
map(
|
| 465 |
+
self._fetch_block_cached,
|
| 466 |
+
range(start_block_number + 1, end_block_number),
|
| 467 |
+
)
|
| 468 |
+
)
|
| 469 |
+
|
| 470 |
+
# final block
|
| 471 |
+
out.append(self._fetch_block_cached(end_block_number)[:end_pos])
|
| 472 |
+
|
| 473 |
+
return b"".join(out)
|
| 474 |
+
|
| 475 |
+
|
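These cache classes are normally selected by name rather than instantiated directly: buffered fsspec files accept a `cache_type` argument matching each class's `name` attribute (see the `caches` registry at the end of this file). A sketch, with a placeholder URL:

# Sketch: choosing a read cache by name when opening a remote file.
import fsspec

with fsspec.open(
    "https://example.com/large.bin",  # placeholder URL
    mode="rb",
    cache_type="blockcache",          # -> BlockCache, LRU of fixed blocks
    block_size=2**20,                 # 1 MiB per block
) as f:
    header = f.read(4096)             # fetches only the first block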
class BytesCache(BaseCache):
    """Cache which holds data in an in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        return len(self.cache)


class AllBytes(BaseCache):
    """Cache entire contents of the file"""

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        self.hit_count += 1
        return self.data[start:stop]


class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        to known bytes.
    strict: bool, default True
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: dict[tuple[int, int], bytes] | None = None,
        strict: bool = True,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size

        out = b""
        for (loc0, loc1), data in self.data.items():
            # If self.strict=False, use zero-padded data
            # for reads beyond the end of a "known" buffer
            if loc0 <= start < loc1:
                off = start - loc0
                out = data[off : off + stop - start]
                if not self.strict or loc0 <= stop <= loc1:
                    # The request is within a known range, or
                    # it begins within a known range, and we
                    # are allowed to pad reads beyond the
                    # buffer with zero
                    out += b"\x00" * (stop - start - len(out))
                    self.hit_count += 1
                    return out
                else:
                    # The request ends outside a known range,
                    # and we are being "strict" about reads
                    # beyond the buffer
                    start = loc1
                    break

        # We only get here if there is a request outside the
        # known parts of the file. In an ideal world, this
        # should never happen
        if self.fetcher is None:
            # We cannot fetch the data, so raise an error
            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
        # We can fetch the data, but should warn the user
        # that this may be slow
        warnings.warn(
            f"Read is outside the known file parts: {(start, stop)}. "
            f"IO/caching performance may be poor!"
        )
        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
        self.total_requested_bytes += stop - start
        self.miss_count += 1
        return out + super()._fetch(start, stop)


class UpdatableLRU(Generic[P, T]):
    """
    Custom implementation of LRU cache that allows updating keys

    Used by BackgroundBlockCache
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
        with self._lock:
            if args in self._cache:
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        result = self._func(*args, **kwargs)

        with self._lock:
            self._cache[args] = result
            self._misses += 1
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

        return result

    def is_key_cached(self, *args: Any) -> bool:
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        with self._lock:
            self._cache[args] = result
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        with self._lock:
            return self.CacheInfo(
                maxsize=self._max_size,
                currsize=len(self._cache),
                hits=self._hits,
                misses=self._misses,
            )
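Unlike `functools.lru_cache`, `UpdatableLRU` lets external code inject a result via `add_key`; this is what allows the background fetch thread in the class below to publish a speculatively fetched block. A small illustration with an invented function:

# Illustration of UpdatableLRU: results can be inserted without calling func.
def slow_square(x: int) -> int:
    return x * x

lru = UpdatableLRU(slow_square, max_size=2)
assert lru(3) == 9     # computed, counted as a miss
lru.add_key(16, 4)     # inject a result for key (4,)
assert lru.is_key_cached(4)
assert lru(4) == 16    # served from cache, counted as a hit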
class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in a non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)


caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}


def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """'Register' cache implementation.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
    """
    name = cls.name
    if not clobber and name in caches:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls


for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
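User code can extend the `caches` registry the same way, as long as the class sets a unique `name`. A hypothetical sketch:

# Hypothetical sketch: registering a custom cache implementation.
class LoggingCache(BaseCache):
    """A pass-through cache that merely logs each read (illustrative only)."""

    name = "logging"

    def _fetch(self, start, stop):
        logger.debug("LoggingCache read %s-%s", start, stop)
        return super()._fetch(start, stop)


register_cache(LoggingCache)
# Now selectable by name, e.g. fsspec.open(..., cache_type="logging")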
venv/lib/python3.13/site-packages/fsspec/compression.py
ADDED
@@ -0,0 +1,182 @@
"""Helper functions for a standard streaming compression API"""

from zipfile import ZipFile

import fsspec.utils
from fsspec.spec import AbstractBufferedFile


def noop_file(file, mode, **kwargs):
    return file


# TODO: files should also be available as contexts
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
compr = {None: noop_file}


def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]

    # Validate registration
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name


def unzip(infile, mode="rb", filename=None, **kwargs):
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")

try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass


class SnappyFile(AbstractBufferedFile):
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        data = self.infile.read(end - start)
        return self.codec.decompress(data)


try:
    import snappy

    snappy.compress(b"")
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    # zstd in the standard library for python >= 3.14
    from compression.zstd import ZstdFile

    register_compression("zstd", ZstdFile, "zst")

except ImportError:
    try:
        import zstandard as zstd

        def zstandard_file(infile, mode="rb"):
            if "r" in mode:
                cctx = zstd.ZstdDecompressor()
                return cctx.stream_reader(infile)
            else:
                cctx = zstd.ZstdCompressor(level=10)
                return cctx.stream_writer(infile)

        register_compression("zstd", zstandard_file, "zst")
    except ImportError:
        pass


def available_compressions():
    """Return a list of the implemented compressions."""
    return list(compr)
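`register_compression` is equally open to user code: anything with the `(infile, mode, **kwargs) -> file-like` shape can be registered and then inferred from a file extension. A hypothetical sketch assuming the third-party `brotli` package is installed (read side only):

# Hypothetical sketch: registering a custom compression under a new name.
import io

import brotli  # assumption: the third-party `brotli` package is installed


def brotli_file(infile, mode="rb", **kwargs):
    # Wrap a file-like object; decompress the whole payload on read.
    if "r" in mode:
        return io.BytesIO(brotli.decompress(infile.read()))
    raise NotImplementedError("write mode not sketched here")


register_compression("brotli", brotli_file, "br")
# fsspec.open("data.json.br", compression="infer") would now decompress it.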
venv/lib/python3.13/site-packages/fsspec/config.py
ADDED
@@ -0,0 +1,131 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import configparser
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import warnings
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
conf: dict[str, dict[str, Any]] = {}
|
| 10 |
+
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
|
| 11 |
+
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def set_conf_env(conf_dict, envdict=os.environ):
|
| 15 |
+
"""Set config values from environment variables
|
| 16 |
+
|
| 17 |
+
Looks for variables of the form ``FSSPEC_<protocol>`` and
|
| 18 |
+
``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
|
| 19 |
+
as a json dictionary and used to ``update`` the config of the
|
| 20 |
+
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    kwarg_keys = []
    for key in envdict:
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                kwarg_keys.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
            else:
                if isinstance(value, dict):
                    _, proto = key.split("_", 1)
                    conf_dict.setdefault(proto.lower(), {}).update(value)
                else:
                    warnings.warn(
                        f"Ignoring environment variable {key} due to not being a dict:"
                        f" {type(value)}"
                    )
        elif key.startswith("FSSPEC"):
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in kwarg_keys:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]


def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    allfiles = sorted(os.listdir(cdir))
    for fn in allfiles:
        if fn.endswith(".ini"):
            ini = configparser.ConfigParser()
            ini.read(os.path.join(cdir, fn))
            for key in ini:
                if key == "DEFAULT":
                    continue
                conf_dict.setdefault(key, {}).update(dict(ini[key]))
        if fn.endswith(".json"):
            with open(os.path.join(cdir, fn)) as f:
                js = json.load(f)
            for key in js:
                conf_dict.setdefault(key, {}).update(dict(js[key]))


def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the class's ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf
    protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
    kw = {}
    for proto in protos:
        # default kwargs from the current state of the config
        if proto in conf_dict:
            kw.update(conf_dict[proto])
    # explicit kwargs always win
    kw.update(**kwargs)
    kwargs = kw
    return kwargs


set_conf_files(conf_dir, conf)
set_conf_env(conf)
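A minimal sketch of how these config hooks compose in practice; the environment-variable names and values below are hypothetical examples, not taken from this diff:

    # A JSON dict in FSSPEC_<protocol> is parsed; FSSPEC_<protocol>_<kwarg>
    # stays a string and its key is lower-cased, as implemented above.
    from fsspec.config import set_conf_env

    conf = {}
    envdict = {
        "FSSPEC_GCS": '{"project": "my-project"}',
        "FSSPEC_S3_ENDPOINT_URL": "http://localhost:9000",
    }
    set_conf_env(conf_dict=conf, envdict=envdict)
    # conf == {"gcs": {"project": "my-project"},
    #          "s3": {"endpoint_url": "http://localhost:9000"}}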
venv/lib/python3.13/site-packages/fsspec/conftest.py
ADDED
@@ -0,0 +1,125 @@
import os
import shutil
import subprocess
import sys
import time
from collections import deque
from collections.abc import Generator, Sequence

import pytest

import fsspec


@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")


class InstanceCacheInspector:
    """
    Helper class to inspect instance caches of filesystem classes in tests.
    """

    def clear(self) -> None:
        """
        Clear instance caches of all currently imported filesystem classes.
        """
        classes = deque([fsspec.spec.AbstractFileSystem])
        while classes:
            cls = classes.popleft()
            cls.clear_instance_cache()
            classes.extend(cls.__subclasses__())

    def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
        """
        Gather counts of filesystem instances in the instance caches
        of all currently imported filesystem classes.

        Parameters
        ----------
        omit_zero:
            Whether to omit instance types with no cached instances.
        """
        out: dict[str, int] = {}
        classes = deque([fsspec.spec.AbstractFileSystem])
        while classes:
            cls = classes.popleft()
            count = len(cls._cache)  # there is no public interface for the cache
            # note: skip intermediate AbstractFileSystem subclasses
            # if they proxy the protocol attribute via a property.
            if isinstance(cls.protocol, (Sequence, str)):
                key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
                if count or not omit_zero:
                    out[key] = count
            classes.extend(cls.__subclasses__())
        return out


@pytest.fixture(scope="function", autouse=True)
def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
    """
    Fixture to ensure empty filesystem instance caches before and after a test.

    Used by default for all tests.
    Clears caches of all imported filesystem classes.
    Can be used to write test assertions about instance caches.

    Usage:

        def test_something(instance_caches):
            # Test code here
            fsspec.open("file://abc")
            fsspec.open("memory://foo/bar")

            # Test assertion
            assert instance_caches.gather_counts() == {"file": 1, "memory": 1}

    Returns
    -------
    instance_caches: An instance cache inspector for clearing and inspecting caches.
    """
    ic = InstanceCacheInspector()

    ic.clear()
    try:
        yield ic
    finally:
        ic.clear()


@pytest.fixture(scope="function")
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.
    """
    pytest.importorskip("pyftpdlib")

    d = str(tmpdir)
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
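A short sketch of how a hypothetical test would consume these fixtures (the test body is illustrative, not part of the fsspec test suite):

    # `m` yields a clean MemoryFileSystem; the autouse `instance_caches`
    # fixture lets the test assert on which filesystem instances got cached.
    def test_memory_roundtrip(m, instance_caches):
        m.pipe("/foo/bar", b"data")          # write bytes into the memory fs
        assert m.cat("/foo/bar") == b"data"  # read them back
        assert "memory" in instance_caches.gather_counts()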
venv/lib/python3.13/site-packages/fsspec/core.py
ADDED
@@ -0,0 +1,743 @@
from __future__ import annotations

import io
import logging
import os
import re
from glob import has_magic
from pathlib import Path

# for backwards compat, we export cache things from here too
from fsspec.caching import (  # noqa: F401
    BaseCache,
    BlockCache,
    BytesCache,
    MMapCache,
    ReadAheadCache,
    caches,
)
from fsspec.compression import compr
from fsspec.config import conf
from fsspec.registry import filesystem, get_filesystem_class
from fsspec.utils import (
    _unstrip_protocol,
    build_name_function,
    infer_compression,
    stringify_path,
)

logger = logging.getLogger("fsspec")


class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    autoopen: bool
        If True, calls open() immediately. Mostly used by pickle
    pos: int
        If given and autoopen is True, seek to this location immediately
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.fobjects = []

    def __reduce__(self):
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                raise FileNotFoundError(
                    "%s not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
                    self.path,
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()

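# --- Illustrative usage sketch (not part of the file above; the path is a
# hypothetical local file): OpenFile defers the real open() until the context
# is entered, which is why instances pickle cleanly.
#
#     import fsspec
#     of = fsspec.core.OpenFile(
#         fsspec.filesystem("file"), "data.csv.gz", mode="rt", compression="infer"
#     )
#     with of as f:   # binary file, gzip layer and text wrapper created here
#         header = f.readline()
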
class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        fs = self.fs
        while True:
            if hasattr(fs, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = fs.open_many(self)
                return self.files
            if hasattr(fs, "fs") and fs.fs is not None:
                fs = fs.fs
            else:
                break
        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        fs = self.fs
        [s.__exit__(*args) for s in self]
        if "r" not in self.mode:
            while True:
                if hasattr(fs, "open_many"):
                    # check for concurrent cache upload
                    fs.commit_many(self.files)
                    return
                if hasattr(fs, "fs") and fs.fs is not None:
                    fs = fs.fs
                else:
                    break

    def __getitem__(self, item):
        out = super().__getitem__(item)
        if isinstance(item, slice):
            return OpenFiles(out, mode=self.mode, fs=self.fs)
        return out

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"


def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to, see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )

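# --- Illustrative sketch (not part of the file above; assumes some local
# "part-*.csv" files exist): the returned OpenFiles behaves as one context.
#
#     from fsspec.core import open_files
#     files = open_files("part-*.csv", mode="rt")
#     with files as handles:            # opens every matched file at once
#         first_rows = [h.readline() for h in handles]
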
def _un_chain(path, kwargs):
    # Avoid a circular import
    from fsspec.implementations.chained import ChainedFileSystem

    if "::" in path:
        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
        bits = []
        for p in path.split("::"):
            if "://" in p or x.match(p):
                bits.append(p)
            else:
                bits.append(p + "://")
    else:
        bits = [path]
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})
        if bit is bits[0]:
            kws.update(kwargs)
        kw = dict(
            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
            **kws,
        )
        bit = cls._strip_protocol(bit)
        if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem):
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out.reverse()
    return out


def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-system-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls
    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath


DEFAULT_EXPAND = conf.get("open_expand", False)

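# --- Illustrative sketch (not part of the file above; archive.zip is a
# hypothetical local file): a chained URL unrolls into nested target_* options.
#
#     from fsspec.core import url_to_fs
#     fs, path = url_to_fs("zip://inner.csv::file://archive.zip")
#     # fs is a zip filesystem whose target ("fo") is the local archive.zip;
#     # path == "inner.csv" inside the archive.
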
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to, see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    expand = DEFAULT_EXPAND if expand is None else expand
    out = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if not out:
        raise FileNotFoundError(urlpath)
    return out[0]

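# --- Illustrative sketch (not part of the file above; the URL is a
# placeholder): fsspec.open returns a serialisable OpenFile, entered lazily.
#
#     import fsspec
#     with fsspec.open("https://example.com/data.csv.gz",
#                      mode="rt", compression="infer") as f:
#         first = f.readline()
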
def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to the FS, or used by ``open_files`` (e.g., compression)
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    with of as files:
        paths = [f.name for f in files]
    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
        return paths[0]
    return paths


def get_compression(urlpath, compression):
    if compression == "infer":
        compression = infer_compression(urlpath)
    if compression is not None and compression not in compr:
        raise ValueError(f"Compression type {compression} not supported")
    return compression


def split_protocol(urlpath):
    """Return protocol, path pair"""
    urlpath = stringify_path(urlpath)
    if "://" in urlpath:
        protocol, path = urlpath.split("://", 1)
        if len(protocol) > 1:
            # excludes Windows paths
            return protocol, path
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath


def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    protocol, _ = split_protocol(urlpath)
    cls = get_filesystem_class(protocol)
    return cls._strip_protocol(urlpath)

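# --- Illustrative sketch (not part of the file above): protocol splitting.
#
#     from fsspec.core import split_protocol
#     split_protocol("s3://bucket/key")  # -> ("s3", "bucket/key")
#     split_protocol("/local/path")      # -> (None, "/local/path")
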
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    expanded_paths = []
    paths = list(paths)

    if "w" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths


def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)
    storage_options = storage_options or {}
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options or {})
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        pchains = [
            _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
        ]
        if len({pc[1] for pc in pchains}) > 1:
            raise ValueError("Protocol mismatch getting fs from %s", urlpath)
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths

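# --- Illustrative sketch (not part of the file above; the glob is a
# placeholder): one call resolves filesystem, cache token and concrete paths.
#
#     from fsspec.core import get_fs_token_paths
#     fs, token, paths = get_fs_token_paths("memory://data/*.csv")
#     # fs: the memory filesystem; token: its deterministic instance token;
#     # paths: the sorted list of matching files.
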
def _expand_paths(path, name_function, num):
    if isinstance(path, str):
        if path.count("*") > 1:
            raise ValueError("Output path spec must contain exactly one '*'.")
        elif "*" not in path:
            path = os.path.join(path, "*.part")

        if name_function is None:
            name_function = build_name_function(num - 1)

        paths = [path.replace("*", name_function(i)) for i in range(num)]
        if paths != sorted(paths):
            logger.warning(
                "In order to preserve order between partitions"
                " paths created with ``name_function`` should "
                "sort to partition order"
            )
    elif isinstance(path, (tuple, list)):
        assert len(path) == num
        paths = list(path)
    else:
        raise ValueError(
            "Path should be either\n"
            "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
            "2. A directory: 'foo/'\n"
            "3. A path with a '*' in it: 'foo.*.json'"
        )
    return paths


class PickleableTextIOWrapper(io.TextIOWrapper):
    """TextIOWrapper cannot be pickled. This solves it.

    Requires that ``buffer`` be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        self.args = buffer, encoding, errors, newline, line_buffering, write_through
        super().__init__(*self.args)

    def __reduce__(self):
        return PickleableTextIOWrapper, self.args
venv/lib/python3.13/site-packages/fsspec/dircache.py
ADDED
@@ -0,0 +1,98 @@
import time
from collections.abc import MutableMapping
from functools import lru_cache


class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        """
        self._cache = {}
        self._times = {}
        if max_paths:
            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                del self._cache[item]
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
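A brief sketch of the expiry behaviour described above (editorial example; the listing content and timings are illustrative):

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.1)
    cache["bucket/prefix"] = [{"name": "bucket/prefix/a", "type": "file"}]
    assert "bucket/prefix" in cache
    time.sleep(0.2)
    assert "bucket/prefix" not in cache  # entry aged out and was dropped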
venv/lib/python3.13/site-packages/fsspec/fuse.py
ADDED
@@ -0,0 +1,324 @@
import argparse
import logging
import os
import stat
import threading
import time
from errno import EIO, ENOENT

from fuse import FUSE, FuseOSError, LoggingMixIn, Operations

from fsspec import __version__
from fsspec.core import url_to_fs

logger = logging.getLogger("fsspec.fuse")


class FUSEr(Operations):
    def __init__(self, fs, path, ready_file=False):
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def getattr(self, path, fh=None):
        logger.debug("getattr %s", path)
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            return {"type": "file", "st_size": 5}

        path = "".join([self.root, path.lstrip("/")]).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        data["st_atime"] = info["atime"] if "atime" in info else time.time()
        data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
        data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
        return data

    def readdir(self, path, fh):
        logger.debug("readdir %s", path)
        path = "".join([self.root, path.lstrip("/")])
        files = self.fs.ls(path, False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.mkdir(path)
        return 0

    def rmdir(self, path):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.rmdir(path)
        return 0

    def read(self, path, size, offset, fh):
        logger.debug("read %s", (path, size, offset))
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        out = f.read(size)
        return out

    def write(self, path, data, offset, fh):
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        logger.debug("create %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        self.fs.touch(fn)  # OS will want to get attributes immediately
        f = self.fs.open(fn, "wb")
        self.cache[self.counter] = f
        self.counter += 1
        return self.counter - 1

    def open(self, path, flags):
        logger.debug("open %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        self.cache[self.counter] = self.fs.open(fn, mode)
        self.counter += 1
        return self.counter - 1

    def truncate(self, path, length, fh=None):
        fn = "".join([self.root, path.lstrip("/")])
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        fn = "".join([self.root, path.lstrip("/")])
        try:
            self.fs.rm(fn, False)
        except (OSError, FileNotFoundError) as exc:
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        try:
            if fh in self.cache:
                f = self.cache[fh]
                f.close()
                self.cache.pop(fh)
        except Exception as e:
            print(e)
        return 0

    def chmod(self, path, mode):
        if hasattr(self.fs, "chmod"):
            path = "".join([self.root, path.lstrip("/")])
            return self.fs.chmod(path, mode)
        raise NotImplementedError


def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount stuff in a local directory

    This uses fusepy to make it appear as if a given path on an fsspec
    instance is in fact resident within the local file-system.

    This requires that fusepy be installed, and that FUSE be available on
    the system (typically requiring a package to be installed with
    apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or Subclass of FUSEr
        To override the default behavior of FUSEr. For Example, logging
        to file.

    """
    func = lambda: FUSE(
        ops_class(fs, path, ready_file=ready_file),
        mount_point,
        nothreads=not threads,
        foreground=foreground,
    )
    if not foreground:
        th = threading.Thread(target=func)
        th.daemon = True
        th.start()
        return th
    else:  # pragma: no cover
        try:
            func()
        except KeyboardInterrupt:
            pass


def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represents the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        def format_help(self):
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Running in foreground or not (Default: False)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Running with threads support (Default: False)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="The `.fuse_ready` file will exist after FUSE is ready. "
        "(Debugging purpose, Default: False)",
    )
    args = parser.parse_args(args)

    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            fs_name, setting_name = key.split("-", 1)
            if fs_name in kwargs:
                kwargs[fs_name][setting_name] = value
            else:
                kwargs[fs_name] = {setting_name: value}
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )


if __name__ == "__main__":
    import sys

    main(sys.argv[1:])
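A minimal sketch of mounting programmatically (editorial example; assumes fusepy and a working FUSE install, and that the mount-point directory exists):

    import fsspec
    from fsspec.fuse import run

    fs = fsspec.filesystem("memory")
    fs.pipe("/data/hello.txt", b"hi")
    # foreground=False returns a daemon thread running the FUSE loop
    th = run(fs, "/data/", "/tmp/mem_mount", foreground=False)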
venv/lib/python3.13/site-packages/fsspec/generic.py
ADDED
@@ -0,0 +1,396 @@
from __future__ import annotations

import inspect
import logging
import os
import shutil
import uuid

from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
from .callbacks import DEFAULT_CALLBACK
from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs

_generic_fs = {}
logger = logging.getLogger("fsspec.generic")


def set_generic_fs(protocol, **storage_options):
    """Populate the dict used for method=="generic" lookups"""
    _generic_fs[protocol] = filesystem(protocol, **storage_options)


def _resolve_fs(url, method, protocol=None, storage_options=None):
    """Pick instance of backend FS"""
    url = url[0] if isinstance(url, (list, tuple)) else url
    protocol = protocol or split_protocol(url)[0]
    storage_options = storage_options or {}
    if method == "default":
        return filesystem(protocol)
    if method == "generic":
        return _generic_fs[protocol]
    if method == "current":
        cls = get_filesystem_class(protocol)
        return cls.current()
    if method == "options":
        fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
        return fs
    raise ValueError(f"Unknown FS resolution method: {method}")


def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a
        directory, but do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)
    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
    ]
    logger.debug(f"{len(dirs)} directories to create")
    if dirs:
        fs.make_many_dirs(
            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
        )
    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug(f"{len(allfiles)} files to consider for copy")
    to_delete = [
        o
        for o, v in otherfiles.items()
        if o.replace(destination, source) not in allfiles and v["type"] == "file"
    ]
    for k, v in allfiles.copy().items():
        otherfile = k.replace(source, destination)
        if otherfile in otherfiles:
            if update_cond == "always":
                allfiles[k] = otherfile
            elif update_cond == "never":
                allfiles.pop(k)
            elif update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 != inf2:
                    # details mismatch, make copy
                    allfiles[k] = otherfile
                else:
                    # details match, don't copy
                    allfiles.pop(k)
        else:
            # file not in target yet
            allfiles[k] = otherfile
    logger.debug(f"{len(allfiles)} files to copy")
    if allfiles:
        source_files, target_files = zip(*allfiles.items())
        fs.cp(source_files, target_files, **kwargs)
    logger.debug(f"{len(to_delete)} files to delete")
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return allfiles
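A minimal hedged usage sketch of `rsync` (not from the package itself), syncing two trees on the built-in memory filesystem; paths and contents are invented for illustration.

import fsspec
from fsspec.generic import rsync

mem = fsspec.filesystem("memory")
mem.pipe_file("/src/a.txt", b"hello")
mem.pipe_file("/src/sub/b.txt", b"world")

# Copies files that are missing at the destination or whose "size" differs;
# returns the {source: destination} pairs that were actually copied.
copied = rsync("memory://src", "memory://dest")
print(copied)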
class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Note: instances of this FS are always async, even if you never use it with any
    async backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", storage_options=None, **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:
            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "options": expects storage_options, a dict mapping protocol to
              kwargs to use when constructing the filesystem
            - "current": takes the most recently instantiated version of each FS
        """
        self.method = default_method
        self.st_opts = storage_options
        super().__init__(**kwargs)

    def _parent(self, path):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        # normalization only
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            v = v.copy()  # don't corrupt target FS dircache
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out = out.copy()  # don't edit originals
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        out = [o.copy() for o in out]  # don't edit originals
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(url, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.
        """
        rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        tempdir: str | None = None,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        fs2 = _resolve_fs(url2, self.method)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(url, url2, **kwargs)
            else:
                return fs.copy(url, url2, **kwargs)
        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")

    async def _make_many_dirs(self, urls, exist_ok=True):
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: int | None = None,
        batch_size: int | None = None,
        tempdir: str | None = None,
        **kwargs,
    ):
        # TODO: special case for one FS being local, which can use get/put
        # TODO: special case for one being memFS, which can use cat/pipe
        if recursive:
            raise NotImplementedError("Please use fsspec.generic.rsync")
        path1 = [path1] if isinstance(path1, str) else path1
        path2 = [path2] if isinstance(path2, str) else path2

        fs = _resolve_fs(path1, self.method)
        fs2 = _resolve_fs(path2, self.method)

        if fs is fs2:
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)

        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )


async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    import tempfile

    tempdir = tempdir or tempfile.mkdtemp()
    try:
        coros = [
            _copy_file_op(
                fs1,
                u1,
                fs2,
                u2,
                os.path.join(tempdir, uuid.uuid4().hex),
            )
            for u1, u2 in zip(url1, url2)
        ]
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True
        )
    finally:
        shutil.rmtree(tempdir)
    if on_error == "return":
        return out
    elif on_error == "raise":
        for o in out:
            if isinstance(o, Exception):
                raise o


async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
    if fs1.async_impl:
        await fs1._get_file(url1, local)
    else:
        fs1.get_file(url1, local)
    if fs2.async_impl:
        await fs2._put_file(local, url2)
    else:
        fs2.put_file(local, url2)
    os.unlink(local)
    logger.debug("Copy %s -> %s; done", url1, url2)


async def maybe_await(cor):
    if inspect.iscoroutine(cor):
        return await cor
    else:
        return cor
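A brief hedged sketch of URL dispatch through GenericFileSystem, again on the in-memory backend; the path and payload are invented.

from fsspec.generic import GenericFileSystem

fs = GenericFileSystem()  # default_method="default"
fs.pipe_file("memory://notes/x.txt", b"payload")
print(fs.cat_file("memory://notes/x.txt"))    # b'payload'
print(fs.ls("memory://notes", detail=False))  # names keep their protocol prefix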
venv/lib/python3.13/site-packages/fsspec/gui.py
ADDED
@@ -0,0 +1,417 @@
import ast
import contextlib
import logging
import os
import re
from collections.abc import Sequence
from typing import ClassVar

import panel as pn

from .core import OpenFile, get_filesystem_class, split_protocol
from .registry import known_implementations

pn.extension()
logger = logging.getLogger("fsspec.gui")


class SigSlot:
    """Signal-slot mixin, for Panel event passing

    Include this class in a widget manager's superclasses to be able to
    register events and callbacks on Panel widgets managed by that class.

    The method ``_register`` should be called as widgets are added, and external
    code should call ``connect`` to associate callbacks.

    By default, all signals emit a DEBUG logging statement.
    """

    # names of signals that this class may emit, each of which must be
    # set by _register for any new instance
    signals: ClassVar[Sequence[str]] = []
    # names of actions that this class may respond to,
    # each of which must be a method name
    slots: ClassVar[Sequence[str]] = []

    def __init__(self):
        self._ignoring_events = False
        self._sigs = {}
        self._map = {}
        self._setup()

    def _setup(self):
        """Create GUI elements and register signals"""
        self.panel = pn.pane.PaneBase()
        # no signals to set up in the base class

    def _register(
        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
    ):
        """Watch the given attribute of a widget and assign it a named event

        This is normally called at the time a widget is instantiated, in the
        class which owns it.

        Parameters
        ----------
        widget : pn.layout.Panel or None
            Widget to watch. If None, an anonymous signal not associated with
            any widget.
        name : str
            Name of this event
        thing : str
            Attribute of the given widget to watch
        log_level : int
            When the signal is triggered, a logging event of the given level
            will be fired in the dfviz logger.
        auto : bool
            If True, automatically connects with a method in this class of the
            same name.
        """
        if name not in self.signals:
            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
        self._sigs[name] = {
            "widget": widget,
            "callbacks": [],
            "thing": thing,
            "log": log_level,
        }
        wn = "-".join(
            [
                getattr(widget, "name", str(widget)) if widget is not None else "none",
                thing,
            ]
        )
        self._map[wn] = name
        if widget is not None:
            widget.param.watch(self._signal, thing, onlychanged=True)
        if auto and hasattr(self, name):
            self.connect(name, getattr(self, name))

    def _repr_mimebundle_(self, *args, **kwargs):
        """Display in a notebook or a server"""
        try:
            return self.panel._repr_mimebundle_(*args, **kwargs)
        except (ValueError, AttributeError) as exc:
            raise NotImplementedError(
                "Panel does not seem to be set up properly"
            ) from exc

    def connect(self, signal, slot):
        """Associate callback with given event

        The callback must be a function which takes the "new" value of the
        watched attribute as the only parameter. If the callback returns False,
        this cancels any further processing of the given event.

        Alternatively, the callback can be a string, in which case it means
        emitting the correspondingly-named event (i.e., connect to self)
        """
        self._sigs[signal]["callbacks"].append(slot)

    def _signal(self, event):
        """This is called by an action on a widget

        Within a self.ignore_events context, nothing happens.

        Tests can execute this method by directly changing the values of
        widget components.
        """
        if not self._ignoring_events:
            wn = "-".join([event.obj.name, event.name])
            if wn in self._map and self._map[wn] in self._sigs:
                self._emit(self._map[wn], event.new)

    @contextlib.contextmanager
    def ignore_events(self):
        """Temporarily turn off events processing in this instance

        (does not propagate to children)
        """
        self._ignoring_events = True
        try:
            yield
        finally:
            self._ignoring_events = False

    def _emit(self, sig, value=None):
        """An event happened, call its callbacks

        This method can be used in tests to simulate message passing without
        directly changing visual elements.

        Calling of callbacks will halt whenever one returns False.
        """
        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
        for callback in self._sigs[sig]["callbacks"]:
            if isinstance(callback, str):
                self._emit(callback)
            else:
                try:
                    # running callbacks should not break the interface
                    ret = callback(value)
                    if ret is False:
                        break
                except Exception as e:
                    logger.exception(
                        "Exception (%s) while executing callback for signal: %s",
                        e,
                        sig,
                    )

    def show(self, threads=False):
        """Open a new browser tab and display this instance's interface"""
        self.panel.show(threads=threads, verbose=False)
        return self
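A hedged sketch of the signal/slot pattern above; the `Demo` class is hypothetical and not part of fsspec.

import panel as pn
from fsspec.gui import SigSlot

class Demo(SigSlot):
    signals = ["text_changed"]

    def _setup(self):
        # declare the widget, then bind its "value" attribute to our signal
        self.box = pn.widgets.TextInput(name="box")
        self._register(self.box, "text_changed", "value")
        self.panel = pn.Row(self.box)

d = Demo()
d.connect("text_changed", lambda new: print("got:", new))
d.box.value = "hello"  # param watcher fires synchronously -> prints "got: hello"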
class SingleSelect(SigSlot):
    """A multiselect which only allows you to select one item for an event"""

    signals = ["_selected", "selected"]  # the first is internal
    slots = ["set_options", "set_selection", "add", "clear", "select"]

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        super().__init__()

    def _setup(self):
        self.panel = pn.widgets.MultiSelect(**self.kwargs)
        self._register(self.panel, "_selected", "value")
        self._register(None, "selected")
        self.connect("_selected", self.select_one)

    def _signal(self, *args, **kwargs):
        super()._signal(*args, **kwargs)

    def select_one(self, *_):
        with self.ignore_events():
            val = [self.panel.value[-1]] if self.panel.value else []
            self.panel.value = val
        self._emit("selected", self.panel.value)

    def set_options(self, options):
        self.panel.options = options

    def clear(self):
        self.panel.options = []

    @property
    def value(self):
        return self.panel.value

    def set_selection(self, selection):
        self.panel.value = [selection]


class FileSelector(SigSlot):
    """Panel-based graphical file selector widget

    Instances of this widget are interactive and can be displayed in jupyter by having
    them as the output of a cell, or in a separate browser tab using ``.show()``.
    """

    signals = [
        "protocol_changed",
        "selection_changed",
        "directory_entered",
        "home_clicked",
        "up_clicked",
        "go_clicked",
        "filters_changed",
    ]
    slots = ["set_filters", "go_home"]

    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
        """

        Parameters
        ----------
        url : str (optional)
            Initial value of the URL to populate the dialog; should include protocol
        filters : list(str) (optional)
            File endings to include in the listings. If not included, all files are
            allowed. Does not affect directories.
            If given, the endings will appear as checkboxes in the interface
        ignore : list(str) (optional)
            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
            hidden files on posix
        kwargs : dict (optional)
            To pass to file system instance
        """
        if url:
            self.init_protocol, url = split_protocol(url)
        else:
            self.init_protocol, url = "file", os.getcwd()
        self.init_url = url
        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
        self.filters = filters
        self.ignore = [re.compile(i) for i in ignore or []]
        self._fs = None
        super().__init__()

    def _setup(self):
        self.url = pn.widgets.TextInput(
            name="url",
            value=self.init_url,
            align="end",
            sizing_mode="stretch_width",
            width_policy="max",
        )
        self.protocol = pn.widgets.Select(
            options=sorted(known_implementations),
            value=self.init_protocol,
            name="protocol",
            align="center",
        )
        self.kwargs = pn.widgets.TextInput(
            name="kwargs", value=self.init_kwargs, align="center"
        )
        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
        self.main = SingleSelect(size=10)
        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")

        self._register(self.protocol, "protocol_changed", auto=True)
        self._register(self.go, "go_clicked", "clicks", auto=True)
        self._register(self.up, "up_clicked", "clicks", auto=True)
        self._register(self.home, "home_clicked", "clicks", auto=True)
        self._register(None, "selection_changed")
        self.main.connect("selected", self.selection_changed)
        self._register(None, "directory_entered")
        self.prev_protocol = self.protocol.value
        self.prev_kwargs = self.storage_options

        self.filter_sel = pn.widgets.CheckBoxGroup(
            value=[], options=[], inline=False, align="end", width_policy="min"
        )
        self._register(self.filter_sel, "filters_changed", auto=True)

        self.panel = pn.Column(
            pn.Row(self.protocol, self.kwargs),
            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
            self.main.panel,
        )
        self.set_filters(self.filters)
        self.go_clicked()

    def set_filters(self, filters=None):
        self.filters = filters
        if filters:
            self.filter_sel.options = filters
            self.filter_sel.value = filters
        else:
            self.filter_sel.options = []
            self.filter_sel.value = []

    @property
    def storage_options(self):
        """Value of the kwargs box as a dictionary"""
        return ast.literal_eval(self.kwargs.value) or {}

    @property
    def fs(self):
        """Current filesystem instance"""
        if self._fs is None:
            cls = get_filesystem_class(self.protocol.value)
            self._fs = cls(**self.storage_options)
        return self._fs

    @property
    def urlpath(self):
        """URL of currently selected item"""
        return (
            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
        )

    def open_file(self, mode="rb", compression=None, encoding=None):
        """Create OpenFile instance for the currently selected item

        For example, in a notebook you might do something like

        .. code-block::

            [ ]: sel = FileSelector(); sel

            # user selects their file

            [ ]: with sel.open_file('rb') as f:
            ...      out = f.read()

        Parameters
        ----------
        mode: str (optional)
            Open mode for the file.
        compression: str (optional)
            Interact with the file as compressed. Set to 'infer' to guess
            compression from the file ending
        encoding: str (optional)
            If using text mode, use this encoding; defaults to UTF8.
        """
        if self.urlpath is None:
            raise ValueError("No file selected")
        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)

    def filters_changed(self, values):
        self.filters = values
        self.go_clicked()

    def selection_changed(self, *_):
        if self.urlpath is None:
            return
        if self.fs.isdir(self.urlpath):
            self.url.value = self.fs._strip_protocol(self.urlpath)
            self.go_clicked()

    def go_clicked(self, *_):
        if (
            self.prev_protocol != self.protocol.value
            or self.prev_kwargs != self.storage_options
        ):
            self._fs = None  # causes fs to be recreated
            self.prev_protocol = self.protocol.value
            self.prev_kwargs = self.storage_options
        listing = sorted(
            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
        )
        listing = [
            l
            for l in listing
            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
        ]
        folders = {
            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "directory"
        }
        files = {
            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "file"
        }
        if self.filters:
            files = {
                k: v
                for k, v in files.items()
                if any(v.endswith(ext) for ext in self.filters)
            }
        self.main.set_options(dict(**folders, **files))

    def protocol_changed(self, *_):
        self._fs = None
        self.main.options = []
        self.url.value = ""

    def home_clicked(self, *_):
        self.protocol.value = self.init_protocol
        self.kwargs.value = self.init_kwargs
        self.url.value = self.init_url
        self.go_clicked()

    def up_clicked(self, *_):
        self.url.value = self.fs._parent(self.url.value)
        self.go_clicked()
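A hedged sketch of programmatic FileSelector use; interactive browsing needs a live Panel session, so here we only wire a callback and read state. The filter value is invented.

import os
from fsspec.gui import FileSelector

sel = FileSelector(url="file://" + os.getcwd(), filters=[".py"])
# react whenever the user picks an entry in the listing
sel.main.connect("selected", lambda v: print("selected:", v))
# sel.show()  # would serve the widget in a new browser tab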
venv/lib/python3.13/site-packages/fsspec/json.py
ADDED
@@ -0,0 +1,117 @@
import json
from collections.abc import Mapping, Sequence
from contextlib import suppress
from pathlib import PurePath
from typing import (
    Any,
    Callable,
    ClassVar,
    Optional,
)

from .registry import _import_class, get_filesystem_class
from .spec import AbstractFileSystem


class FilesystemJSONEncoder(json.JSONEncoder):
    include_password: ClassVar[bool] = True

    def default(self, o: Any) -> Any:
        if isinstance(o, AbstractFileSystem):
            return o.to_dict(include_password=self.include_password)
        if isinstance(o, PurePath):
            cls = type(o)
            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}

        return super().default(o)

    def make_serializable(self, obj: Any) -> Any:
        """
        Recursively converts an object so that it can be JSON serialized via
        :func:`json.dumps` and :func:`json.dump`, without actually calling
        said functions.
        """
        if isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, Mapping):
            return {k: self.make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, Sequence):
            return [self.make_serializable(v) for v in obj]

        return self.default(obj)


class FilesystemJSONDecoder(json.JSONDecoder):
    def __init__(
        self,
        *,
        object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
        parse_float: Optional[Callable[[str], Any]] = None,
        parse_int: Optional[Callable[[str], Any]] = None,
        parse_constant: Optional[Callable[[str], Any]] = None,
        strict: bool = True,
        object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
    ) -> None:
        self.original_object_hook = object_hook

        super().__init__(
            object_hook=self.custom_object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            strict=strict,
            object_pairs_hook=object_pairs_hook,
        )

    @classmethod
    def try_resolve_path_cls(cls, dct: dict[str, Any]):
        with suppress(Exception):
            fqp = dct["cls"]

            path_cls = _import_class(fqp)

            if issubclass(path_cls, PurePath):
                return path_cls

        return None

    @classmethod
    def try_resolve_fs_cls(cls, dct: dict[str, Any]):
        with suppress(Exception):
            if "cls" in dct:
                try:
                    fs_cls = _import_class(dct["cls"])
                    if issubclass(fs_cls, AbstractFileSystem):
                        return fs_cls
                except Exception:
                    if "protocol" in dct:  # Fallback if cls cannot be imported
                        return get_filesystem_class(dct["protocol"])

                    raise

        return None

    def custom_object_hook(self, dct: dict[str, Any]):
        if "cls" in dct:
            if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
                return AbstractFileSystem.from_dict(dct)
            if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
                return obj_cls(dct["str"])

        if self.original_object_hook is not None:
            return self.original_object_hook(dct)

        return dct

    def unmake_serializable(self, obj: Any) -> Any:
        """
        Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
        """
        if isinstance(obj, dict):
            obj = self.custom_object_hook(obj)
        if isinstance(obj, dict):
            return {k: self.unmake_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [self.unmake_serializable(v) for v in obj]

        return obj
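A hedged round-trip sketch for the encoder/decoder pair above, using the in-memory filesystem; the extra "path" key is invented.

import json
import fsspec
from fsspec.json import FilesystemJSONDecoder, FilesystemJSONEncoder

fs = fsspec.filesystem("memory")
payload = json.dumps({"fs": fs, "path": "/data"}, cls=FilesystemJSONEncoder)
restored = json.loads(payload, cls=FilesystemJSONDecoder)
# fsspec instance caching should hand back the same object here
print(restored["fs"] is fs)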
venv/lib/python3.13/site-packages/fsspec/mapping.py
ADDED
@@ -0,0 +1,251 @@
import array
import logging
import posixpath
import warnings
from collections.abc import MutableMapping
from functools import cached_property

from fsspec.core import url_to_fs

logger = logging.getLogger("fsspec.mapping")


class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=True)
        performs a touch at the location, to check for write access.

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f"with the ``create=True`` keyword"
                )
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping"""
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except:  # noqa: E722
            pass

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            The keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip path off to leave key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data"""
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions as exc:
            if default is not None:
                return default
            raise KeyError(key) from exc
        return result

    def pop(self, key, default=None):
        """Pop data"""
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key"""
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            raise KeyError from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)


def maybe_convert(value):
    if isinstance(value, array.array) or hasattr(value, "__array__"):
        # bytes-like things
        if hasattr(value, "dtype") and value.dtype.kind in "Mm":
            # The buffer interface doesn't support datetime64/timedelta64 numpy
            # arrays
            value = value.view("int64")
        value = bytes(memoryview(value))
    return value


def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Whether to attempt to read from the location before instantiation, to
        check that the mapping does exist
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # Removing protocol here - could defer to each open() on the backend
    fs, urlpath = url_to_fs(url, **kwargs)
    root = alternate_root if alternate_root is not None else urlpath
    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
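A hedged usage sketch of the dict-like interface on the in-memory filesystem; keys and values are invented.

import fsspec

m = fsspec.get_mapper("memory://demo")
m["a/b.bin"] = b"\x00\x01"   # writes memory://demo/a/b.bin
print(list(m))               # ['a/b.bin']
print(m["a/b.bin"])          # b'\x00\x01'
del m["a/b.bin"]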
venv/lib/python3.13/site-packages/fsspec/parquet.py
ADDED
@@ -0,0 +1,541 @@
import io
import json
import warnings

from .core import url_to_fs
from .utils import merge_offset_ranges

# Parquet-Specific Utilities for fsspec
#
# Most of the functions defined in this module are NOT
# intended for public consumption. The only exception
# to this is `open_parquet_file`, which should be used
# in place of `fs.open()` to open parquet-formatted files
# on remote file systems.


def open_parquet_file(
    path,
    mode="rb",
    fs=None,
    metadata=None,
    columns=None,
    row_groups=None,
    storage_options=None,
    strict=False,
    engine="auto",
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    **kwargs,
):
    """
    Return a file-like object for a single Parquet file.

    The specified parquet `engine` will be used to parse the
    footer metadata, and determine the required byte ranges
    from the file. The target path will then be opened with
    the "parts" (`KnownPartsOfAFile`) caching strategy.

    Note that this method is intended for usage with remote
    file systems, and is unlikely to improve parquet-read
    performance on local file systems.

    Parameters
    ----------
    path: str
        Target file path.
    mode: str, optional
        Mode option to be passed through to `fs.open`. Default is "rb".
    metadata: Any, optional
        Parquet metadata object. Object type must be supported
        by the backend parquet engine. For now, only the "fastparquet"
        engine supports an explicit `ParquetFile` metadata object.
        If a metadata object is supplied, the remote footer metadata
        will not need to be transferred into local memory.
    fs: AbstractFileSystem, optional
        Filesystem object to use for opening the file. If nothing is
        specified, an `AbstractFileSystem` object will be inferred.
    engine : str, default "auto"
        Parquet engine to use for metadata parsing. Allowed options
        include "fastparquet", "pyarrow", and "auto". The specified
        engine must be installed in the current environment. If
        "auto" is specified, and both engines are installed,
        "fastparquet" will take precedence over "pyarrow".
    columns: list, optional
        List of all column names that may be read from the file.
    row_groups : list, optional
        List of all row-groups that may be read from the file. This
        may be a list of row-group indices (integers), or it may be
        a list of `RowGroup` metadata objects (if the "fastparquet"
        engine is used).
    storage_options : dict, optional
        Used to generate an `AbstractFileSystem` object if `fs` was
        not specified.
    strict : bool, optional
        Whether the resulting `KnownPartsOfAFile` cache should
        fetch reads that go beyond a known byte-range boundary.
        If `False` (the default), any read that ends outside a
        known part will be zero padded. Note that using
        `strict=True` may be useful for debugging.
    max_gap : int, optional
        Neighboring byte ranges will only be merged when their
        inter-range gap is <= `max_gap`. Default is 64KB.
    max_block : int, optional
        Neighboring byte ranges will only be merged when the size of
        the aggregated range is <= `max_block`. Default is 256MB.
    footer_sample_size : int, optional
        Number of bytes to read from the end of the path to look
        for the footer metadata. If the sampled bytes do not contain
        the footer, a second read request will be required, and
        performance will suffer. Default is 1MB.
    **kwargs :
        Optional keyword arguments to pass to `fs.open`.
    """

    # Make sure we have an `AbstractFileSystem` object
    # to work with
    if fs is None:
        fs = url_to_fs(path, **(storage_options or {}))[0]

    # For now, `columns == []` is not supported. Just use
    # the default `open` command with the `path` input
    if columns is not None and len(columns) == 0:
        return fs.open(path, mode=mode)

    # Set the engine
    engine = _set_engine(engine)

    # Fetch the known byte ranges needed to read
    # `columns` and/or `row_groups`
    data = _get_parquet_byte_ranges(
        [path],
        fs,
        metadata=metadata,
        columns=columns,
        row_groups=row_groups,
        engine=engine,
        max_gap=max_gap,
        max_block=max_block,
        footer_sample_size=footer_sample_size,
    )

    # Extract file name from `data`
    fn = next(iter(data)) if data else path

    # Call fs.open with "parts" caching
    options = kwargs.pop("cache_options", {}).copy()
    return fs.open(
        fn,
        mode=mode,
        cache_type="parts",
        cache_options={
            **options,
            "data": data.get(fn, {}),
            "strict": strict,
        },
        **kwargs,
    )


def _get_parquet_byte_ranges(
    paths,
    fs,
    metadata=None,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    engine="auto",
):
    """Get a dictionary of the known byte ranges needed
    to read a specific column/row-group selection from a
    Parquet dataset. Each value in the output dictionary
    is intended for use as the `data` argument for the
    `KnownPartsOfAFile` caching strategy of a single path.
    """

    # Set engine if necessary
    if isinstance(engine, str):
        engine = _set_engine(engine)

    # Pass to specialized function if metadata is defined
    if metadata is not None:
        # Use the provided parquet metadata object
        # to avoid transferring/parsing footer metadata
        return _get_parquet_byte_ranges_from_metadata(
            metadata,
            fs,
            engine,
            columns=columns,
            row_groups=row_groups,
            max_gap=max_gap,
            max_block=max_block,
        )

    # Get file sizes asynchronously
    file_sizes = fs.sizes(paths)

    # Populate global paths, starts, & ends
    result = {}
    data_paths = []
    data_starts = []
    data_ends = []
    add_header_magic = True
    if columns is None and row_groups is None:
        # We are NOT selecting specific columns or row-groups.
        #
        # We can avoid sampling the footers, and just transfer
        # all file data with cat_ranges
        for i, path in enumerate(paths):
            result[path] = {}
            for b in range(0, file_sizes[i], max_block):
                data_paths.append(path)
                data_starts.append(b)
                data_ends.append(min(b + max_block, file_sizes[i]))
        add_header_magic = False  # "Magic" should already be included
    else:
        # We ARE selecting specific columns or row-groups.
        #
        # Gather file footers.
        # We just take the last `footer_sample_size` bytes of each
        # file (or the entire file if it is smaller than that)
        footer_starts = []
        footer_ends = []
        for i, path in enumerate(paths):
            footer_ends.append(file_sizes[i])
            sample_size = max(0, file_sizes[i] - footer_sample_size)
            footer_starts.append(sample_size)
        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)

        # Check our footer samples and re-sample if necessary.
        missing_footer_starts = footer_starts.copy()
        large_footer = 0
        for i, path in enumerate(paths):
            footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
            real_footer_start = file_sizes[i] - (footer_size + 8)
            if real_footer_start < footer_starts[i]:
                missing_footer_starts[i] = real_footer_start
                large_footer = max(large_footer, (footer_size + 8))
        if large_footer:
            warnings.warn(
                f"Not enough data was used to sample the parquet footer. "
                f"Try setting footer_sample_size >= {large_footer}."
            )
            for i, block in enumerate(
                fs.cat_ranges(
                    paths,
                    missing_footer_starts,
                    footer_starts,
                )
            ):
                footer_samples[i] = block + footer_samples[i]
                footer_starts[i] = missing_footer_starts[i]

        # Calculate required byte ranges for each path
        for i, path in enumerate(paths):
            # Deal with small-file case.
            # Just include all remaining bytes of the file
            # in a single range.
            if file_sizes[i] < max_block:
                if footer_starts[i] > 0:
                    # Only need to transfer the data if the
                    # footer sample isn't already the whole file
                    data_paths.append(path)
                    data_starts.append(0)
                    data_ends.append(footer_starts[i])
                continue

            # Use "engine" to collect data byte ranges
            path_data_starts, path_data_ends = engine._parquet_byte_ranges(
                columns,
                row_groups=row_groups,
                footer=footer_samples[i],
                footer_start=footer_starts[i],
            )

            data_paths += [path] * len(path_data_starts)
            data_starts += path_data_starts
            data_ends += path_data_ends

        # Merge adjacent offset ranges
        data_paths, data_starts, data_ends = merge_offset_ranges(
            data_paths,
            data_starts,
            data_ends,
            max_gap=max_gap,
            max_block=max_block,
            sort=False,  # Should already be sorted
        )

        # Start by populating `result` with footer samples
        for i, path in enumerate(paths):
            result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}

    # Transfer the data byte-ranges into local memory
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Add b"PAR1" to header if necessary
    if add_header_magic:
        _add_header_magic(result)

    return result


def _get_parquet_byte_ranges_from_metadata(
    metadata,
    fs,
    engine,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
):
    """Simplified version of `_get_parquet_byte_ranges` for
    the case that an engine-specific `metadata` object is
    provided, and the remote footer metadata does not need to
    be transferred before calculating the required byte ranges.
    """

    # Use "engine" to collect data byte ranges
    data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
        columns,
        row_groups=row_groups,
        metadata=metadata,
    )

    # Merge adjacent offset ranges
    data_paths, data_starts, data_ends = merge_offset_ranges(
        data_paths,
        data_starts,
        data_ends,
        max_gap=max_gap,
        max_block=max_block,
        sort=False,  # Should be sorted
    )

    # Transfer the data byte-ranges into local memory
    result = {fn: {} for fn in list(set(data_paths))}
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Add b"PAR1" to header
    _add_header_magic(result)

    return result


def _transfer_ranges(fs, blocks, paths, starts, ends):
    # Use cat_ranges to gather the data byte_ranges
    ranges = (paths, starts, ends)
    for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
        blocks[path][(start, stop)] = data


def _add_header_magic(data):
    # Add b"PAR1" to file headers
    for path in list(data.keys()):
        add_magic = True
        for k in data[path]:
            if k[0] == 0 and k[1] >= 4:
                add_magic = False
                break
        if add_magic:
            data[path][(0, 4)] = b"PAR1"


def _set_engine(engine_str):
    # Define a list of parquet engines to try
    if engine_str == "auto":
        try_engines = ("fastparquet", "pyarrow")
    elif not isinstance(engine_str, str):
        raise ValueError(
            "Failed to set parquet engine! "
            "Please pass 'fastparquet', 'pyarrow', or 'auto'"
        )
    elif engine_str not in ("fastparquet", "pyarrow"):
        raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
    else:
        try_engines = [engine_str]

    # Try importing the engines in `try_engines`,
    # and choose the first one that succeeds
    for engine in try_engines:
        try:
            if engine == "fastparquet":
                return FastparquetEngine()
            elif engine == "pyarrow":
                return PyarrowEngine()
        except ImportError:
            pass

    # Raise an error if a supported parquet engine
    # was not found
    raise ImportError(
        f"The following parquet engines are not installed "
        f"in your python environment: {try_engines}. "
        f"Please install 'fastparquet' or 'pyarrow' to "
        f"utilize the `fsspec.parquet` module."
    )


class FastparquetEngine:
    # The purpose of the FastparquetEngine class is
    # to check if fastparquet can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to fastparquet.

    def __init__(self):
        import fastparquet as fp

        self.fp = fp

    def _row_group_filename(self, row_group, pf):
        return pf.row_group_filename(row_group)

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        # Initialize offset ranges and define ParquetFile metadata
        pf = metadata
        data_paths, data_starts, data_ends = [], [], []
        if pf is None:
            pf = self.fp.ParquetFile(io.BytesIO(footer))

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None and hasattr(pf, "pandas_metadata"):
            md_index = [
                ind
                for ind in pf.pandas_metadata.get("index_columns", [])
                # Ignore RangeIndex information
                if not isinstance(ind, dict)
            ]
            column_set |= set(md_index)

        # Check if row_groups is a list of integers
        # or a list of row-group metadata
        if row_groups and not isinstance(row_groups[0], int):
            # Input row_groups contains row-group metadata
            row_group_indices = None
        else:
            # Input row_groups contains row-group indices
            row_group_indices = row_groups
            row_groups = pf.row_groups

        # Loop through column chunks to add required byte ranges
        for r, row_group in enumerate(row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_group_indices is None or r in row_group_indices:
                # Find the target parquet-file path for `row_group`
                fn = self._row_group_filename(row_group, pf)

                for column in row_group.columns:
                    name = column.meta_data.path_in_schema[0]
                    # Skip this column if we are targeting
                    # specific columns
                    if column_set is None or name in column_set:
                        file_offset0 = column.meta_data.dictionary_page_offset
                        if file_offset0 is None:
                            file_offset0 = column.meta_data.data_page_offset
                        num_bytes = column.meta_data.total_compressed_size
                        if footer_start is None or file_offset0 < footer_start:
                            data_paths.append(fn)
                            data_starts.append(file_offset0)
                            data_ends.append(
                                min(
                                    file_offset0 + num_bytes,
                                    footer_start or (file_offset0 + num_bytes),
                                )
                            )

        if metadata:
            # The metadata in this call may map to multiple
            # file paths. Need to include `data_paths`
            return data_paths, data_starts, data_ends
        return data_starts, data_ends


class PyarrowEngine:
    # The purpose of the PyarrowEngine class is
    # to check if pyarrow can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to pyarrow.

    def __init__(self):
        import pyarrow.parquet as pq

        self.pq = pq

    def _row_group_filename(self, row_group, metadata):
        raise NotImplementedError

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        if metadata is not None:
            raise ValueError("metadata input not supported for PyarrowEngine")

        data_starts, data_ends = [], []
        md = self.pq.ParquetFile(io.BytesIO(footer)).metadata

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None:
            schema = md.schema.to_arrow_schema()
            has_pandas_metadata = (
                schema.metadata is not None and b"pandas" in schema.metadata
            )
            if has_pandas_metadata:
                md_index = [
                    ind
                    for ind in json.loads(
                        schema.metadata[b"pandas"].decode("utf8")
                    ).get("index_columns", [])
                    # Ignore RangeIndex information
                    if not isinstance(ind, dict)
                ]
                column_set |= set(md_index)

        # Loop through column chunks to add required byte ranges
        for r in range(md.num_row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_groups is None or r in row_groups:
                row_group = md.row_group(r)
                for c in range(row_group.num_columns):
                    column = row_group.column(c)
                    name = column.path_in_schema
                    # Skip this column if we are targeting
                    # specific columns
                    split_name = name.split(".")[0]
                    if (
                        column_set is None
                        or name in column_set
                        or split_name in column_set
                    ):
                        file_offset0 = column.dictionary_page_offset
                        if file_offset0 is None:
                            file_offset0 = column.data_page_offset
                        num_bytes = column.total_compressed_size
                        if file_offset0 < footer_start:
                            data_starts.append(file_offset0)
                            data_ends.append(
                                min(file_offset0 + num_bytes, footer_start)
                            )
        return data_starts, data_ends
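
For orientation, here is a minimal sketch of how `open_parquet_file` is meant to be used from user code. The S3 URL and column names are hypothetical placeholders, and it assumes `pandas`, `s3fs`, and at least one of the parquet engines are installed:

import pandas as pd

import fsspec.parquet

# Hypothetical remote file and column selection -- substitute a real dataset.
with fsspec.parquet.open_parquet_file(
    "s3://my-bucket/part.0.parquet",  # placeholder path
    columns=["a", "b"],               # only these column chunks get fetched
) as f:
    # The footer and the selected byte ranges were already transferred, so
    # reads here are served from the "parts" (KnownPartsOfAFile) cache.
    df = pd.read_parquet(f, columns=["a", "b"])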
venv/lib/python3.13/site-packages/fsspec/registry.py
ADDED
@@ -0,0 +1,330 @@
from __future__ import annotations

import importlib
import types
import warnings

__all__ = ["registry", "get_filesystem_class", "default"]

# internal, mutable
_registry: dict[str, type] = {}

# external, immutable
registry = types.MappingProxyType(_registry)
default = "file"


def register_implementation(name, cls, clobber=False, errtxt=None):
    """Add implementation class to the registry

    Parameters
    ----------
    name: str
        Protocol name to associate with the class
    cls: class or str
        If a class: an fsspec-compliant implementation class (normally
        inheriting from ``fsspec.AbstractFileSystem``), which gets added
        straight to the registry. If a str, the full path to an
        implementation class like package.module.class, which gets added to
        known_implementations, so the import is deferred until the
        filesystem is actually used.
    clobber: bool (optional)
        Whether to overwrite a protocol with the same name; if False, will
        raise instead.
    errtxt: str (optional)
        If given, then a failure to import the given class will result in
        this text being shown as the error message.
    """
    if isinstance(cls, str):
        if name in known_implementations and clobber is False:
            if cls != known_implementations[name]["class"]:
                raise ValueError(
                    f"Name ({name}) already in the known_implementations and clobber "
                    f"is False"
                )
        else:
            known_implementations[name] = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }

    else:
        if name in registry and clobber is False:
            if _registry[name] is not cls:
                raise ValueError(
                    f"Name ({name}) already in the registry and clobber is False"
                )
        else:
            _registry[name] = cls


# protocols mapped to the class which implements them. This dict can be
# updated with register_implementation
known_implementations = {
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "arrow_hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "asynclocal": {
        "class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "asyncwrapper": {
        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "data": {"class": "fsspec.implementations.data.DataFileSystem"},
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "dropbox": {
        "class": "dropboxdrivefs.DropboxDriveFileSystem",
        "err": (
            'DropboxFileSystem requires "dropboxdrivefs", "requests" and '
            '"dropbox" to be installed'
        ),
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
    "gcs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gdrive": {
        "class": "gdrive_fsspec.GoogleDriveFileSystem",
        "err": "Please install gdrive_fs for access to Google Drive",
    },
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "gist": {
        "class": "fsspec.implementations.gist.GistFileSystem",
        "err": "Install the requests package to use the gist FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "gs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "http": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "https": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": "LibArchive requires libarchive to be installed",
    },
    "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
    "oci": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Object Storage",
    },
    "ocilake": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Data Lake",
    },
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "pyscript": {
        "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
        "err": "Install requests (cpython) or run in pyscript",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": (
            "Install fsspec-xrootd to access xrootd storage system. "
            "Note: 'root' is the protocol name for xrootd storage systems, "
            "not referring to root directories"
        ),
    },
    "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "sftp": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "ssh": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
    "tos": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "tosfs": {
        "class": "tosfs.TosFileSystem",
        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
    },
    "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "webhdfs": {
        "class": "fsspec.implementations.webhdfs.WebHDFS",
        "err": 'webHDFS access requires "requests" to be installed',
    },
    "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
}

assert list(known_implementations) == sorted(known_implementations), (
    "Not in alphabetical order"
)


def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            raise ImportError(bit.get("err")) from e
    cls = registry[protocol]
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls


s3_msg = """Your installed version of s3fs is very old and known to cause
severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.
"""


def _import_class(fqp: str):
    """Take a fully-qualified path and return the imported class or identifier.

    ``fqp`` is of the form "package.module.klass" or
    "package.module:subobject.klass".

    Warnings
    --------
    This can import arbitrary modules. Make sure you haven't installed any modules
    that may execute malicious code at import time.
    """
    if ":" in fqp:
        mod, name = fqp.rsplit(":", 1)
    else:
        mod, name = fqp.rsplit(".", 1)

    is_s3 = mod == "s3fs"
    mod = importlib.import_module(mod)
    if is_s3 and mod.__version__.split(".") < ["0", "5"]:
        warnings.warn(s3_msg)
    for part in name.split("."):
        mod = getattr(mod, part)

    if not isinstance(mod, type):
        raise TypeError(f"{fqp} is not a class")

    return mod


def filesystem(protocol, **storage_options):
    """Instantiate filesystems for given protocol and arguments

    ``storage_options`` are specific to the protocol being chosen, and are
    passed directly to the class.
    """
    if protocol == "arrow_hdfs":
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    cls = get_filesystem_class(protocol)
    return cls(**storage_options)


def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.
    """
    return list(known_implementations)
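
As a quick illustration of the lazy-import flow above, the sketch below registers a hypothetical third-party implementation by string (so the module is only imported when the protocol is first used), then resolves a built-in protocol through the same machinery; `myproto` and `mypkg.MyFileSystem` are made-up names:

import fsspec
from fsspec.registry import known_implementations, register_implementation

# Register by string path: the import is deferred until first use.
# "myproto" and "mypkg.MyFileSystem" are hypothetical placeholders.
register_implementation(
    "myproto",
    "mypkg.MyFileSystem",
    errtxt="Install mypkg to use the myproto:// protocol",
)
assert "myproto" in known_implementations

# Built-in protocols resolve through get_filesystem_class() on demand;
# this imports fsspec.implementations.memory the first time it runs.
fs = fsspec.filesystem("memory")
print(type(fs).__name__)  # MemoryFileSystem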
venv/lib/python3.13/site-packages/fsspec/spec.py
ADDED
@@ -0,0 +1,2281 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
import threading
|
| 8 |
+
import warnings
|
| 9 |
+
import weakref
|
| 10 |
+
from errno import ESPIPE
|
| 11 |
+
from glob import has_magic
|
| 12 |
+
from hashlib import sha256
|
| 13 |
+
from typing import Any, ClassVar
|
| 14 |
+
|
| 15 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 16 |
+
from .config import apply_config, conf
|
| 17 |
+
from .dircache import DirCache
|
| 18 |
+
from .transaction import Transaction
|
| 19 |
+
from .utils import (
|
| 20 |
+
_unstrip_protocol,
|
| 21 |
+
glob_translate,
|
| 22 |
+
isfilelike,
|
| 23 |
+
other_paths,
|
| 24 |
+
read_block,
|
| 25 |
+
stringify_path,
|
| 26 |
+
tokenize,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("fsspec")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def make_instance(cls, args, kwargs):
|
| 33 |
+
return cls(*args, **kwargs)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class _Cached(type):
|
| 37 |
+
"""
|
| 38 |
+
Metaclass for caching file system instances.
|
| 39 |
+
|
| 40 |
+
Notes
|
| 41 |
+
-----
|
| 42 |
+
Instances are cached according to
|
| 43 |
+
|
| 44 |
+
* The values of the class attributes listed in `_extra_tokenize_attributes`
|
| 45 |
+
* The arguments passed to ``__init__``.
|
| 46 |
+
|
| 47 |
+
This creates an additional reference to the filesystem, which prevents the
|
| 48 |
+
filesystem from being garbage collected when all *user* references go away.
|
| 49 |
+
A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
|
| 50 |
+
be made for a filesystem instance to be garbage collected.
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
def __init__(cls, *args, **kwargs):
|
| 54 |
+
super().__init__(*args, **kwargs)
|
| 55 |
+
# Note: we intentionally create a reference here, to avoid garbage
|
| 56 |
+
# collecting instances when all other references are gone. To really
|
| 57 |
+
# delete a FileSystem, the cache must be cleared.
|
| 58 |
+
if conf.get("weakref_instance_cache"): # pragma: no cover
|
| 59 |
+
# debug option for analysing fork/spawn conditions
|
| 60 |
+
cls._cache = weakref.WeakValueDictionary()
|
| 61 |
+
else:
|
| 62 |
+
cls._cache = {}
|
| 63 |
+
cls._pid = os.getpid()
|
| 64 |
+
|
| 65 |
+
def __call__(cls, *args, **kwargs):
|
| 66 |
+
kwargs = apply_config(cls, kwargs)
|
| 67 |
+
extra_tokens = tuple(
|
| 68 |
+
getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
|
| 69 |
+
)
|
| 70 |
+
strip_tokenize_options = {
|
| 71 |
+
k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
|
| 72 |
+
}
|
| 73 |
+
token = tokenize(
|
| 74 |
+
cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
|
| 75 |
+
)
|
| 76 |
+
skip = kwargs.pop("skip_instance_cache", False)
|
| 77 |
+
if os.getpid() != cls._pid:
|
| 78 |
+
cls._cache.clear()
|
| 79 |
+
cls._pid = os.getpid()
|
| 80 |
+
if not skip and cls.cachable and token in cls._cache:
|
| 81 |
+
cls._latest = token
|
| 82 |
+
return cls._cache[token]
|
| 83 |
+
else:
|
| 84 |
+
obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
|
| 85 |
+
# Setting _fs_token here causes some static linters to complain.
|
| 86 |
+
obj._fs_token_ = token
|
| 87 |
+
obj.storage_args = args
|
| 88 |
+
obj.storage_options = kwargs
|
| 89 |
+
if obj.async_impl and obj.mirror_sync_methods:
|
| 90 |
+
from .asyn import mirror_sync_methods
|
| 91 |
+
|
| 92 |
+
mirror_sync_methods(obj)
|
| 93 |
+
|
| 94 |
+
if cls.cachable and not skip:
|
| 95 |
+
cls._latest = token
|
| 96 |
+
cls._cache[token] = obj
|
| 97 |
+
return obj
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
class AbstractFileSystem(metaclass=_Cached):
|
| 101 |
+
"""
|
| 102 |
+
An abstract super-class for pythonic file-systems
|
| 103 |
+
|
| 104 |
+
Implementations are expected to be compatible with or, better, subclass
|
| 105 |
+
from here.
|
| 106 |
+
"""
|
| 107 |
+
|
| 108 |
+
cachable = True # this class can be cached, instances reused
|
| 109 |
+
_cached = False
|
| 110 |
+
blocksize = 2**22
|
| 111 |
+
sep = "/"
|
| 112 |
+
protocol: ClassVar[str | tuple[str, ...]] = "abstract"
|
| 113 |
+
_latest = None
|
| 114 |
+
async_impl = False
|
| 115 |
+
mirror_sync_methods = False
|
| 116 |
+
root_marker = "" # For some FSs, may require leading '/' or other character
|
| 117 |
+
transaction_type = Transaction
|
| 118 |
+
|
| 119 |
+
#: Extra *class attributes* that should be considered when hashing.
|
| 120 |
+
_extra_tokenize_attributes = ()
|
| 121 |
+
#: *storage options* that should not be considered when hashing.
|
| 122 |
+
_strip_tokenize_options = ()
|
| 123 |
+
|
| 124 |
+
# Set by _Cached metaclass
|
| 125 |
+
storage_args: tuple[Any, ...]
|
| 126 |
+
storage_options: dict[str, Any]
|
| 127 |
+
|
| 128 |
+
def __init__(self, *args, **storage_options):
|
| 129 |
+
"""Create and configure file-system instance
|
| 130 |
+
|
| 131 |
+
Instances may be cachable, so if similar enough arguments are seen
|
| 132 |
+
a new instance is not required. The token attribute exists to allow
|
| 133 |
+
implementations to cache instances if they wish.
|
| 134 |
+
|
| 135 |
+
A reasonable default should be provided if there are no arguments.
|
| 136 |
+
|
| 137 |
+
Subclasses should call this method.
|
| 138 |
+
|
| 139 |
+
Parameters
|
| 140 |
+
----------
|
| 141 |
+
use_listings_cache, listings_expiry_time, max_paths:
|
| 142 |
+
passed to ``DirCache``, if the implementation supports
|
| 143 |
+
directory listing caching. Pass use_listings_cache=False
|
| 144 |
+
to disable such caching.
|
| 145 |
+
skip_instance_cache: bool
|
| 146 |
+
If this is a cachable implementation, pass True here to force
|
| 147 |
+
creating a new instance even if a matching instance exists, and prevent
|
| 148 |
+
storing this instance.
|
| 149 |
+
asynchronous: bool
|
| 150 |
+
loop: asyncio-compatible IOLoop or None
|
| 151 |
+
"""
|
| 152 |
+
if self._cached:
|
| 153 |
+
# reusing instance, don't change
|
| 154 |
+
return
|
| 155 |
+
self._cached = True
|
| 156 |
+
self._intrans = False
|
| 157 |
+
self._transaction = None
|
| 158 |
+
self._invalidated_caches_in_transaction = []
|
| 159 |
+
self.dircache = DirCache(**storage_options)
|
| 160 |
+
|
| 161 |
+
if storage_options.pop("add_docs", None):
|
| 162 |
+
warnings.warn("add_docs is no longer supported.", FutureWarning)
|
| 163 |
+
|
| 164 |
+
if storage_options.pop("add_aliases", None):
|
| 165 |
+
warnings.warn("add_aliases has been removed.", FutureWarning)
|
| 166 |
+
# This is set in _Cached
|
| 167 |
+
self._fs_token_ = None
|
| 168 |
+
|
| 169 |
+
@property
|
| 170 |
+
def fsid(self):
|
| 171 |
+
"""Persistent filesystem id that can be used to compare filesystems
|
| 172 |
+
across sessions.
|
| 173 |
+
"""
|
| 174 |
+
raise NotImplementedError
|
| 175 |
+
|
| 176 |
+
@property
|
| 177 |
+
def _fs_token(self):
|
| 178 |
+
return self._fs_token_
|
| 179 |
+
|
| 180 |
+
def __dask_tokenize__(self):
|
| 181 |
+
return self._fs_token
|
| 182 |
+
|
| 183 |
+
def __hash__(self):
|
| 184 |
+
return int(self._fs_token, 16)
|
| 185 |
+
|
| 186 |
+
def __eq__(self, other):
|
| 187 |
+
return isinstance(other, type(self)) and self._fs_token == other._fs_token
|
| 188 |
+
|
| 189 |
+
def __reduce__(self):
|
| 190 |
+
return make_instance, (type(self), self.storage_args, self.storage_options)
|
| 191 |
+
|
| 192 |
+
@classmethod
|
| 193 |
+
def _strip_protocol(cls, path):
|
| 194 |
+
"""Turn path from fully-qualified to file-system-specific
|
| 195 |
+
|
| 196 |
+
May require FS-specific handling, e.g., for relative paths or links.
|
| 197 |
+
"""
|
| 198 |
+
if isinstance(path, list):
|
| 199 |
+
return [cls._strip_protocol(p) for p in path]
|
| 200 |
+
path = stringify_path(path)
|
| 201 |
+
protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
|
| 202 |
+
for protocol in protos:
|
| 203 |
+
if path.startswith(protocol + "://"):
|
| 204 |
+
path = path[len(protocol) + 3 :]
|
| 205 |
+
elif path.startswith(protocol + "::"):
|
| 206 |
+
path = path[len(protocol) + 2 :]
|
| 207 |
+
path = path.rstrip("/")
|
| 208 |
+
# use of root_marker to make minimum required path, e.g., "/"
|
| 209 |
+
return path or cls.root_marker
|
| 210 |
+
|
| 211 |
+
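    # Illustrative sketch (not part of the class): how ``_strip_protocol``
    # behaves for one concrete backend. LocalFileSystem is assumed here only
    # as a familiar example; other backends may normalise paths differently.
    #
    #     >>> from fsspec.implementations.local import LocalFileSystem
    #     >>> LocalFileSystem._strip_protocol("file:///tmp/data")  # doctest: +SKIP
    #     '/tmp/data'
    #     >>> LocalFileSystem._strip_protocol("/tmp/data/")  # doctest: +SKIP
    #     '/tmp/data'
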
    def unstrip_protocol(self, name: str) -> str:
        """Format FS-specific path to generic, including protocol"""
        protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
        for protocol in protos:
            if name.startswith(f"{protocol}://"):
                return name
        return f"{protos[0]}://{name}"

    @staticmethod
    def _get_kwargs_from_urls(path):
        """If kwargs can be encoded in the paths, extract them here

        This should happen before instantiation of the class; incoming paths
        then should be amended to strip the options in methods.

        Examples may look like an sftp path "sftp://user@host:/my/path", where
        the user and host should become kwargs and later get stripped.
        """
        # by default, nothing happens
        return {}

    @classmethod
    def current(cls):
        """Return the most recently instantiated FileSystem

        If no instance has been created, then create one with defaults
        """
        if cls._latest in cls._cache:
            return cls._cache[cls._latest]
        return cls()

    @property
    def transaction(self):
        """A context within which files are committed together upon exit

        Requires the file class to implement `.commit()` and `.discard()`
        for the normal and exception cases.
        """
        if self._transaction is None:
            self._transaction = self.transaction_type(self)
        return self._transaction

    def start_transaction(self):
        """Begin write transaction for deferring files, non-context version"""
        self._intrans = True
        self._transaction = self.transaction_type(self)
        return self.transaction

    def end_transaction(self):
        """Finish write transaction, non-context version"""
        self.transaction.complete()
        self._transaction = None
        # The invalid cache must be cleared after the transaction is completed.
        for path in self._invalidated_caches_in_transaction:
            self.invalidate_cache(path)
        self._invalidated_caches_in_transaction.clear()

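    # Illustrative sketch (not part of the class): the transaction property is
    # meant to be used as a context manager, so files written inside the block
    # are committed together on a clean exit and discarded on error. The
    # in-memory filesystem is assumed purely for demonstration.
    #
    #     >>> import fsspec
    #     >>> fs = fsspec.filesystem("memory")
    #     >>> with fs.transaction:  # doctest: +SKIP
    #     ...     with fs.open("/staged.txt", "wb") as f:
    #     ...         f.write(b"committed only on clean exit")
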
    def invalidate_cache(self, path=None):
        """
        Discard any cached directory information

        Parameters
        ----------
        path: string or None
            If None, clear all cached listings; otherwise, clear listings at
            or under the given path.
        """
        # Not necessary to implement an invalidation mechanism; may have no cache.
        # But if you have one, you should call this method of the parent class
        # from your subclass to ensure caches expire correctly after transactions.
        # See the implementation of FTPFileSystem in ftp.py
        if self._intrans:
            self._invalidated_caches_in_transaction.append(path)

    def mkdir(self, path, create_parents=True, **kwargs):
        """
        Create directory entry at path

        For systems that don't have true directories, may create one for
        this instance only and not touch the real filesystem

        Parameters
        ----------
        path: str
            location
        create_parents: bool
            if True, this is equivalent to ``makedirs``
        kwargs:
            may be permissions, etc.
        """
        pass  # not necessary to implement, may not have directories

    def makedirs(self, path, exist_ok=False):
        """Recursively make directories

        Creates directory at path and any intervening required directories.
        Raises exception if, for instance, the path already exists but is a
        file.

        Parameters
        ----------
        path: str
            leaf directory name
        exist_ok: bool (False)
            If False, will error if the target already exists
        """
        pass  # not necessary to implement, may not have directories

    def rmdir(self, path):
        """Remove a directory, if empty"""
        pass  # not necessary to implement, may not have directories

    def ls(self, path, detail=True, **kwargs):
        """List objects at path.

        This should include subdirectories and files at that location. The
        difference between a file and a directory must be clear when details
        are requested.

        The specific keys, or perhaps a FileInfo class, or similar, is TBD,
        but must be consistent across implementations.
        Must include:

        - full path to the entry (without protocol)
        - size of the entry, in bytes. If the value cannot be determined, will
          be ``None``.
        - type of entry, "file", "directory" or other

        Additional information may be present, appropriate to the file-system,
        e.g., generation, checksum, etc.

        May use refresh=True|False to allow use of self._ls_from_cache to
        check for a saved listing and avoid calling the backend. This would be
        common where listing may be expensive.

        Parameters
        ----------
        path: str
        detail: bool
            if True, gives a list of dictionaries, where each is the same as
            the result of ``info(path)``. If False, gives a list of paths
            (str).
        kwargs: may have additional backend-specific options, such as version
            information

        Returns
        -------
        List of strings if detail is False, or list of directory information
        dicts if detail is True.
        """
        raise NotImplementedError

    def _ls_from_cache(self, path):
        """Check cache for listing

        Returns listing, if found (may be an empty list for a directory that
        exists but contains nothing), None if not in cache.
        """
        parent = self._parent(path)
        try:
            return self.dircache[path.rstrip("/")]
        except KeyError:
            pass
        try:
            files = [
                f
                for f in self.dircache[parent]
                if f["name"] == path
                or (f["name"] == path.rstrip("/") and f["type"] == "directory")
            ]
            if len(files) == 0:
                # parent dir was listed but did not contain this file
                raise FileNotFoundError(path)
            return files
        except KeyError:
            pass

    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files under the given path.

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place
        (perhaps using del or slice assignment), and walk() will only recurse
        into the subdirectories whose names remain in dirnames; this can be
        used to prune the search, impose a specific order of visiting, or even
        to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" output will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            if raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            if callable(on_error):
                on_error(e)
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files

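    # Illustrative sketch (not part of the class): walking an in-memory tree
    # top-down, mirroring the ``os.walk`` tuple shape. Paths and the exact
    # listing order are assumptions of this sketch.
    #
    #     >>> import fsspec
    #     >>> fs = fsspec.filesystem("memory")
    #     >>> fs.pipe_file("/root/a/x.txt", b"1")  # doctest: +SKIP
    #     >>> for dirpath, dirnames, filenames in fs.walk("/root"):  # doctest: +SKIP
    #     ...     print(dirpath, dirnames, filenames)
    #     /root ['a'] []
    #     /root/a [] ['x.txt']
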
    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all files below path.

        Like posix ``find`` command without conditions

        Parameters
        ----------
        path : str
        maxdepth: int or None
            If not None, the maximum number of levels to descend
        withdirs: bool
            Whether to include directory paths in the output. This is True
            when used by glob, but users usually only want files.
        kwargs are passed to ``ls``.
        """
        # TODO: allow equivalent of -name parameter
        path = self._strip_protocol(path)
        out = {}

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and self.isdir(path):
            out[path] = self.info(path)

        for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and self.isfile(path):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

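    # Illustrative sketch (not part of the class): ``find`` flattens the same
    # traversal as ``walk`` into a sorted list of paths, assuming the small
    # in-memory tree built in the previous sketch.
    #
    #     >>> fs.find("/root")  # doctest: +SKIP
    #     ['/root/a/x.txt']
    #     >>> fs.find("/root", withdirs=True)  # doctest: +SKIP
    #     ['/root', '/root/a', '/root/a/x.txt']
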
    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        """Space used by files and optionally directories within a path

        Directory size does not include the size of its contents.

        Parameters
        ----------
        path: str
        total: bool
            Whether to sum all the file sizes
        maxdepth: int or None
            Maximum number of directory levels to descend, None for unlimited.
        withdirs: bool
            Whether to include directory paths in the output.
        kwargs: passed to ``find``

        Returns
        -------
        Dict of {path: size} if total=False, or int otherwise, where numbers
        refer to bytes used.
        """
        sizes = {}
        if withdirs and self.isdir(path):
            # Include top-level directory in output
            info = self.info(path)
            sizes[info["name"]] = info["size"]
        for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
            info = self.info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    def glob(self, path, maxdepth=None, **kwargs):
        """Find files by glob-matching.

        Pattern matching capabilities for finding files that match the given pattern.

        Parameters
        ----------
        path: str
            The glob pattern to match against
        maxdepth: int or None
            Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
            Must be at least 1 if provided.
        kwargs:
            Additional arguments passed to ``find`` (e.g., detail=True)

        Returns
        -------
        List of matched paths, or dict of paths and their info if detail=True

        Notes
        -----
        Supported patterns:
        - '*': Matches any sequence of characters within a single directory level
        - ``'**'``: Matches any number of directory levels (must be an entire path component)
        - '?': Matches exactly one character
        - '[abc]': Matches any character in the set
        - '[a-z]': Matches any character in the range
        - '[!abc]': Matches any character NOT in the set

        Special behaviors:
        - If the path ends with '/', only folders are returned
        - Consecutive '*' characters are compressed into a single '*'
        - Empty brackets '[]' never match anything
        - Negated empty brackets '[!]' match any single character
        - Special characters in character classes are escaped properly

        Limitations:
        - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
        - No brace expansion ('{a,b}.txt')
        - No extended glob patterns ('+(pattern)', '!(pattern)')
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)

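    # Illustrative sketch (not part of the class): common glob forms against
    # the in-memory tree from the earlier sketches (assumed to still exist).
    #
    #     >>> fs.glob("/root/*/x.txt")  # doctest: +SKIP
    #     ['/root/a/x.txt']
    #     >>> fs.glob("/root/**/*.txt")  # doctest: +SKIP
    #     ['/root/a/x.txt']
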
    def exists(self, path, **kwargs):
        """Is there a file at the given path"""
        try:
            self.info(path, **kwargs)
            return True
        except:  # noqa: E722
            # any exception allowed bar FileNotFoundError?
            return False

    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)"""
        return self.exists(path)

    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)

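    # Illustrative sketch (not part of the class): ``info`` returns one dict
    # with at least name/size/type, matching the ``ls(..., detail=True)``
    # shape; extra backend-specific keys may also appear.
    #
    #     >>> fs.info("/root/a/x.txt")  # doctest: +SKIP
    #     {'name': '/root/a/x.txt', 'size': 1, 'type': 'file'}
    #     >>> fs.exists("/root/missing.txt")  # doctest: +SKIP
    #     False
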
    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]

    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            return False

    def isfile(self, path):
        """Is this entry file-like?"""
        try:
            return self.info(path)["type"] == "file"
        except:  # noqa: E722
            return False

    def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
        """Get the contents of the file as a string.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="r",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.read()

    def write_text(
        self, path, value, encoding=None, errors=None, newline=None, **kwargs
    ):
        """Write the text to the given file.

        An existing file will be overwritten.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        value: str
            Text to write.
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="w",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.write(value)

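    # Illustrative sketch (not part of the class): a text round-trip through
    # ``write_text``/``read_text``, which wrap ``open`` in text mode; the
    # return value of ``write_text`` is the character count written.
    #
    #     >>> fs.write_text("/notes.txt", "hello", encoding="utf-8")  # doctest: +SKIP
    #     5
    #     >>> fs.read_text("/notes.txt", encoding="utf-8")  # doctest: +SKIP
    #     'hello'
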
    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystem
        start, end: int
            Bytes limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    end = f.size + end
                return f.read(end - f.tell())
            return f.read()

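    # Illustrative sketch (not part of the class): byte-range reads follow
    # Python slice semantics, including negative offsets from the end.
    #
    #     >>> fs.pipe_file("/b.bin", b"0123456789")  # doctest: +SKIP
    #     >>> fs.cat_file("/b.bin", start=2, end=5)  # doctest: +SKIP
    #     b'234'
    #     >>> fs.cat_file("/b.bin", start=-3)  # doctest: +SKIP
    #     b'789'
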
def pipe_file(self, path, value, mode="overwrite", **kwargs):
|
| 815 |
+
"""Set the bytes of given file"""
|
| 816 |
+
if mode == "create" and self.exists(path):
|
| 817 |
+
# non-atomic but simple way; or could use "xb" in open(), which is likely
|
| 818 |
+
# not as well supported
|
| 819 |
+
raise FileExistsError
|
| 820 |
+
with self.open(path, "wb", **kwargs) as f:
|
| 821 |
+
f.write(value)
|
| 822 |
+
|
| 823 |
+
def pipe(self, path, value=None, **kwargs):
|
| 824 |
+
"""Put value into path
|
| 825 |
+
|
| 826 |
+
(counterpart to ``cat``)
|
| 827 |
+
|
| 828 |
+
Parameters
|
| 829 |
+
----------
|
| 830 |
+
path: string or dict(str, bytes)
|
| 831 |
+
If a string, a single remote location to put ``value`` bytes; if a dict,
|
| 832 |
+
a mapping of {path: bytesvalue}.
|
| 833 |
+
value: bytes, optional
|
| 834 |
+
If using a single path, these are the bytes to put there. Ignored if
|
| 835 |
+
``path`` is a dict
|
| 836 |
+
"""
|
| 837 |
+
if isinstance(path, str):
|
| 838 |
+
self.pipe_file(self._strip_protocol(path), value, **kwargs)
|
| 839 |
+
elif isinstance(path, dict):
|
| 840 |
+
for k, v in path.items():
|
| 841 |
+
self.pipe_file(self._strip_protocol(k), v, **kwargs)
|
| 842 |
+
else:
|
| 843 |
+
raise ValueError("path must be str or dict")
|
| 844 |
+
|
| 845 |
+
def cat_ranges(
|
| 846 |
+
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
| 847 |
+
):
|
| 848 |
+
"""Get the contents of byte ranges from one or more files
|
| 849 |
+
|
| 850 |
+
Parameters
|
| 851 |
+
----------
|
| 852 |
+
paths: list
|
| 853 |
+
A list of of filepaths on this filesystems
|
| 854 |
+
starts, ends: int or list
|
| 855 |
+
Bytes limits of the read. If using a single int, the same value will be
|
| 856 |
+
used to read all the specified files.
|
| 857 |
+
"""
|
| 858 |
+
if max_gap is not None:
|
| 859 |
+
raise NotImplementedError
|
| 860 |
+
if not isinstance(paths, list):
|
| 861 |
+
raise TypeError
|
| 862 |
+
if not isinstance(starts, list):
|
| 863 |
+
starts = [starts] * len(paths)
|
| 864 |
+
if not isinstance(ends, list):
|
| 865 |
+
ends = [ends] * len(paths)
|
| 866 |
+
if len(starts) != len(paths) or len(ends) != len(paths):
|
| 867 |
+
raise ValueError
|
| 868 |
+
out = []
|
| 869 |
+
for p, s, e in zip(paths, starts, ends):
|
| 870 |
+
try:
|
| 871 |
+
out.append(self.cat_file(p, s, e))
|
| 872 |
+
except Exception as e:
|
| 873 |
+
if on_error == "return":
|
| 874 |
+
out.append(e)
|
| 875 |
+
else:
|
| 876 |
+
raise
|
| 877 |
+
return out
|
| 878 |
+
|
| 879 |
+
def cat(self, path, recursive=False, on_error="raise", **kwargs):
|
| 880 |
+
"""Fetch (potentially multiple) paths' contents
|
| 881 |
+
|
| 882 |
+
Parameters
|
| 883 |
+
----------
|
| 884 |
+
recursive: bool
|
| 885 |
+
If True, assume the path(s) are directories, and get all the
|
| 886 |
+
contained files
|
| 887 |
+
on_error : "raise", "omit", "return"
|
| 888 |
+
If raise, an underlying exception will be raised (converted to KeyError
|
| 889 |
+
if the type is in self.missing_exceptions); if omit, keys with exception
|
| 890 |
+
will simply not be included in the output; if "return", all keys are
|
| 891 |
+
included in the output, but the value will be bytes or an exception
|
| 892 |
+
instance.
|
| 893 |
+
kwargs: passed to cat_file
|
| 894 |
+
|
| 895 |
+
Returns
|
| 896 |
+
-------
|
| 897 |
+
dict of {path: contents} if there are multiple paths
|
| 898 |
+
or the path has been otherwise expanded
|
| 899 |
+
"""
|
| 900 |
+
paths = self.expand_path(path, recursive=recursive, **kwargs)
|
| 901 |
+
if (
|
| 902 |
+
len(paths) > 1
|
| 903 |
+
or isinstance(path, list)
|
| 904 |
+
or paths[0] != self._strip_protocol(path)
|
| 905 |
+
):
|
| 906 |
+
out = {}
|
| 907 |
+
for path in paths:
|
| 908 |
+
try:
|
| 909 |
+
out[path] = self.cat_file(path, **kwargs)
|
| 910 |
+
except Exception as e:
|
| 911 |
+
if on_error == "raise":
|
| 912 |
+
raise
|
| 913 |
+
if on_error == "return":
|
| 914 |
+
out[path] = e
|
| 915 |
+
return out
|
| 916 |
+
else:
|
| 917 |
+
return self.cat_file(paths[0], **kwargs)
|
| 918 |
+
|
| 919 |
+
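    # Illustrative sketch (not part of the class): ``pipe`` and ``cat`` are
    # symmetric; a dict pipes many files at once, and a glob cats many back.
    # Paths here are assumptions of the sketch.
    #
    #     >>> fs.pipe({"/d/1.txt": b"one", "/d/2.txt": b"two"})  # doctest: +SKIP
    #     >>> fs.cat("/d/*.txt")  # doctest: +SKIP
    #     {'/d/1.txt': b'one', '/d/2.txt': b'two'}
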
    def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
        """Copy single remote file to local"""
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                if not isfilelike(lpath):
                    outfile.close()

    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(
                rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            with callback.branched(rpath, lpath) as child:
                self.get_file(rpath, lpath, callback=child, **kwargs)

    def put_file(
        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
    ):
        """Copy single file to remote"""
        if mode == "create" and self.exists(rpath):
            raise FileExistsError
        if os.path.isdir(lpath):
            self.makedirs(rpath, exist_ok=True)
            return None

        with open(lpath, "rb") as f1:
            size = f1.seek(0, 2)
            callback.set_size(size)
            f1.seek(0)

            self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
            with self.open(rpath, "wb", **kwargs) as f2:
                while f1.tell() < size:
                    data = f1.read(self.blocksize)
                    segment_len = f2.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)

    def put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        Calls put_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(
                lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or self.isdir(rpath)
            )

            rpath = (
                self._strip_protocol(rpath)
                if isinstance(rpath, str)
                else [self._strip_protocol(p) for p in rpath]
            )
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(rpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            with callback.branched(lpath, rpath) as child:
                self.put_file(lpath, rpath, callback=child, **kwargs)

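    # Illustrative sketch (not part of the class): uploading a local tree and
    # downloading it again; a trailing "/" marks the target as a directory.
    # "local_dir" and "round_trip" are hypothetical local paths.
    #
    #     >>> fs.put("local_dir/", "/remote/", recursive=True)  # doctest: +SKIP
    #     >>> fs.get("/remote/", "round_trip/", recursive=True)  # doctest: +SKIP
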
    def head(self, path, size=1024):
        """Get the first ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            return f.read(size)

    def tail(self, path, size=1024):
        """Get the last ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            f.seek(max(-size, -f.size), 2)
            return f.read()

    def cp_file(self, path1, path2, **kwargs):
        raise NotImplementedError

    def copy(
        self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
    ):
        """Copy within two locations in the filesystem

        on_error : "raise", "ignore"
            If raise, any not-found exceptions will be raised; if ignore, any
            not-found exceptions will cause the path to be skipped; defaults to
            raise unless recursive is true, where the default is ignore
        """
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            from .implementations.local import trailing_sep

            source_is_str = isinstance(path1, str)
            paths1 = self.expand_path(
                path1, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or self.isdir(path2)
            )

            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        for p1, p2 in zip(paths1, paths2):
            try:
                self.cp_file(p1, p2, **kwargs)
            except FileNotFoundError:
                if on_error == "raise":
                    raise

    def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
        """Turn one or more globs or directories into a list of all matching paths
        to files or directories.

        kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
        """

        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, (str, os.PathLike)):
            out = self.expand_path([path], recursive, maxdepth, **kwargs)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:
                if has_magic(p):
                    bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            self.expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                                **kwargs,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(
                        self.find(
                            p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                        )
                    )
                    out |= rec
                if p not in out and (recursive is False or self.exists(p)):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)

    def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
        """Move file(s) from one location to another"""
        if path1 == path2:
            logger.debug("%s mv: The paths are the same, so no files were moved.", self)
        else:
            # explicitly raise exception to prevent data corruption
            self.copy(
                path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"
            )
            self.rm(path1, recursive=recursive)

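    # Illustrative sketch (not part of the class): ``expand_path`` is the
    # shared front-end that ``cat``/``get``/``copy``/``rm`` use to turn globs
    # and directories into concrete, sorted path lists.
    #
    #     >>> fs.expand_path("/d/*.txt")  # doctest: +SKIP
    #     ['/d/1.txt', '/d/2.txt']
    #     >>> fs.expand_path("/root", recursive=True)  # doctest: +SKIP
    #     ['/root', '/root/a', '/root/a/x.txt']
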
    def rm_file(self, path):
        """Delete a file"""
        self._rm(path)

    def _rm(self, path):
        """Delete one file"""
        # this is the old name for the method, prefer rm_file
        raise NotImplementedError

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete files.

        Parameters
        ----------
        path: str or list of str
            File(s) to delete.
        recursive: bool
            If file(s) are directories, recursively delete contents and then
            also remove the directory
        maxdepth: int or None
            Depth to pass to walk for finding files to delete, if recursive.
            If None, there will be no limit and infinite recursion may be
            possible.
        """
        path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
        for p in reversed(path):
            self.rm_file(p)

    @classmethod
    def _parent(cls, path):
        path = cls._strip_protocol(path)
        if "/" in path:
            parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
            return cls.root_marker + parent
        else:
            return cls.root_marker

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Return raw bytes-mode file-like from the file-system"""
        return AbstractBufferedFile(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )

    def open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        **kwargs,
    ):
        """
        Return a file-like object from the filesystem

        The resultant instance must function correctly in a ``with`` context
        block.

        Parameters
        ----------
        path: str
            Target file
        mode: str like 'rb', 'w'
            See builtin ``open()``
            Mode "x" (exclusive write) may be implemented by the backend. Even if
            it is, whether it is checked up front or on commit, and whether it is
            atomic is implementation-dependent.
        block_size: int
            Some indication of buffering - this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        encoding, errors, newline: passed on to TextIOWrapper for text mode
        """
        import io

        path = self._strip_protocol(path)
        if "b" not in mode:
            mode = mode.replace("t", "") + "b"

            text_kwargs = {
                k: kwargs.pop(k)
                for k in ["encoding", "errors", "newline"]
                if k in kwargs
            }
            return io.TextIOWrapper(
                self.open(
                    path,
                    mode,
                    block_size=block_size,
                    cache_options=cache_options,
                    compression=compression,
                    **kwargs,
                ),
                **text_kwargs,
            )
        else:
            ac = kwargs.pop("autocommit", not self._intrans)
            f = self._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=ac,
                cache_options=cache_options,
                **kwargs,
            )
            if compression is not None:
                from fsspec.compression import compr
                from fsspec.core import get_compression

                compression = get_compression(path, compression)
                compress = compr[compression]
                f = compress(f, mode=mode[0])

            if not ac and "r" not in mode:
                self.transaction.files.append(f)
            return f

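    # Illustrative sketch (not part of the class): text mode wraps the binary
    # handle in TextIOWrapper, and compression="infer" keys off the filename
    # suffix (gzip for ".gz" here).
    #
    #     >>> with fs.open("/log.txt.gz", "wt", compression="infer") as f:  # doctest: +SKIP
    #     ...     f.write("compressed text")
    #     >>> fs.open("/log.txt.gz", "rt", compression="infer").read()  # doctest: +SKIP
    #     'compressed text'
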
    def touch(self, path, truncate=True, **kwargs):
        """Create empty file, or update timestamp

        Parameters
        ----------
        path: str
            file location
        truncate: bool
            If True, always set file size to 0; if False, update timestamp and
            leave file unchanged, if backend allows this
        """
        if truncate or not self.exists(path):
            with self.open(path, "wb", **kwargs):
                pass
        else:
            raise NotImplementedError  # update timestamp, if possible

    def ukey(self, path):
        """Hash of file properties, to tell if it has changed"""
        return sha256(str(self.info(path)).encode()).hexdigest()

    def read_block(self, fn, offset, length, delimiter=None):
        """Read a block of bytes from a file

        Starting at ``offset`` of the file, read ``length`` bytes. If
        ``delimiter`` is set then we ensure that the read starts and stops at
        delimiter boundaries that follow the locations ``offset`` and ``offset
        + length``. If ``offset`` is zero then we start at zero. The
        bytestring returned WILL include the end delimiter string.

        If offset+length is beyond the eof, reads to eof.

        Parameters
        ----------
        fn: string
            Path to filename
        offset: int
            Byte offset to start read
        length: int
            Number of bytes to read. If None, read to end.
        delimiter: bytes (optional)
            Ensure reading starts and stops at delimiter bytestring

        Examples
        --------
        >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
        b'Alice, 100\\nBo'
        >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\n'

        Use ``length=None`` to read to the end of the file.
        >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\nCharlie, 300'

        See Also
        --------
        :func:`fsspec.utils.read_block`
        """
        with self.open(fn, "rb") as f:
            size = f.size
            if length is None:
                length = size
            if size is not None and offset + length > size:
                length = size - offset
            return read_block(f, offset, length, delimiter)

    def to_json(self, *, include_password: bool = True) -> str:
        """
        JSON representation of this filesystem instance.

        Parameters
        ----------
        include_password: bool, default True
            Whether to include the password (if any) in the output.

        Returns
        -------
        JSON string with keys ``cls`` (the python location of this class),
        ``protocol`` (text name of this class's protocol, first one in case of
        multiple), ``args`` (positional args, usually empty), and all other
        keyword arguments as their own keys.

        Warnings
        --------
        Serialized filesystems may contain sensitive information which has been
        passed to the constructor, such as passwords and tokens. Make sure you
        store and send them in a secure environment!
        """
        from .json import FilesystemJSONEncoder

        return json.dumps(
            self,
            cls=type(
                "_FilesystemJSONEncoder",
                (FilesystemJSONEncoder,),
                {"include_password": include_password},
            ),
        )

    @staticmethod
    def from_json(blob: str) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from JSON representation.

        See ``.to_json()`` for the expected structure of the input.

        Parameters
        ----------
        blob: str

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        return json.loads(blob, cls=FilesystemJSONDecoder)

    def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
        """
        JSON-serializable dictionary representation of this filesystem instance.

        Parameters
        ----------
        include_password: bool, default True
            Whether to include the password (if any) in the output.

        Returns
        -------
        Dictionary with keys ``cls`` (the python location of this class),
        ``protocol`` (text name of this class's protocol, first one in case of
        multiple), ``args`` (positional args, usually empty), and all other
        keyword arguments as their own keys.

        Warnings
        --------
        Serialized filesystems may contain sensitive information which has been
        passed to the constructor, such as passwords and tokens. Make sure you
        store and send them in a secure environment!
        """
        from .json import FilesystemJSONEncoder

        json_encoder = FilesystemJSONEncoder()

        cls = type(self)
        proto = self.protocol

        storage_options = dict(self.storage_options)
        if not include_password:
            storage_options.pop("password", None)

        return dict(
            cls=f"{cls.__module__}:{cls.__name__}",
            protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
            args=json_encoder.make_serializable(self.storage_args),
            **json_encoder.make_serializable(storage_options),
        )

    @staticmethod
    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from dictionary representation.

        See ``.to_dict()`` for the expected structure of the input.

        Parameters
        ----------
        dct: Dict[str, Any]

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        json_decoder = FilesystemJSONDecoder()

        dct = dict(dct)  # Defensive copy

        cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
        if cls is None:
            raise ValueError("Not a serialized AbstractFileSystem")

        dct.pop("cls", None)
        dct.pop("protocol", None)

        return cls(
            *json_decoder.unmake_serializable(dct.pop("args", ())),
            **json_decoder.unmake_serializable(dct),
        )

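    # Illustrative sketch (not part of the class): round-tripping instance
    # configuration through the dict/JSON forms described above; the exact
    # dict contents shown are assumptions for a memory filesystem.
    #
    #     >>> fs.to_dict()  # doctest: +SKIP
    #     {'cls': 'fsspec.implementations.memory:MemoryFileSystem',
    #      'protocol': 'memory', 'args': []}
    #     >>> fs2 = AbstractFileSystem.from_json(fs.to_json())  # doctest: +SKIP
    #     >>> fs2 == fs  # doctest: +SKIP
    #     True
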
    def _get_pyarrow_filesystem(self):
        """
        Make a version of the FS instance which will be acceptable to pyarrow
        """
        # all instances already also derive from pyarrow
        return self

    def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
        """Create key/value store based on this file-system

        Makes a MutableMapping interface to the FS at the given root path.
        See ``fsspec.mapping.FSMap`` for further details.
        """
        from .mapping import FSMap

        return FSMap(
            root,
            self,
            check=check,
            create=create,
            missing_exceptions=missing_exceptions,
        )

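    # Illustrative sketch (not part of the class): the mapper exposes files
    # under ``root`` as a MutableMapping of relative key -> bytes, assuming
    # the "/d" files piped in an earlier sketch.
    #
    #     >>> m = fs.get_mapper("/d")  # doctest: +SKIP
    #     >>> m["1.txt"]  # doctest: +SKIP
    #     b'one'
    #     >>> list(m)  # doctest: +SKIP
    #     ['1.txt', '2.txt']
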
@classmethod
|
| 1596 |
+
def clear_instance_cache(cls):
|
| 1597 |
+
"""
|
| 1598 |
+
Clear the cache of filesystem instances.
|
| 1599 |
+
|
| 1600 |
+
Notes
|
| 1601 |
+
-----
|
| 1602 |
+
Unless overridden by setting the ``cachable`` class attribute to False,
|
| 1603 |
+
the filesystem class stores a reference to newly created instances. This
|
| 1604 |
+
prevents Python's normal rules around garbage collection from working,
|
| 1605 |
+
since the instances refcount will not drop to zero until
|
| 1606 |
+
``clear_instance_cache`` is called.
|
| 1607 |
+
"""
|
| 1608 |
+
cls._cache.clear()
|
| 1609 |
+
|
| 1610 |
+
def created(self, path):
|
| 1611 |
+
"""Return the created timestamp of a file as a datetime.datetime"""
|
| 1612 |
+
raise NotImplementedError
|
| 1613 |
+
|
| 1614 |
+
def modified(self, path):
|
| 1615 |
+
"""Return the modified timestamp of a file as a datetime.datetime"""
|
| 1616 |
+
raise NotImplementedError
|
| 1617 |
+
|
| 1618 |
+
def tree(
|
| 1619 |
+
self,
|
| 1620 |
+
path: str = "/",
|
| 1621 |
+
recursion_limit: int = 2,
|
| 1622 |
+
max_display: int = 25,
|
| 1623 |
+
display_size: bool = False,
|
| 1624 |
+
prefix: str = "",
|
| 1625 |
+
is_last: bool = True,
|
| 1626 |
+
first: bool = True,
|
| 1627 |
+
indent_size: int = 4,
|
| 1628 |
+
) -> str:
|
| 1629 |
+
"""
|
| 1630 |
+
Return a tree-like structure of the filesystem starting from the given path as a string.
|
| 1631 |
+
|
| 1632 |
+
Parameters
|
| 1633 |
+
----------
|
| 1634 |
+
path: Root path to start traversal from
|
| 1635 |
+
recursion_limit: Maximum depth of directory traversal
|
| 1636 |
+
max_display: Maximum number of items to display per directory
|
| 1637 |
+
display_size: Whether to display file sizes
|
| 1638 |
+
prefix: Current line prefix for visual tree structure
|
| 1639 |
+
is_last: Whether current item is last in its level
|
| 1640 |
+
first: Whether this is the first call (displays root path)
|
| 1641 |
+
indent_size: Number of spaces by indent
|
| 1642 |
+
|
| 1643 |
+
Returns
|
| 1644 |
+
-------
|
| 1645 |
+
str: A string representing the tree structure.
|
| 1646 |
+
|
| 1647 |
+
Example
|
| 1648 |
+
-------
|
| 1649 |
+
>>> from fsspec import filesystem
|
| 1650 |
+
|
| 1651 |
+
>>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
|
| 1652 |
+
>>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
|
| 1653 |
+
>>> print(tree)
|
| 1654 |
+
"""
|
| 1655 |
+
|
| 1656 |
+
def format_bytes(n: int) -> str:
|
| 1657 |
+
"""Format bytes as text."""
|
| 1658 |
+
for prefix, k in (
|
| 1659 |
+
("P", 2**50),
|
| 1660 |
+
("T", 2**40),
|
| 1661 |
+
("G", 2**30),
|
| 1662 |
+
("M", 2**20),
|
| 1663 |
+
("k", 2**10),
|
| 1664 |
+
):
|
| 1665 |
+
if n >= 0.9 * k:
|
| 1666 |
+
return f"{n / k:.2f} {prefix}b"
|
| 1667 |
+
return f"{n}B"
|
| 1668 |
+
|
| 1669 |
+
result = []
|
| 1670 |
+
|
| 1671 |
+
if first:
|
| 1672 |
+
result.append(path)
|
| 1673 |
+
|
| 1674 |
+
if recursion_limit:
|
| 1675 |
+
indent = " " * indent_size
|
| 1676 |
+
contents = self.ls(path, detail=True)
|
| 1677 |
+
contents.sort(
|
| 1678 |
+
key=lambda x: (x.get("type") != "directory", x.get("name", ""))
|
| 1679 |
+
)
|
| 1680 |
+
|
| 1681 |
+
if max_display is not None and len(contents) > max_display:
|
| 1682 |
+
displayed_contents = contents[:max_display]
|
| 1683 |
+
remaining_count = len(contents) - max_display
|
| 1684 |
+
else:
|
| 1685 |
+
displayed_contents = contents
|
| 1686 |
+
remaining_count = 0
|
| 1687 |
+
|
| 1688 |
+
for i, item in enumerate(displayed_contents):
|
| 1689 |
+
is_last_item = (i == len(displayed_contents) - 1) and (
|
| 1690 |
+
remaining_count == 0
|
| 1691 |
+
)
|
| 1692 |
+
|
| 1693 |
+
branch = (
|
| 1694 |
+
"└" + ("─" * (indent_size - 2))
|
| 1695 |
+
if is_last_item
|
| 1696 |
+
else "├" + ("─" * (indent_size - 2))
|
| 1697 |
+
)
|
| 1698 |
+
branch += " "
|
| 1699 |
+
new_prefix = prefix + (
|
| 1700 |
+
indent if is_last_item else "│" + " " * (indent_size - 1)
|
| 1701 |
+
)
|
| 1702 |
+
|
| 1703 |
+
name = os.path.basename(item.get("name", ""))
|
| 1704 |
+
|
| 1705 |
+
if display_size and item.get("type") == "directory":
|
| 1706 |
+
sub_contents = self.ls(item.get("name", ""), detail=True)
|
| 1707 |
+
num_files = sum(
|
| 1708 |
+
1 for sub_item in sub_contents if sub_item.get("type") == "file"
|
| 1709 |
+
)
|
| 1710 |
+
num_folders = sum(
|
| 1711 |
+
1
|
| 1712 |
+
for sub_item in sub_contents
|
| 1713 |
+
if sub_item.get("type") == "directory"
|
| 1714 |
+
)
|
| 1715 |
+
|
| 1716 |
+
if num_files == 0 and num_folders == 0:
|
| 1717 |
+
size = " (empty folder)"
|
| 1718 |
+
elif num_files == 0:
|
| 1719 |
+
size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
|
| 1720 |
+
elif num_folders == 0:
|
| 1721 |
+
size = f" ({num_files} file{'s' if num_files > 1 else ''})"
|
| 1722 |
+
else:
|
| 1723 |
+
size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
|
| 1724 |
+
elif display_size and item.get("type") == "file":
|
| 1725 |
+
size = f" ({format_bytes(item.get('size', 0))})"
|
| 1726 |
+
else:
|
| 1727 |
+
size = ""
|
| 1728 |
+
|
| 1729 |
+
result.append(f"{prefix}{branch}{name}{size}")
|
| 1730 |
+
|
| 1731 |
+
if item.get("type") == "directory" and recursion_limit > 0:
|
| 1732 |
+
result.append(
|
| 1733 |
+
self.tree(
|
| 1734 |
+
path=item.get("name", ""),
|
| 1735 |
+
recursion_limit=recursion_limit - 1,
|
| 1736 |
+
max_display=max_display,
|
| 1737 |
+
display_size=display_size,
|
| 1738 |
+
prefix=new_prefix,
|
| 1739 |
+
is_last=is_last_item,
|
| 1740 |
+
first=False,
|
| 1741 |
+
indent_size=indent_size,
|
| 1742 |
+
)
|
| 1743 |
+
)
|
| 1744 |
+
|
| 1745 |
+
if remaining_count > 0:
|
| 1746 |
+
more_message = f"{remaining_count} more item(s) not displayed."
|
| 1747 |
+
result.append(
|
| 1748 |
+
f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
|
| 1749 |
+
)
|
| 1750 |
+
|
| 1751 |
+
return "\n".join(_ for _ in result if _)

    # ------------------------------------------------------------------------
    # Aliases

    def read_bytes(self, path, start=None, end=None, **kwargs):
        """Alias of `AbstractFileSystem.cat_file`."""
        return self.cat_file(path, start=start, end=end, **kwargs)

    def write_bytes(self, path, value, **kwargs):
        """Alias of `AbstractFileSystem.pipe_file`."""
        self.pipe_file(path, value, **kwargs)

    def makedir(self, path, create_parents=True, **kwargs):
        """Alias of `AbstractFileSystem.mkdir`."""
        return self.mkdir(path, create_parents=create_parents, **kwargs)

    def mkdirs(self, path, exist_ok=False):
        """Alias of `AbstractFileSystem.makedirs`."""
        return self.makedirs(path, exist_ok=exist_ok)

    def listdir(self, path, detail=True, **kwargs):
        """Alias of `AbstractFileSystem.ls`."""
        return self.ls(path, detail=detail, **kwargs)

    def cp(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.copy`."""
        return self.copy(path1, path2, **kwargs)

    def move(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def stat(self, path, **kwargs):
        """Alias of `AbstractFileSystem.info`."""
        return self.info(path, **kwargs)

    def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
        """Alias of `AbstractFileSystem.du`."""
        return self.du(path, total=total, maxdepth=maxdepth, **kwargs)

    def rename(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def delete(self, path, recursive=False, maxdepth=None):
        """Alias of `AbstractFileSystem.rm`."""
        return self.rm(path, recursive=recursive, maxdepth=maxdepth)

    def upload(self, lpath, rpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.put`."""
        return self.put(lpath, rpath, recursive=recursive, **kwargs)

    def download(self, rpath, lpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.get`."""
        return self.get(rpath, lpath, recursive=recursive, **kwargs)

    def sign(self, path, expiration=100, **kwargs):
        """Create a signed URL representing the given path

        Some implementations allow temporary URLs to be generated, as a
        way of delegating credentials.

        Parameters
        ----------
        path : str
            The path on the filesystem
        expiration : int
            Number of seconds to enable the URL for (if supported)

        Returns
        -------
        URL : str
            The signed URL

        Raises
        ------
        NotImplementedError : if method is not implemented for a filesystem
        """
        raise NotImplementedError("Sign is not implemented for this filesystem")

    def _isfilestore(self):
        # Originally inherited from pyarrow DaskFileSystem. Keeping this
        # here for backwards compatibility as long as pyarrow uses its
        # legacy fsspec-compatible filesystems and thus accepts fsspec
        # filesystems as well
        return False


class AbstractBufferedFile(io.IOBase):
    """Convenient class to derive from to provide buffering

    In the case that the backend does not provide a pythonic file-like object
    already, this class contains much of the logic to build one. The only
    methods that need to be overridden are ``_upload_chunk``,
    ``_initiate_upload`` and ``_fetch_range``.
    """

    DEFAULT_BLOCK_SIZE = 5 * 2**20
    _details = None

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """
        Template for files with buffered reading and writing

        Parameters
        ----------
        fs: instance of FileSystem
        path: str
            location in file-system
        mode: str
            Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
            systems may be read-only, and some may not support append.
        block_size: int
            Buffer size for reading or writing, 'default' for class default
        autocommit: bool
            Whether to write to final destination; may only impact what
            happens when file is being closed.
        cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
            Caching policy in read mode. See the definitions in ``core``.
        cache_options : dict
            Additional options passed to the constructor for the cache specified
            by `cache_type`.
        size: int
            If given and in read mode, suppresses having to look up the file size
        kwargs:
            Gets stored as self.kwargs
        """
        from .core import caches

        self.path = path
        self.fs = fs
        self.mode = mode
        self.blocksize = (
            self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
        )
        self.loc = 0
        self.autocommit = autocommit
        self.end = None
        self.start = None
        self.closed = False

        if cache_options is None:
            cache_options = {}

        if "trim" in kwargs:
            warnings.warn(
                "Passing 'trim' to control the cache behavior has been deprecated. "
                "Specify it within the 'cache_options' argument instead.",
                FutureWarning,
            )
            cache_options["trim"] = kwargs.pop("trim")

        self.kwargs = kwargs

        if mode not in {"ab", "rb", "wb", "xb"}:
            raise NotImplementedError("File mode not supported")
        if mode == "rb":
            if size is not None:
                self.size = size
            else:
                self.size = self.details["size"]
            self.cache = caches[cache_type](
                self.blocksize, self._fetch_range, self.size, **cache_options
            )
        else:
            self.buffer = io.BytesIO()
            self.offset = None
            self.forced = False
            self.location = None

    @property
    def details(self):
        if self._details is None:
            self._details = self.fs.info(self.path)
        return self._details

    @details.setter
    def details(self, value):
        self._details = value
        self.size = value["size"]

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    @property
    def closed(self):
        # get around this attr being read-only in IOBase
        # use getattr here, since this can be called during del
        return getattr(self, "_closed", True)

    @closed.setter
    def closed(self, c):
        self._closed = c

    def __hash__(self):
        if "w" in self.mode:
            return id(self)
        else:
            return int(tokenize(self.details), 16)

    def __eq__(self, other):
        """Files are equal if they have the same checksum, only in read mode"""
        if self is other:
            return True
        return (
            isinstance(other, type(self))
            and self.mode == "rb"
            and other.mode == "rb"
            and hash(self) == hash(other)
        )

    def commit(self):
        """Move from temp to final destination"""

    def discard(self):
        """Throw away temporary file"""

    def info(self):
        """File information about this path"""
        if self.readable():
            return self.details
        else:
            raise ValueError("Info not available while writing")

    def tell(self):
        """Current file location"""
        return self.loc

    def seek(self, loc, whence=0):
        """Set current file location

        Parameters
        ----------
        loc: int
            byte location
        whence: {0, 1, 2}
            from start of file, current location or end of file, resp.
        """
        loc = int(loc)
        if not self.mode == "rb":
            raise OSError(ESPIPE, "Seek only available in read mode")
        if whence == 0:
            nloc = loc
        elif whence == 1:
            nloc = self.loc + loc
        elif whence == 2:
            nloc = self.size + loc
        else:
            raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
        if nloc < 0:
            raise ValueError("Seek before start of file")
        self.loc = nloc
        return self.loc

    def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if not self.writable():
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            self.flush()
        return out

    def flush(self, force=False):
        """
        Write buffered data to backend store.

        Writes the current buffer, if it is larger than the block-size, or if
        the file is being closed.

        Parameters
        ----------
        force: bool
            When closing, write the last block even if it is smaller than
            blocks are allowed to be. Disallows further writing to this file.
        """

        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.readable():
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                self._initiate_upload()
            except:
                self.closed = True
                raise

        if self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ----------
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.
        """
        # may not yet have been initialized, may need to call _initiate_upload

    def _initiate_upload(self):
        """Create remote file/upload"""
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.fs.cat_file(self.path, start=start, end=end)

    def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = self.cache._fetch(self.loc, self.loc + length)

        logger.debug(
            "%s read: %i - %i %s",
            self,
            self.loc,
            self.loc + length,
            self.cache._log_stats(),
        )
        self.loc += len(out)
        return out

    def readinto(self, b):
        """mirrors builtin file's readinto method

        https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
        """
        out = memoryview(b).cast("B")
        data = self.read(out.nbytes)
        out[: len(data)] = data
        return len(data)

    def readuntil(self, char=b"\n", blocks=None):
        """Return data between current position and first occurrence of char

        char is included in the output, except if the end of the file is
        encountered first.

        Parameters
        ----------
        char: bytes
            Thing to find
        blocks: None or int
            How much to read in each go. Defaults to file blocksize - which may
            mean a new read on every call.
        """
        out = []
        while True:
            start = self.tell()
            part = self.read(blocks or self.blocksize)
            if len(part) == 0:
                break
            found = part.find(char)
            if found > -1:
                out.append(part[: found + len(char)])
                self.seek(start + found + len(char))
                break
            out.append(part)
        return b"".join(out)

    def readline(self):
        """Read until and including the first occurrence of newline character

        Note that, because of character encoding, this is not necessarily a
        true line ending.
        """
        return self.readuntil(b"\n")

    def __next__(self):
        out = self.readline()
        if out:
            return out
        raise StopIteration

    def __iter__(self):
        return self

    def readlines(self):
        """Return all data, split by the newline character, including the newline character"""
        data = self.read()
        lines = data.split(b"\n")
        out = [l + b"\n" for l in lines[:-1]]
        if data.endswith(b"\n"):
            return out
        else:
            return out + [lines[-1]]
        # return list(self) ???

    def readinto1(self, b):
        return self.readinto(b)

    def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        try:
            if self.mode == "rb":
                self.cache = None
            else:
                if not self.forced:
                    self.flush(force=True)

                if self.fs is not None:
                    self.fs.invalidate_cache(self.path)
                    self.fs.invalidate_cache(self.fs._parent(self.path))
        finally:
            self.closed = True

    def readable(self):
        """Whether opened for reading"""
        return "r" in self.mode and not self.closed

    def seekable(self):
        """Whether is seekable (only in read mode)"""
        return self.readable()

    def writable(self):
        """Whether opened for writing"""
        return self.mode in {"wb", "ab", "xb"} and not self.closed

    def __reduce__(self):
        if self.mode != "rb":
            raise RuntimeError("Pickling a writeable file is not supported")

        return reopen, (
            self.fs,
            self.path,
            self.mode,
            self.blocksize,
            self.loc,
            self.size,
            self.autocommit,
            self.cache.name if self.cache else "none",
            self.kwargs,
        )

    def __del__(self):
        if not self.closed:
            self.close()

    def __str__(self):
        return f"<File-like object {type(self.fs).__name__}, {self.path}>"

    __repr__ = __str__

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()


def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
    file = fs.open(
        path,
        mode=mode,
        block_size=blocksize,
        autocommit=autocommit,
        cache_type=cache_type,
        size=size,
        **kwargs,
    )
    if loc > 0:
        file.seek(loc)
    return file
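Per the class docstring, only three hooks are needed for a working buffered file. A minimal hedged sketch against an invented dict-backed store; ``DictFile`` and ``store`` are hypothetical names for illustration, not fsspec API:

# Sketch: the base class drives buffering; the subclass only moves bytes.
from fsspec.spec import AbstractBufferedFile

store = {}  # hypothetical backend: path -> bytes

class DictFile(AbstractBufferedFile):
    def _initiate_upload(self):
        store[self.path] = b""                    # create the remote object

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        store[self.path] += self.buffer.read()    # append the flushed block
        return True                               # signal the buffer may reset

    def _fetch_range(self, start, end):
        return store[self.path][start:end]        # serve a byte range on read

# In a real backend, instances are created via the filesystem's open().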
venv/lib/python3.13/site-packages/fsspec/transaction.py
ADDED
@@ -0,0 +1,90 @@
from collections import deque


class Transaction:
    """Filesystem transaction write context

    Gathers files for deferred commit or discard, so that several write
    operations can be finalized semi-atomically. This works by having this
    instance as the ``.transaction`` attribute of the given filesystem.
    """

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        if self.fs:
            self.fs._intrans = False
            self.fs._transaction = None
            self.fs = None

    def start(self):
        """Start a transaction on this FileSystem"""
        self.files = deque()  # clean up after previous failed completions
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        while self.files:
            f = self.files.popleft()
            if commit:
                f.commit()
            else:
                f.discard()
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None


class FileActor:
    def __init__(self):
        self.files = []

    def commit(self):
        for f in self.files:
            f.commit()
        self.files.clear()

    def discard(self):
        for f in self.files:
            f.discard()
        self.files.clear()

    def append(self, f):
        self.files.append(f)


class DaskTransaction(Transaction):
    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
        self.fs = None
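A hedged sketch of the context manager above in action, using fsspec's in-memory filesystem (the path is invented): a file opened inside the block is committed only on clean exit, and discarded if an exception escapes.

# Sketch: writes inside fs.transaction are deferred until the context exits.
import fsspec

fs = fsspec.filesystem("memory")
with fs.transaction:
    with fs.open("/staging/out.txt", "wb") as f:
        f.write(b"all or nothing")
# On clean exit, Transaction.complete(commit=True) ran f.commit();
# an exception inside the block would have triggered f.discard() instead.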
venv/lib/python3.13/site-packages/hf_xet/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .hf_xet import *

__doc__ = hf_xet.__doc__
if hasattr(hf_xet, "__all__"):
    __all__ = hf_xet.__all__
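The shim above is the usual pattern for wrapping a compiled (e.g. PyO3) extension: importing the submodule also binds it as an attribute of the package, which is why ``hf_xet`` is referenceable after the star import. A generic hedged sketch of the same pattern, where ``_native`` is a hypothetical extension-module name:

# Hypothetical package __init__.py wrapping a compiled module `_native`.
from ._native import *          # importing also binds `_native` on the package

__doc__ = _native.__doc__       # mirror the extension's docstring
if hasattr(_native, "__all__"):
    __all__ = _native.__all__   # and its public-name list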
venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
pip
venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA
ADDED
@@ -0,0 +1,209 @@
Metadata-Version: 2.4
Name: idna
Version: 3.11
Summary: Internationalized Domain Names in Applications (IDNA)
Author-email: Kim Davies <kim+pypi@gumleaf.org>
Requires-Python: >=3.8
Description-Content-Type: text/x-rst
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: System Administrators
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: Name Service (DNS)
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Utilities
License-File: LICENSE.md
Requires-Dist: ruff >= 0.6.2 ; extra == "all"
Requires-Dist: mypy >= 1.11.2 ; extra == "all"
Requires-Dist: pytest >= 8.3.2 ; extra == "all"
Requires-Dist: flake8 >= 7.1.1 ; extra == "all"
Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
Project-URL: Issue tracker, https://github.com/kjd/idna/issues
Project-URL: Source, https://github.com/kjd/idna
Provides-Extra: all

Internationalized Domain Names in Applications (IDNA)
=====================================================

Support for `Internationalized Domain Names in
Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_
and `Unicode IDNA Compatibility Processing
<https://unicode.org/reports/tr46/>`_.

The latest versions of these standards supplied here provide
more comprehensive language coverage and reduce the potential of
allowing domains with known security vulnerabilities. This library
is a suitable replacement for the “encodings.idna”
module that comes with the Python standard library, but which
only supports an older, superseded IDNA specification from 2003.

Basic functions are simply executed:

.. code-block:: pycon

   >>> import idna
   >>> idna.encode('ドメイン.テスト')
   b'xn--eckwd4c7c.xn--zckzah'
   >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
   ドメイン.テスト


Installation
------------

This package is available for installation from PyPI via the
typical mechanisms, such as:

.. code-block:: bash

   $ python3 -m pip install idna


Usage
-----

For typical usage, the ``encode`` and ``decode`` functions will take a
domain name argument and perform a conversion to ASCII compatible encoding
(known as A-labels), or to Unicode strings (known as U-labels)
respectively.

.. code-block:: pycon

   >>> import idna
   >>> idna.encode('ドメイン.テスト')
   b'xn--eckwd4c7c.xn--zckzah'
   >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
   ドメイン.テスト

Conversions can be applied on a per-label basis using the ``ulabel`` or
``alabel`` functions if necessary:

.. code-block:: pycon

   >>> idna.alabel('测试')
   b'xn--0zwm56d'


Compatibility Mapping (UTS #46)
+++++++++++++++++++++++++++++++

This library provides support for `Unicode IDNA Compatibility
Processing <https://unicode.org/reports/tr46/>`_, which normalizes input from
different potential ways a user may input a domain prior to performing the IDNA
conversion operations. This functionality, known as a
`mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the
specification to be a local user-interface issue distinct from IDNA
conversion functionality.

For example, “Königsgäßchen” is not a permissible label as *LATIN
CAPITAL LETTER K* is not allowed (nor are capital letters in general).
UTS 46 will convert this into lower case prior to applying the IDNA
conversion.

.. code-block:: pycon

   >>> import idna
   >>> idna.encode('Königsgäßchen')
   ...
   idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
   >>> idna.encode('Königsgäßchen', uts46=True)
   b'xn--knigsgchen-b4a3dun'
   >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
   königsgäßchen


Exceptions
----------

All errors raised during the conversion following the specification
should raise an exception derived from the ``idna.IDNAError`` base
class.

More specific exceptions may be generated: ``idna.IDNABidiError``
when the error reflects an illegal combination of left-to-right and
right-to-left characters in a label; ``idna.InvalidCodepoint`` when
a specific codepoint is an illegal character in an IDN label (i.e.
INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ
but the contextual requirements are not satisfied).

Building and Diagnostics
------------------------

The IDNA and UTS 46 functionality relies upon pre-calculated lookup
tables for performance. These tables are derived from computing against
eligibility criteria in the respective standards using the command-line
script ``tools/idna-data``.

This tool will fetch relevant codepoint data from the Unicode repository
and perform the required calculations to identify eligibility. There are
three main modes:

* ``idna-data make-libdata``. Generates ``idnadata.py`` and
  ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
  UTS 46 conversions. Implementers who wish to track this library against
  a different Unicode version may use this tool to manually generate a
  different version of the ``idnadata.py`` and ``uts46data.py`` files.

* ``idna-data make-table``. Generates a table of the IDNA disposition
  (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
  B.1 of RFC 5892 and the pre-computed tables published by `IANA
  <https://www.iana.org/>`_.

* ``idna-data U+0061``. Prints debugging output on the various
  properties associated with an individual Unicode codepoint (in this
  case, U+0061), that are used to assess the IDNA and UTS 46 status of a
  codepoint. This is helpful in debugging or analysis.

The tool accepts a number of arguments, described using ``idna-data
-h``. Most notably, the ``--version`` argument allows the specification
of the version of Unicode to be used in computing the table data. For
example, ``idna-data --version 9.0.0 make-libdata`` will generate
library data against Unicode 9.0.0.


Additional Notes
----------------

* **Packages**. The latest tagged release version is published in the
  `Python Package Index <https://pypi.org/project/idna/>`_.

* **Version support**. This library supports Python 3.8 and higher.
  As this library serves as a low-level toolkit for a variety of
  applications, many of which strive for broad compatibility with older
  Python versions, there is no rush to remove older interpreter support.
  Support for older versions is likely to be removed from new releases
  once automated tests can no longer easily be run, i.e. once the Python
  version is officially end-of-life.

* **Testing**. The library has a test suite based on each rule of the
  IDNA specification, as well as tests that are provided as part of the
  Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
  <https://unicode.org/reports/tr46/>`_.

* **Emoji**. It is an occasional request to support emoji domains in
  this library. Encoding of symbols like emoji is expressly prohibited by
  the technical standard IDNA 2008, and emoji domains are broadly phased
  out across the domain industry due to associated security risks. For
  now, applications that need to support these non-compliant labels
  may wish to consider trying the encode/decode operation in this library
  first, and then falling back to using ``encodings.idna``. See `the Github
  project <https://github.com/kjd/idna/issues/18>`_ for more discussion.

* **Transitional processing**. Unicode 16.0.0 removed transitional
  processing, so the ``transitional`` argument for the ``encode()`` method
  no longer has any effect and will be removed at a later date.
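The snippets above cover each call in isolation; a hedged round-trip sketch combining them, using only values quoted in the text above:

# Sketch: UTS 46 mapping first, then IDNA 2008 encoding, then decode back.
import idna

aname = idna.encode("Königsgäßchen", uts46=True)  # lower-cased via UTS 46
assert aname == b"xn--knigsgchen-b4a3dun"
assert idna.decode(aname) == "königsgäßchen"
label = idna.alabel("测试")                        # per-label conversion
assert label == b"xn--0zwm56d"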
venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
idna-3.11.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
idna-3.11.dist-info/METADATA,sha256=fCwSww9SuiN8TIHllFSASUQCW55hAs8dzKnr9RaEEbA,8378
idna-3.11.dist-info/RECORD,,
idna-3.11.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
idna-3.11.dist-info/licenses/LICENSE.md,sha256=t6M2q_OwThgOwGXN0W5wXQeeHMehT5EKpukYfza5zYc,1541
idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868
idna/__pycache__/__init__.cpython-313.pyc,,
idna/__pycache__/codec.cpython-313.pyc,,
idna/__pycache__/compat.cpython-313.pyc,,
idna/__pycache__/core.cpython-313.pyc,,
idna/__pycache__/idnadata.cpython-313.pyc,,
idna/__pycache__/intranges.cpython-313.pyc,,
idna/__pycache__/package_data.cpython-313.pyc,,
idna/__pycache__/uts46data.cpython-313.pyc,,
idna/codec.py,sha256=M2SGWN7cs_6B32QmKTyTN6xQGZeYQgQ2wiX3_DR6loE,3438
idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316
idna/core.py,sha256=P26_XVycuMTZ1R2mNK1ZREVzM5mvTzdabBXfyZVU1Lc,13246
idna/idnadata.py,sha256=SG8jhaGE53iiD6B49pt2pwTv_UvClciWE-N54oR2p4U,79623
idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898
idna/package_data.py,sha256=_CUavOxobnbyNG2FLyHoN8QHP3QM9W1tKuw7eq9QwBk,21
idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
idna/uts46data.py,sha256=H9J35VkD0F9L9mKOqjeNGd2A-Va6FlPoz6Jz4K7h-ps,243725
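Each row above follows the wheel RECORD convention ``path,sha256=<digest>,size``, where the digest is the urlsafe-base64 SHA-256 of the file contents with trailing ``=`` padding stripped. A sketch of reproducing a row; ``record_row`` is a hypothetical helper, not part of idna or pip:

# Sketch: compute a RECORD line for an installed file.
import base64
import hashlib
import os

def record_row(path: str) -> str:
    with open(path, "rb") as f:
        digest = base64.urlsafe_b64encode(hashlib.sha256(f.read()).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{os.path.getsize(path)}"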
venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.12.0
Root-Is-Purelib: true
Tag: py3-none-any
venv/lib/python3.13/site-packages/packaging/__init__.py
ADDED
@@ -0,0 +1,15 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.

__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"

__version__ = "25.0"

__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"

__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = f"2014 {__author__}"
venv/lib/python3.13/site-packages/packaging/_elffile.py
ADDED
@@ -0,0 +1,109 @@
"""
ELF file parser.

This provides a class ``ELFFile`` that parses an ELF executable in a similar
interface to ``ZipFile``. Only the read interface is implemented.

Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""

from __future__ import annotations

import enum
import os
import struct
from typing import IO


class ELFInvalid(ValueError):
    pass


class EIClass(enum.IntEnum):
    C32 = 1
    C64 = 2


class EIData(enum.IntEnum):
    Lsb = 1
    Msb = 2


class EMachine(enum.IntEnum):
    I386 = 3
    S390 = 22
    Arm = 40
    X8664 = 62
    AArc64 = 183


class ELFFile:
    """
    Representation of an ELF executable.
    """

    def __init__(self, f: IO[bytes]) -> None:
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # Format for program header (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for program header.
            # p_fmt: Format for section header.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of program header.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of section.
                self._e_phnum,  # Number of sections.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> tuple[int, ...]:
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> str | None:
        """
        The path recorded in the ``PT_INTERP`` section header.
        """
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                continue
            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
                continue
            self._f.seek(data[self._p_idx[1]])
            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
        return None
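A hedged sketch of the parser in use: it reads the ``PT_INTERP`` path from the running interpreter, and on non-ELF platforms the ``ELFInvalid`` branch is the expected outcome.

# Sketch: inspect the dynamic loader recorded in the current executable.
import sys
from packaging._elffile import ELFFile, ELFInvalid

try:
    with open(sys.executable, "rb") as f:
        print(ELFFile(f).interpreter)  # e.g. a glibc ld-linux or musl ld path
except (ELFInvalid, OSError):
    print("not an ELF executable")     # macOS/Windows land here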
venv/lib/python3.13/site-packages/packaging/_manylinux.py
ADDED
@@ -0,0 +1,262 @@
from __future__ import annotations

import collections
import contextlib
import functools
import os
import re
import sys
import warnings
from typing import Generator, Iterator, NamedTuple, Sequence

from ._elffile import EIClass, EIData, ELFFile, EMachine

EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400


# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
    try:
        with open(path, "rb") as f:
            yield ELFFile(f)
    except (OSError, TypeError, ValueError):
        yield None


def _is_linux_armhf(executable: str) -> bool:
    # hard-float ABI can be detected from the ELF header of the running
    # process
    # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    with _parse_elf(executable) as f:
        return (
            f is not None
            and f.capacity == EIClass.C32
            and f.encoding == EIData.Lsb
            and f.machine == EMachine.Arm
            and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
            and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
        )


def _is_linux_i686(executable: str) -> bool:
    with _parse_elf(executable) as f:
        return (
            f is not None
            and f.capacity == EIClass.C32
            and f.encoding == EIData.Lsb
            and f.machine == EMachine.I386
        )


def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
    if "armv7l" in archs:
        return _is_linux_armhf(executable)
    if "i686" in archs:
        return _is_linux_i686(executable)
    allowed_archs = {
        "x86_64",
        "aarch64",
        "ppc64",
        "ppc64le",
        "s390x",
        "loongarch64",
        "riscv64",
    }
    return any(arch in allowed_archs for arch in archs)


# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)


class _GLibCVersion(NamedTuple):
    major: int
    minor: int


def _glibc_version_string_confstr() -> str | None:
    """
    Primary implementation of glibc_version_string using os.confstr.
    """
    # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
    # to be broken or missing. This strategy is used in the standard library
    # platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # Should be a string like "glibc 2.17".
        version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
        assert version_string is not None
        _, version = version_string.rsplit()
    except (AssertionError, AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
        return None
    return version


def _glibc_version_string_ctypes() -> str | None:
    """
    Fallback implementation of glibc_version_string using ctypes.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This way we can let the linker do the work to figure out
    # which libc our process is actually using.
    #
    # We must also handle the special case where the executable is not a
    # dynamically linked executable. This can occur when using musl libc,
    # for example. In this situation, dlopen() will error, leading to an
    # OSError. Interestingly, at least in the case of musl, there is no
    # errno set on the OSError. The single string argument used to construct
    # OSError comes from libc itself and is therefore not portable to
    # hard code here. In any case, failure to call dlopen() means we
    # can proceed, so we bail on our attempt.
    try:
        process_namespace = ctypes.CDLL(None)
    except OSError:
        return None

    try:
        gnu_get_libc_version = process_namespace.gnu_get_libc_version
    except AttributeError:
        # Symbol doesn't exist -> therefore, we are not linked to
        # glibc.
        return None

    # Call gnu_get_libc_version, which returns a string like "2.5"
    gnu_get_libc_version.restype = ctypes.c_char_p
    version_str: str = gnu_get_libc_version()
    # py2 / py3 compatibility:
    if not isinstance(version_str, str):
        version_str = version_str.decode("ascii")

    return version_str


def _glibc_version_string() -> str | None:
    """Returns glibc version string, or None if not using glibc."""
    return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
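Before the parsing step, a hedged sketch of why the module feeds the raw string through a regex-based parser (defined just below) rather than ``str.split``; the version string here is the Linaro example cited in that parser's docstring:

# Sketch: vendor suffixes such as "2.20-2014.11" would break int() on a
# plain split, but the anchored regex only consumes "major.minor".
import re

m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", "2.20-2014.11")
assert (int(m.group("major")), int(m.group("minor"))) == (2, 20)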
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def _parse_glibc_version(version_str: str) -> tuple[int, int]:
|
| 154 |
+
"""Parse glibc version.
|
| 155 |
+
|
| 156 |
+
We use a regexp instead of str.split because we want to discard any
|
| 157 |
+
random junk that might come after the minor version -- this might happen
|
| 158 |
+
in patched/forked versions of glibc (e.g. Linaro's version of glibc
|
| 159 |
+
uses version strings like "2.20-2014.11"). See gh-3588.
|
| 160 |
+
"""
|
| 161 |
+
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
|
| 162 |
+
if not m:
|
| 163 |
+
warnings.warn(
|
| 164 |
+
f"Expected glibc version with 2 components major.minor, got: {version_str}",
|
| 165 |
+
RuntimeWarning,
|
| 166 |
+
stacklevel=2,
|
| 167 |
+
)
|
+        return -1, -1
+    return int(m.group("major")), int(m.group("minor"))
+
+
+@functools.lru_cache
+def _get_glibc_version() -> tuple[int, int]:
+    version_str = _glibc_version_string()
+    if version_str is None:
+        return (-1, -1)
+    return _parse_glibc_version(version_str)
+
+
+# From PEP 513, PEP 600
+def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
+    sys_glibc = _get_glibc_version()
+    if sys_glibc < version:
+        return False
+    # Check for presence of _manylinux module.
+    try:
+        import _manylinux
+    except ImportError:
+        return True
+    if hasattr(_manylinux, "manylinux_compatible"):
+        result = _manylinux.manylinux_compatible(version[0], version[1], arch)
+        if result is not None:
+            return bool(result)
+        return True
+    if version == _GLibCVersion(2, 5):
+        if hasattr(_manylinux, "manylinux1_compatible"):
+            return bool(_manylinux.manylinux1_compatible)
+    if version == _GLibCVersion(2, 12):
+        if hasattr(_manylinux, "manylinux2010_compatible"):
+            return bool(_manylinux.manylinux2010_compatible)
+    if version == _GLibCVersion(2, 17):
+        if hasattr(_manylinux, "manylinux2014_compatible"):
+            return bool(_manylinux.manylinux2014_compatible)
+    return True
+
+
+_LEGACY_MANYLINUX_MAP = {
+    # CentOS 7 w/ glibc 2.17 (PEP 599)
+    (2, 17): "manylinux2014",
+    # CentOS 6 w/ glibc 2.12 (PEP 571)
+    (2, 12): "manylinux2010",
+    # CentOS 5 w/ glibc 2.5 (PEP 513)
+    (2, 5): "manylinux1",
+}
+
+
+def platform_tags(archs: Sequence[str]) -> Iterator[str]:
+    """Generate manylinux tags compatible to the current platform.
+
+    :param archs: Sequence of compatible architectures.
+        The first one shall be the closest to the actual architecture and be the part of
+        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
+        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
+        be manylinux-compatible.
+
+    :returns: An iterator of compatible manylinux tags.
+    """
+    if not _have_compatible_abi(sys.executable, archs):
+        return
+    # Oldest glibc to be supported regardless of architecture is (2, 17).
+    too_old_glibc2 = _GLibCVersion(2, 16)
+    if set(archs) & {"x86_64", "i686"}:
+        # On x86/i686 also oldest glibc to be supported is (2, 5).
+        too_old_glibc2 = _GLibCVersion(2, 4)
+    current_glibc = _GLibCVersion(*_get_glibc_version())
+    glibc_max_list = [current_glibc]
+    # We can assume compatibility across glibc major versions.
+    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
+    #
+    # Build a list of maximum glibc versions so that we can
+    # output the canonical list of all glibc from current_glibc
+    # down to too_old_glibc2, including all intermediary versions.
+    for glibc_major in range(current_glibc.major - 1, 1, -1):
+        glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
+        glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
+    for arch in archs:
+        for glibc_max in glibc_max_list:
+            if glibc_max.major == too_old_glibc2.major:
+                min_minor = too_old_glibc2.minor
+            else:
+                # For other glibc major versions oldest supported is (x, 0).
+                min_minor = -1
+            for glibc_minor in range(glibc_max.minor, min_minor, -1):
+                glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
+                tag = "manylinux_{}_{}".format(*glibc_version)
+                if _is_compatible(arch, glibc_version):
+                    yield f"{tag}_{arch}"
+                # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
+                if glibc_version in _LEGACY_MANYLINUX_MAP:
+                    legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
+                    if _is_compatible(arch, glibc_version):
+                        yield f"{legacy_tag}_{arch}"
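A quick illustration of the tag cascade above (not part of the vendored diff): `_manylinux` is a private module, so treat this as an orientation sketch rather than a supported API; on Linux this generator is what ultimately feeds the public `packaging.tags.platform_tags()` iterator.

from packaging import _manylinux

# _parse_glibc_version keeps only major.minor and discards vendor suffixes:
assert _manylinux._parse_glibc_version("2.20-2014.11") == (2, 20)

# On an x86_64 host with glibc 2.28 this would yield manylinux_2_28_x86_64
# down through manylinux_2_5_x86_64, interleaving the legacy aliases
# manylinux2014/manylinux2010/manylinux1 at glibc 2.17/2.12/2.5; on a
# non-manylinux platform it yields nothing.
for tag in _manylinux.platform_tags(["x86_64", "i686"]):
    print(tag)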
venv/lib/python3.13/site-packages/packaging/_musllinux.py ADDED
@@ -0,0 +1,85 @@
+"""PEP 656 support.
+
+This module implements logic to detect if the currently running Python is
+linked against musl, and what musl version is used.
+"""
+
+from __future__ import annotations
+
+import functools
+import re
+import subprocess
+import sys
+from typing import Iterator, NamedTuple, Sequence
+
+from ._elffile import ELFFile
+
+
+class _MuslVersion(NamedTuple):
+    major: int
+    minor: int
+
+
+def _parse_musl_version(output: str) -> _MuslVersion | None:
+    lines = [n for n in (n.strip() for n in output.splitlines()) if n]
+    if len(lines) < 2 or lines[0][:4] != "musl":
+        return None
+    m = re.match(r"Version (\d+)\.(\d+)", lines[1])
+    if not m:
+        return None
+    return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
+
+
+@functools.lru_cache
+def _get_musl_version(executable: str) -> _MuslVersion | None:
+    """Detect currently-running musl runtime version.
+
+    This is done by checking the specified executable's dynamic linking
+    information, and invoking the loader to parse its output for a version
+    string. If the loader is musl, the output would be something like::
+
+        musl libc (x86_64)
+        Version 1.2.2
+        Dynamic Program Loader
+    """
+    try:
+        with open(executable, "rb") as f:
+            ld = ELFFile(f).interpreter
+    except (OSError, TypeError, ValueError):
+        return None
+    if ld is None or "musl" not in ld:
+        return None
+    proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
+    return _parse_musl_version(proc.stderr)
+
+
+def platform_tags(archs: Sequence[str]) -> Iterator[str]:
+    """Generate musllinux tags compatible to the current platform.
+
+    :param archs: Sequence of compatible architectures.
+        The first one shall be the closest to the actual architecture and be the part of
+        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
+        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
+        be musllinux-compatible.
+
+    :returns: An iterator of compatible musllinux tags.
+    """
+    sys_musl = _get_musl_version(sys.executable)
+    if sys_musl is None:  # Python not dynamically linked against musl.
+        return
+    for arch in archs:
+        for minor in range(sys_musl.minor, -1, -1):
+            yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
+
+
+if __name__ == "__main__":  # pragma: no cover
+    import sysconfig
+
+    plat = sysconfig.get_platform()
+    assert plat.startswith("linux-"), "not linux"
+
+    print("plat:", plat)
+    print("musl:", _get_musl_version(sys.executable))
+    print("tags:", end=" ")
+    for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
+        print(t, end="\n      ")
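The version sniffing above is deterministic given the loader banner, which makes it easy to sanity-check. A minimal sketch (not part of the diff; `_musllinux` is a private module):

from packaging._musllinux import _parse_musl_version

banner = "musl libc (x86_64)\nVersion 1.2.2\nDynamic Program Loader\n"
assert _parse_musl_version(banner) == (1, 2)   # _MuslVersion(major=1, minor=2)
assert _parse_musl_version("glibc output") is None  # needs two lines + "musl" prefix

# On a musl 1.2 interpreter, platform_tags(["x86_64"]) yields
# musllinux_1_2_x86_64, musllinux_1_1_x86_64, musllinux_1_0_x86_64;
# on glibc systems _get_musl_version() returns None and nothing is yielded.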
venv/lib/python3.13/site-packages/packaging/_parser.py ADDED
@@ -0,0 +1,353 @@
+"""Handwritten parser of dependency specifiers.
+
+The docstring for each __parse_* function contains EBNF-inspired grammar representing
+the implementation.
+"""
+
+from __future__ import annotations
+
+import ast
+from typing import NamedTuple, Sequence, Tuple, Union
+
+from ._tokenizer import DEFAULT_RULES, Tokenizer
+
+
+class Node:
+    def __init__(self, value: str) -> None:
+        self.value = value
+
+    def __str__(self) -> str:
+        return self.value
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__}('{self}')>"
+
+    def serialize(self) -> str:
+        raise NotImplementedError
+
+
+class Variable(Node):
+    def serialize(self) -> str:
+        return str(self)
+
+
+class Value(Node):
+    def serialize(self) -> str:
+        return f'"{self}"'
+
+
+class Op(Node):
+    def serialize(self) -> str:
+        return str(self)
+
+
+MarkerVar = Union[Variable, Value]
+MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
+MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
+MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
+
+
+class ParsedRequirement(NamedTuple):
+    name: str
+    url: str
+    extras: list[str]
+    specifier: str
+    marker: MarkerList | None
+
+
+# --------------------------------------------------------------------------------------
+# Recursive descent parser for dependency specifier
+# --------------------------------------------------------------------------------------
+def parse_requirement(source: str) -> ParsedRequirement:
+    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
+
+
+def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
+    """
+    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
+    """
+    tokenizer.consume("WS")
+
+    name_token = tokenizer.expect(
+        "IDENTIFIER", expected="package name at the start of dependency specifier"
+    )
+    name = name_token.text
+    tokenizer.consume("WS")
+
+    extras = _parse_extras(tokenizer)
+    tokenizer.consume("WS")
+
+    url, specifier, marker = _parse_requirement_details(tokenizer)
+    tokenizer.expect("END", expected="end of dependency specifier")
+
+    return ParsedRequirement(name, url, extras, specifier, marker)
+
+
+def _parse_requirement_details(
+    tokenizer: Tokenizer,
+) -> tuple[str, str, MarkerList | None]:
+    """
+    requirement_details = AT URL (WS requirement_marker?)?
+                        | specifier WS? (requirement_marker)?
+    """
+
+    specifier = ""
+    url = ""
+    marker = None
+
+    if tokenizer.check("AT"):
+        tokenizer.read()
+        tokenizer.consume("WS")
+
+        url_start = tokenizer.position
+        url = tokenizer.expect("URL", expected="URL after @").text
+        if tokenizer.check("END", peek=True):
+            return (url, specifier, marker)
+
+        tokenizer.expect("WS", expected="whitespace after URL")
+
+        # The input might end after whitespace.
+        if tokenizer.check("END", peek=True):
+            return (url, specifier, marker)
+
+        marker = _parse_requirement_marker(
+            tokenizer, span_start=url_start, after="URL and whitespace"
+        )
+    else:
+        specifier_start = tokenizer.position
+        specifier = _parse_specifier(tokenizer)
+        tokenizer.consume("WS")
+
+        if tokenizer.check("END", peek=True):
+            return (url, specifier, marker)
+
+        marker = _parse_requirement_marker(
+            tokenizer,
+            span_start=specifier_start,
+            after=(
+                "version specifier"
+                if specifier
+                else "name and no valid version specifier"
+            ),
+        )
+
+    return (url, specifier, marker)
+
+
+def _parse_requirement_marker(
+    tokenizer: Tokenizer, *, span_start: int, after: str
+) -> MarkerList:
+    """
+    requirement_marker = SEMICOLON marker WS?
+    """
+
+    if not tokenizer.check("SEMICOLON"):
+        tokenizer.raise_syntax_error(
+            f"Expected end or semicolon (after {after})",
+            span_start=span_start,
+        )
+    tokenizer.read()
+
+    marker = _parse_marker(tokenizer)
+    tokenizer.consume("WS")
+
+    return marker
+
+
+def _parse_extras(tokenizer: Tokenizer) -> list[str]:
+    """
+    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
+    """
+    if not tokenizer.check("LEFT_BRACKET", peek=True):
+        return []
+
+    with tokenizer.enclosing_tokens(
+        "LEFT_BRACKET",
+        "RIGHT_BRACKET",
+        around="extras",
+    ):
+        tokenizer.consume("WS")
+        extras = _parse_extras_list(tokenizer)
+        tokenizer.consume("WS")
+
+    return extras
+
+
+def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
+    """
+    extras_list = identifier (wsp* ',' wsp* identifier)*
+    """
+    extras: list[str] = []
+
+    if not tokenizer.check("IDENTIFIER"):
+        return extras
+
+    extras.append(tokenizer.read().text)
+
+    while True:
+        tokenizer.consume("WS")
+        if tokenizer.check("IDENTIFIER", peek=True):
+            tokenizer.raise_syntax_error("Expected comma between extra names")
+        elif not tokenizer.check("COMMA"):
+            break
+
+        tokenizer.read()
+        tokenizer.consume("WS")
+
+        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
+        extras.append(extra_token.text)
+
+    return extras
+
+
+def _parse_specifier(tokenizer: Tokenizer) -> str:
+    """
+    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
+              | WS? version_many WS?
+    """
+    with tokenizer.enclosing_tokens(
+        "LEFT_PARENTHESIS",
+        "RIGHT_PARENTHESIS",
+        around="version specifier",
+    ):
+        tokenizer.consume("WS")
+        parsed_specifiers = _parse_version_many(tokenizer)
+        tokenizer.consume("WS")
+
+    return parsed_specifiers
+
+
+def _parse_version_many(tokenizer: Tokenizer) -> str:
+    """
+    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
+    """
+    parsed_specifiers = ""
+    while tokenizer.check("SPECIFIER"):
+        span_start = tokenizer.position
+        parsed_specifiers += tokenizer.read().text
+        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
+            tokenizer.raise_syntax_error(
+                ".* suffix can only be used with `==` or `!=` operators",
+                span_start=span_start,
+                span_end=tokenizer.position + 1,
+            )
+        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
+            tokenizer.raise_syntax_error(
+                "Local version label can only be used with `==` or `!=` operators",
+                span_start=span_start,
+                span_end=tokenizer.position,
+            )
+        tokenizer.consume("WS")
+        if not tokenizer.check("COMMA"):
+            break
+        parsed_specifiers += tokenizer.read().text
+        tokenizer.consume("WS")
+
+    return parsed_specifiers
+
+
+# --------------------------------------------------------------------------------------
+# Recursive descent parser for marker expression
+# --------------------------------------------------------------------------------------
+def parse_marker(source: str) -> MarkerList:
+    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
+
+
+def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
+    retval = _parse_marker(tokenizer)
+    tokenizer.expect("END", expected="end of marker expression")
+    return retval
+
+
+def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
+    """
+    marker = marker_atom (BOOLOP marker_atom)+
+    """
+    expression = [_parse_marker_atom(tokenizer)]
+    while tokenizer.check("BOOLOP"):
+        token = tokenizer.read()
+        expr_right = _parse_marker_atom(tokenizer)
+        expression.extend((token.text, expr_right))
+    return expression
+
+
+def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
+    """
+    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
+                | WS? marker_item WS?
+    """
+
+    tokenizer.consume("WS")
+    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
+        with tokenizer.enclosing_tokens(
+            "LEFT_PARENTHESIS",
+            "RIGHT_PARENTHESIS",
+            around="marker expression",
+        ):
+            tokenizer.consume("WS")
+            marker: MarkerAtom = _parse_marker(tokenizer)
+            tokenizer.consume("WS")
+    else:
+        marker = _parse_marker_item(tokenizer)
+    tokenizer.consume("WS")
+    return marker
+
+
+def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
+    """
+    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
+    """
+    tokenizer.consume("WS")
+    marker_var_left = _parse_marker_var(tokenizer)
+    tokenizer.consume("WS")
+    marker_op = _parse_marker_op(tokenizer)
+    tokenizer.consume("WS")
+    marker_var_right = _parse_marker_var(tokenizer)
+    tokenizer.consume("WS")
+    return (marker_var_left, marker_op, marker_var_right)
+
+
+def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
+    """
+    marker_var = VARIABLE | QUOTED_STRING
+    """
+    if tokenizer.check("VARIABLE"):
+        return process_env_var(tokenizer.read().text.replace(".", "_"))
+    elif tokenizer.check("QUOTED_STRING"):
+        return process_python_str(tokenizer.read().text)
+    else:
+        tokenizer.raise_syntax_error(
+            message="Expected a marker variable or quoted string"
+        )
+
+
+def process_env_var(env_var: str) -> Variable:
+    if env_var in ("platform_python_implementation", "python_implementation"):
+        return Variable("platform_python_implementation")
+    else:
+        return Variable(env_var)
+
+
+def process_python_str(python_str: str) -> Value:
+    value = ast.literal_eval(python_str)
+    return Value(str(value))
+
+
+def _parse_marker_op(tokenizer: Tokenizer) -> Op:
+    """
+    marker_op = IN | NOT IN | OP
+    """
+    if tokenizer.check("IN"):
+        tokenizer.read()
+        return Op("in")
+    elif tokenizer.check("NOT"):
+        tokenizer.read()
+        tokenizer.expect("WS", expected="whitespace after 'not'")
+        tokenizer.expect("IN", expected="'in' after 'not'")
+        return Op("not in")
+    elif tokenizer.check("OP"):
+        return Op(tokenizer.read().text)
+    else:
+        return tokenizer.raise_syntax_error(
+            "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
+        )
venv/lib/python3.13/site-packages/packaging/_structures.py ADDED
@@ -0,0 +1,61 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+
+
+class InfinityType:
+    def __repr__(self) -> str:
+        return "Infinity"
+
+    def __hash__(self) -> int:
+        return hash(repr(self))
+
+    def __lt__(self, other: object) -> bool:
+        return False
+
+    def __le__(self, other: object) -> bool:
+        return False
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, self.__class__)
+
+    def __gt__(self, other: object) -> bool:
+        return True
+
+    def __ge__(self, other: object) -> bool:
+        return True
+
+    def __neg__(self: object) -> "NegativeInfinityType":
+        return NegativeInfinity
+
+
+Infinity = InfinityType()
+
+
+class NegativeInfinityType:
+    def __repr__(self) -> str:
+        return "-Infinity"
+
+    def __hash__(self) -> int:
+        return hash(repr(self))
+
+    def __lt__(self, other: object) -> bool:
+        return True
+
+    def __le__(self, other: object) -> bool:
+        return True
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, self.__class__)
+
+    def __gt__(self, other: object) -> bool:
+        return False
+
+    def __ge__(self, other: object) -> bool:
+        return False
+
+    def __neg__(self: object) -> InfinityType:
+        return Infinity
+
+
+NegativeInfinity = NegativeInfinityType()
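These two singletons exist so that comparison keys elsewhere in packaging (e.g. version sorting) can be padded with values that order above or below everything else. A minimal demonstration (not part of the diff):

from packaging._structures import Infinity, NegativeInfinity

assert Infinity > (99, 99) and Infinity >= "anything"      # greater than everything
assert NegativeInfinity < (0,) and NegativeInfinity <= ""  # less than everything
assert -Infinity is NegativeInfinity and -NegativeInfinity is Infinity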
venv/lib/python3.13/site-packages/packaging/_tokenizer.py ADDED
@@ -0,0 +1,195 @@
+from __future__ import annotations
+
+import contextlib
+import re
+from dataclasses import dataclass
+from typing import Iterator, NoReturn
+
+from .specifiers import Specifier
+
+
+@dataclass
+class Token:
+    name: str
+    text: str
+    position: int
+
+
+class ParserSyntaxError(Exception):
+    """The provided source text could not be parsed correctly."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        source: str,
+        span: tuple[int, int],
+    ) -> None:
+        self.span = span
+        self.message = message
+        self.source = source
+
+        super().__init__()
+
+    def __str__(self) -> str:
+        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
+        return "\n    ".join([self.message, self.source, marker])
+
+
+DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
+    "LEFT_PARENTHESIS": r"\(",
+    "RIGHT_PARENTHESIS": r"\)",
+    "LEFT_BRACKET": r"\[",
+    "RIGHT_BRACKET": r"\]",
+    "SEMICOLON": r";",
+    "COMMA": r",",
+    "QUOTED_STRING": re.compile(
+        r"""
+            (
+                ('[^']*')
+                |
+                ("[^"]*")
+            )
+        """,
+        re.VERBOSE,
+    ),
+    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
+    "BOOLOP": r"\b(or|and)\b",
+    "IN": r"\bin\b",
+    "NOT": r"\bnot\b",
+    "VARIABLE": re.compile(
+        r"""
+            \b(
+                python_version
+                |python_full_version
+                |os[._]name
+                |sys[._]platform
+                |platform_(release|system)
+                |platform[._](version|machine|python_implementation)
+                |python_implementation
+                |implementation_(name|version)
+                |extras?
+                |dependency_groups
+            )\b
+        """,
+        re.VERBOSE,
+    ),
+    "SPECIFIER": re.compile(
+        Specifier._operator_regex_str + Specifier._version_regex_str,
+        re.VERBOSE | re.IGNORECASE,
+    ),
+    "AT": r"\@",
+    "URL": r"[^ \t]+",
+    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
+    "VERSION_PREFIX_TRAIL": r"\.\*",
+    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
+    "WS": r"[ \t]+",
+    "END": r"$",
+}
+
+
+class Tokenizer:
+    """Context-sensitive token parsing.
+
+    Provides methods to examine the input stream to check whether the next token
+    matches.
+    """
+
+    def __init__(
+        self,
+        source: str,
+        *,
+        rules: dict[str, str | re.Pattern[str]],
+    ) -> None:
+        self.source = source
+        self.rules: dict[str, re.Pattern[str]] = {
+            name: re.compile(pattern) for name, pattern in rules.items()
+        }
+        self.next_token: Token | None = None
+        self.position = 0
+
+    def consume(self, name: str) -> None:
+        """Move beyond provided token name, if at current position."""
+        if self.check(name):
+            self.read()
+
+    def check(self, name: str, *, peek: bool = False) -> bool:
+        """Check whether the next token has the provided name.
+
+        By default, if the check succeeds, the token *must* be read before
+        another check. If `peek` is set to `True`, the token is not loaded and
+        would need to be checked again.
+        """
+        assert self.next_token is None, (
+            f"Cannot check for {name!r}, already have {self.next_token!r}"
+        )
+        assert name in self.rules, f"Unknown token name: {name!r}"
+
+        expression = self.rules[name]
+
+        match = expression.match(self.source, self.position)
+        if match is None:
+            return False
+        if not peek:
+            self.next_token = Token(name, match[0], self.position)
+        return True
+
+    def expect(self, name: str, *, expected: str) -> Token:
+        """Expect a certain token name next, failing with a syntax error otherwise.
+
+        The token is *not* read.
+        """
+        if not self.check(name):
+            raise self.raise_syntax_error(f"Expected {expected}")
+        return self.read()
+
+    def read(self) -> Token:
+        """Consume the next token and return it."""
+        token = self.next_token
+        assert token is not None
+
+        self.position += len(token.text)
+        self.next_token = None
+
+        return token
+
+    def raise_syntax_error(
+        self,
+        message: str,
+        *,
+        span_start: int | None = None,
+        span_end: int | None = None,
+    ) -> NoReturn:
+        """Raise ParserSyntaxError at the given position."""
+        span = (
+            self.position if span_start is None else span_start,
+            self.position if span_end is None else span_end,
+        )
+        raise ParserSyntaxError(
+            message,
+            source=self.source,
+            span=span,
+        )
+
+    @contextlib.contextmanager
+    def enclosing_tokens(
+        self, open_token: str, close_token: str, *, around: str
+    ) -> Iterator[None]:
+        if self.check(open_token):
+            open_position = self.position
+            self.read()
+        else:
+            open_position = None
+
+        yield
+
+        if open_position is None:
+            return
+
+        if not self.check(close_token):
+            self.raise_syntax_error(
+                f"Expected matching {close_token} for {open_token}, after {around}",
+                span_start=open_position,
+            )
+
+        self.read()
venv/lib/python3.13/site-packages/packaging/markers.py ADDED
@@ -0,0 +1,362 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+
+from __future__ import annotations
+
+import operator
+import os
+import platform
+import sys
+from typing import AbstractSet, Any, Callable, Literal, TypedDict, Union, cast
+
+from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
+from ._parser import parse_marker as _parse_marker
+from ._tokenizer import ParserSyntaxError
+from .specifiers import InvalidSpecifier, Specifier
+from .utils import canonicalize_name
+
+__all__ = [
+    "EvaluateContext",
+    "InvalidMarker",
+    "Marker",
+    "UndefinedComparison",
+    "UndefinedEnvironmentName",
+    "default_environment",
+]
+
+Operator = Callable[[str, Union[str, AbstractSet[str]]], bool]
+EvaluateContext = Literal["metadata", "lock_file", "requirement"]
+MARKERS_ALLOWING_SET = {"extras", "dependency_groups"}
+
+
+class InvalidMarker(ValueError):
+    """
+    An invalid marker was found, users should refer to PEP 508.
+    """
+
+
+class UndefinedComparison(ValueError):
+    """
+    An invalid operation was attempted on a value that doesn't support it.
+    """
+
+
+class UndefinedEnvironmentName(ValueError):
+    """
+    A name was attempted to be used that does not exist inside of the
+    environment.
+    """
+
+
+class Environment(TypedDict):
+    implementation_name: str
+    """The implementation's identifier, e.g. ``'cpython'``."""
+
+    implementation_version: str
+    """
+    The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
+    ``'7.3.13'`` for PyPy3.10 v7.3.13.
+    """
+
+    os_name: str
+    """
+    The value of :py:data:`os.name`. The name of the operating system dependent module
+    imported, e.g. ``'posix'``.
+    """
+
+    platform_machine: str
+    """
+    Returns the machine type, e.g. ``'i386'``.
+
+    An empty string if the value cannot be determined.
+    """
+
+    platform_release: str
+    """
+    The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
+
+    An empty string if the value cannot be determined.
+    """
+
+    platform_system: str
+    """
+    The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
+
+    An empty string if the value cannot be determined.
+    """
+
+    platform_version: str
+    """
+    The system's release version, e.g. ``'#3 on degas'``.
+
+    An empty string if the value cannot be determined.
+    """
+
+    python_full_version: str
+    """
+    The Python version as string ``'major.minor.patchlevel'``.
+
+    Note that unlike the Python :py:data:`sys.version`, this value will always include
+    the patchlevel (it defaults to 0).
+    """
+
+    platform_python_implementation: str
+    """
+    A string identifying the Python implementation, e.g. ``'CPython'``.
+    """
+
+    python_version: str
+    """The Python version as string ``'major.minor'``."""
+
+    sys_platform: str
+    """
+    This string contains a platform identifier that can be used to append
+    platform-specific components to :py:data:`sys.path`, for instance.
+
+    For Unix systems, except on Linux and AIX, this is the lowercased OS name as
+    returned by ``uname -s`` with the first part of the version as returned by
+    ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
+    was built.
+    """
+
+
+def _normalize_extra_values(results: Any) -> Any:
+    """
+    Normalize extra values.
+    """
+    if isinstance(results[0], tuple):
+        lhs, op, rhs = results[0]
+        if isinstance(lhs, Variable) and lhs.value == "extra":
+            normalized_extra = canonicalize_name(rhs.value)
+            rhs = Value(normalized_extra)
+        elif isinstance(rhs, Variable) and rhs.value == "extra":
+            normalized_extra = canonicalize_name(lhs.value)
+            lhs = Value(normalized_extra)
+        results[0] = lhs, op, rhs
+    return results
+
+
+def _format_marker(
+    marker: list[str] | MarkerAtom | str, first: bool | None = True
+) -> str:
+    assert isinstance(marker, (list, tuple, str))
+
+    # Sometimes we have a structure like [[...]] which is a single item list
+    # where the single item is itself it's own list. In that case we want skip
+    # the rest of this function so that we don't get extraneous () on the
+    # outside.
+    if (
+        isinstance(marker, list)
+        and len(marker) == 1
+        and isinstance(marker[0], (list, tuple))
+    ):
+        return _format_marker(marker[0])
+
+    if isinstance(marker, list):
+        inner = (_format_marker(m, first=False) for m in marker)
+        if first:
+            return " ".join(inner)
+        else:
+            return "(" + " ".join(inner) + ")"
+    elif isinstance(marker, tuple):
+        return " ".join([m.serialize() for m in marker])
+    else:
+        return marker
+
+
+_operators: dict[str, Operator] = {
+    "in": lambda lhs, rhs: lhs in rhs,
+    "not in": lambda lhs, rhs: lhs not in rhs,
+    "<": operator.lt,
+    "<=": operator.le,
+    "==": operator.eq,
+    "!=": operator.ne,
+    ">=": operator.ge,
+    ">": operator.gt,
+}
+
+
+def _eval_op(lhs: str, op: Op, rhs: str | AbstractSet[str]) -> bool:
+    if isinstance(rhs, str):
+        try:
+            spec = Specifier("".join([op.serialize(), rhs]))
+        except InvalidSpecifier:
+            pass
+        else:
+            return spec.contains(lhs, prereleases=True)
+
+    oper: Operator | None = _operators.get(op.serialize())
+    if oper is None:
+        raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
+
+    return oper(lhs, rhs)
+
+
+def _normalize(
+    lhs: str, rhs: str | AbstractSet[str], key: str
+) -> tuple[str, str | AbstractSet[str]]:
+    # PEP 685 – Comparison of extra names for optional distribution dependencies
+    # https://peps.python.org/pep-0685/
+    # > When comparing extra names, tools MUST normalize the names being
+    # > compared using the semantics outlined in PEP 503 for names
+    if key == "extra":
+        assert isinstance(rhs, str), "extra value must be a string"
+        return (canonicalize_name(lhs), canonicalize_name(rhs))
+    if key in MARKERS_ALLOWING_SET:
+        if isinstance(rhs, str):  # pragma: no cover
+            return (canonicalize_name(lhs), canonicalize_name(rhs))
+        else:
+            return (canonicalize_name(lhs), {canonicalize_name(v) for v in rhs})
+
+    # other environment markers don't have such standards
+    return lhs, rhs
+
+
+def _evaluate_markers(
+    markers: MarkerList, environment: dict[str, str | AbstractSet[str]]
+) -> bool:
+    groups: list[list[bool]] = [[]]
+
+    for marker in markers:
+        assert isinstance(marker, (list, tuple, str))
+
+        if isinstance(marker, list):
+            groups[-1].append(_evaluate_markers(marker, environment))
+        elif isinstance(marker, tuple):
+            lhs, op, rhs = marker
+
+            if isinstance(lhs, Variable):
+                environment_key = lhs.value
+                lhs_value = environment[environment_key]
+                rhs_value = rhs.value
+            else:
+                lhs_value = lhs.value
+                environment_key = rhs.value
+                rhs_value = environment[environment_key]
+            assert isinstance(lhs_value, str), "lhs must be a string"
+            lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
+            groups[-1].append(_eval_op(lhs_value, op, rhs_value))
+        else:
+            assert marker in ["and", "or"]
+            if marker == "or":
+                groups.append([])
+
+    return any(all(item) for item in groups)
+
+
+def format_full_version(info: sys._version_info) -> str:
+    version = f"{info.major}.{info.minor}.{info.micro}"
+    kind = info.releaselevel
+    if kind != "final":
+        version += kind[0] + str(info.serial)
+    return version
+
+
+def default_environment() -> Environment:
+    iver = format_full_version(sys.implementation.version)
+    implementation_name = sys.implementation.name
+    return {
+        "implementation_name": implementation_name,
+        "implementation_version": iver,
+        "os_name": os.name,
+        "platform_machine": platform.machine(),
+        "platform_release": platform.release(),
+        "platform_system": platform.system(),
+        "platform_version": platform.version(),
+        "python_full_version": platform.python_version(),
+        "platform_python_implementation": platform.python_implementation(),
+        "python_version": ".".join(platform.python_version_tuple()[:2]),
+        "sys_platform": sys.platform,
+    }
+
+
+class Marker:
+    def __init__(self, marker: str) -> None:
+        # Note: We create a Marker object without calling this constructor in
+        #       packaging.requirements.Requirement. If any additional logic is
+        #       added here, make sure to mirror/adapt Requirement.
+        try:
+            self._markers = _normalize_extra_values(_parse_marker(marker))
+            # The attribute `_markers` can be described in terms of a recursive type:
+            # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
+            #
+            # For example, the following expression:
+            # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
+            #
+            # is parsed into:
+            # [
+            #     (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
+            #     'and',
+            #     [
+            #         (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
+            #         'or',
+            #         (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
+            #     ]
+            # ]
+        except ParserSyntaxError as e:
+            raise InvalidMarker(str(e)) from e
+
+    def __str__(self) -> str:
+        return _format_marker(self._markers)
+
+    def __repr__(self) -> str:
+        return f"<Marker('{self}')>"
+
+    def __hash__(self) -> int:
+        return hash((self.__class__.__name__, str(self)))
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Marker):
+            return NotImplemented
+
+        return str(self) == str(other)
+
+    def evaluate(
+        self,
+        environment: dict[str, str] | None = None,
+        context: EvaluateContext = "metadata",
+    ) -> bool:
+        """Evaluate a marker.
+
+        Return the boolean from evaluating the given marker against the
+        environment. environment is an optional argument to override all or
+        part of the determined environment. The *context* parameter specifies what
+        context the markers are being evaluated for, which influences what markers
+        are considered valid. Acceptable values are "metadata" (for core metadata;
+        default), "lock_file", and "requirement" (i.e. all other situations).
+
+        The environment is determined from the current Python process.
+        """
+        current_environment = cast(
+            "dict[str, str | AbstractSet[str]]", default_environment()
+        )
+        if context == "lock_file":
+            current_environment.update(
+                extras=frozenset(), dependency_groups=frozenset()
+            )
+        elif context == "metadata":
+            current_environment["extra"] = ""
+        if environment is not None:
+            current_environment.update(environment)
+            # The API used to allow setting extra to None. We need to handle this
+            # case for backwards compatibility.
+            if "extra" in current_environment and current_environment["extra"] is None:
+                current_environment["extra"] = ""
+
+        return _evaluate_markers(
+            self._markers, _repair_python_full_version(current_environment)
+        )
+
+
+def _repair_python_full_version(
+    env: dict[str, str | AbstractSet[str]],
+) -> dict[str, str | AbstractSet[str]]:
+    """
+    Work around platform.python_version() returning something that is not PEP 440
+    compliant for non-tagged Python builds.
+    """
+    python_full_version = cast(str, env["python_full_version"])
+    if python_full_version.endswith("+"):
+        env["python_full_version"] = f"{python_full_version}local"
+    return env
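Marker is the one public entry point in this file; evaluation against the running interpreter or an overridden environment looks like the sketch below (not part of the diff; the second result assumes the overrides shown):

from packaging.markers import Marker

m = Marker('python_version >= "3.9" and sys_platform != "win32"')
print(m.evaluate())  # against default_environment() of the running interpreter

# Resolvers override keys to evaluate for a *target* environment instead:
print(m.evaluate({"python_version": "3.8", "sys_platform": "linux"}))  # False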
venv/lib/python3.13/site-packages/packaging/metadata.py ADDED
@@ -0,0 +1,862 @@
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import email.feedparser
|
| 4 |
+
import email.header
|
| 5 |
+
import email.message
|
| 6 |
+
import email.parser
|
| 7 |
+
import email.policy
|
| 8 |
+
import pathlib
|
| 9 |
+
import sys
|
| 10 |
+
import typing
|
| 11 |
+
from typing import (
|
| 12 |
+
Any,
|
| 13 |
+
Callable,
|
| 14 |
+
Generic,
|
| 15 |
+
Literal,
|
| 16 |
+
TypedDict,
|
| 17 |
+
cast,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
from . import licenses, requirements, specifiers, utils
|
| 21 |
+
from . import version as version_module
|
| 22 |
+
from .licenses import NormalizedLicenseExpression
|
| 23 |
+
|
| 24 |
+
T = typing.TypeVar("T")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
if sys.version_info >= (3, 11): # pragma: no cover
|
| 28 |
+
ExceptionGroup = ExceptionGroup
|
| 29 |
+
else: # pragma: no cover
|
| 30 |
+
|
| 31 |
+
class ExceptionGroup(Exception):
|
| 32 |
+
"""A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
|
| 33 |
+
|
| 34 |
+
If :external:exc:`ExceptionGroup` is already defined by Python itself,
|
| 35 |
+
that version is used instead.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
message: str
|
| 39 |
+
exceptions: list[Exception]
|
| 40 |
+
|
| 41 |
+
def __init__(self, message: str, exceptions: list[Exception]) -> None:
|
| 42 |
+
self.message = message
|
| 43 |
+
self.exceptions = exceptions
|
| 44 |
+
|
| 45 |
+
def __repr__(self) -> str:
|
| 46 |
+
return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class InvalidMetadata(ValueError):
|
| 50 |
+
"""A metadata field contains invalid data."""
|
| 51 |
+
|
| 52 |
+
field: str
|
| 53 |
+
"""The name of the field that contains invalid data."""
|
| 54 |
+
|
| 55 |
+
def __init__(self, field: str, message: str) -> None:
|
| 56 |
+
self.field = field
|
| 57 |
+
+        super().__init__(message)
+
+
+# The RawMetadata class attempts to make as few assumptions about the underlying
+# serialization formats as possible. The idea is that as long as a serialization
+# format offers some very basic primitives in *some* way then we can support
+# serializing to and from that format.
+class RawMetadata(TypedDict, total=False):
+    """A dictionary of raw core metadata.
+
+    Each field in core metadata maps to a key of this dictionary (when data is
+    provided). The key is lower-case and underscores are used instead of dashes
+    compared to the equivalent core metadata field. Any core metadata field that
+    can be specified multiple times or can hold multiple values in a single
+    field has a key with a plural name. See :class:`Metadata` whose attributes
+    match the keys of this dictionary.
+
+    Core metadata fields that can be specified multiple times are stored as a
+    list or dict depending on which is appropriate for the field. Any fields
+    which hold multiple values in a single field are stored as a list.
+
+    """
+
+    # Metadata 1.0 - PEP 241
+    metadata_version: str
+    name: str
+    version: str
+    platforms: list[str]
+    summary: str
+    description: str
+    keywords: list[str]
+    home_page: str
+    author: str
+    author_email: str
+    license: str
+
+    # Metadata 1.1 - PEP 314
+    supported_platforms: list[str]
+    download_url: str
+    classifiers: list[str]
+    requires: list[str]
+    provides: list[str]
+    obsoletes: list[str]
+
+    # Metadata 1.2 - PEP 345
+    maintainer: str
+    maintainer_email: str
+    requires_dist: list[str]
+    provides_dist: list[str]
+    obsoletes_dist: list[str]
+    requires_python: str
+    requires_external: list[str]
+    project_urls: dict[str, str]
+
+    # Metadata 2.0
+    # PEP 426 attempted to completely revamp the metadata format
+    # but got stuck without ever being able to build consensus on
+    # it and ultimately ended up withdrawn.
+    #
+    # However, a number of tools had started emitting METADATA with
+    # `2.0` Metadata-Version, so for historical reasons, this version
+    # was skipped.
+
+    # Metadata 2.1 - PEP 566
+    description_content_type: str
+    provides_extra: list[str]
+
+    # Metadata 2.2 - PEP 643
+    dynamic: list[str]
+
+    # Metadata 2.3 - PEP 685
+    # No new fields were added in PEP 685, just some edge cases were
+    # tightened up to provide better interoperability.
+
+    # Metadata 2.4 - PEP 639
+    license_expression: str
+    license_files: list[str]
+
+
+_STRING_FIELDS = {
+    "author",
+    "author_email",
+    "description",
+    "description_content_type",
+    "download_url",
+    "home_page",
+    "license",
+    "license_expression",
+    "maintainer",
+    "maintainer_email",
+    "metadata_version",
+    "name",
+    "requires_python",
+    "summary",
+    "version",
+}
+
+_LIST_FIELDS = {
+    "classifiers",
+    "dynamic",
+    "license_files",
+    "obsoletes",
+    "obsoletes_dist",
+    "platforms",
+    "provides",
+    "provides_dist",
+    "provides_extra",
+    "requires",
+    "requires_dist",
+    "requires_external",
+    "supported_platforms",
+}
+
+_DICT_FIELDS = {
+    "project_urls",
+}
+
+
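Aside (editor's illustration, not part of the upstream file): a minimal sketch of the shape of data `RawMetadata` describes. The keys come from the class definition above; the values are invented for illustration.

    # Hypothetical example values; keys are defined by RawMetadata above.
    example_raw: RawMetadata = {
        "metadata_version": "2.4",              # string field
        "name": "example-project",
        "version": "1.0.0",
        "keywords": ["packaging", "metadata"],  # parsed from a comma-separated str
        "classifiers": ["Programming Language :: Python :: 3"],  # multi-use -> list
        "project_urls": {"Homepage": "https://example.org"},     # multi-use -> dict
    }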
+def _parse_keywords(data: str) -> list[str]:
+    """Split a string of comma-separated keywords into a list of keywords."""
+    return [k.strip() for k in data.split(",")]
+
+
+def _parse_project_urls(data: list[str]) -> dict[str, str]:
+    """Parse a list of label/URL string pairings separated by a comma."""
+    urls = {}
+    for pair in data:
+        # Our logic is slightly tricky here as we want to try and do
+        # *something* reasonable with malformed data.
+        #
+        # The main thing that we have to worry about is data that does
+        # not have a ',' at all to split the label from the value. There
+        # isn't a singular right answer here, and we will fail validation
+        # later on (if the caller is validating) so it doesn't *really*
+        # matter, but since the missing value has to be an empty str
+        # and our return value is dict[str, str], if we let the key
+        # be the missing value, then they'd have multiple '' values that
+        # overwrite each other in an accumulating dict.
+        #
+        # The other potential issue is that it's possible to have the
+        # same label multiple times in the metadata, with no solid "right"
+        # answer with what to do in that case. As such, we'll do the only
+        # thing we can, which is treat the field as unparseable and add it
+        # to our list of unparsed fields.
+        parts = [p.strip() for p in pair.split(",", 1)]
+        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
+
+        # TODO: The spec doesn't say anything about if the keys should be
+        #       considered case sensitive or not... logically they should
+        #       be case-preserving and case-insensitive, but doing that
+        #       would open up more cases where we might have duplicate
+        #       entries.
+        label, url = parts
+        if label in urls:
+            # The label already exists in our set of urls, so this field
+            # is unparseable, and we can just add the whole thing to our
+            # unparseable data and stop processing it.
+            raise KeyError("duplicate labels in project urls")
+        urls[label] = url
+
+    return urls
+
+
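Aside (editor's sketch): the expected behavior of the two private helpers above, assuming they are called directly; the inputs are invented.

    _parse_keywords("a, b ,c")        # -> ["a", "b", "c"]
    _parse_project_urls(["Docs, https://example.org/docs"])
    #                                 -> {"Docs": "https://example.org/docs"}
    _parse_project_urls(["NoComma"])  # -> {"NoComma": ""}; a missing URL becomes ""
    # A duplicated label raises KeyError, which parse_email() below catches
    # and routes into its "unparsed" dict:
    # _parse_project_urls(["Docs, https://a", "Docs, https://b"])  # KeyError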
+def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
+    """Get the body of the message."""
+    # If our source is a str, then our caller has managed encodings for us,
+    # and we don't need to deal with it.
+    if isinstance(source, str):
+        payload = msg.get_payload()
+        assert isinstance(payload, str)
+        return payload
+    # If our source is a bytes, then we're managing the encoding and we need
+    # to deal with it.
+    else:
+        bpayload = msg.get_payload(decode=True)
+        assert isinstance(bpayload, bytes)
+        try:
+            return bpayload.decode("utf8", "strict")
+        except UnicodeDecodeError as exc:
+            raise ValueError("payload in an invalid encoding") from exc
+
+
+# The various parse_FORMAT functions here are intended to be as lenient as
+# possible in their parsing, while still returning a correctly typed
+# RawMetadata.
+#
+# To aid in this, we also generally want to do as little touching of the
+# data as possible, except where there are possibly some historic holdovers
+# that make valid data awkward to work with.
+#
+# While this is a lower level, intermediate format than our ``Metadata``
+# class, some light touch ups can make a massive difference in usability.
+
+# Map METADATA fields to RawMetadata.
+_EMAIL_TO_RAW_MAPPING = {
+    "author": "author",
+    "author-email": "author_email",
+    "classifier": "classifiers",
+    "description": "description",
+    "description-content-type": "description_content_type",
+    "download-url": "download_url",
+    "dynamic": "dynamic",
+    "home-page": "home_page",
+    "keywords": "keywords",
+    "license": "license",
+    "license-expression": "license_expression",
+    "license-file": "license_files",
+    "maintainer": "maintainer",
+    "maintainer-email": "maintainer_email",
+    "metadata-version": "metadata_version",
+    "name": "name",
+    "obsoletes": "obsoletes",
+    "obsoletes-dist": "obsoletes_dist",
+    "platform": "platforms",
+    "project-url": "project_urls",
+    "provides": "provides",
+    "provides-dist": "provides_dist",
+    "provides-extra": "provides_extra",
+    "requires": "requires",
+    "requires-dist": "requires_dist",
+    "requires-external": "requires_external",
+    "requires-python": "requires_python",
+    "summary": "summary",
+    "supported-platform": "supported_platforms",
+    "version": "version",
+}
+_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
+
+
+def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
+    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
+
+    This function returns a two-item tuple of dicts. The first dict is of
+    recognized fields from the core metadata specification. Fields that can be
+    parsed and translated into Python's built-in types are converted
+    appropriately. All other fields are left as-is. Fields that are allowed to
+    appear multiple times are stored as lists.
+
+    The second dict contains all other fields from the metadata. This includes
+    any unrecognized fields. It also includes any fields which are expected to
+    be parsed into a built-in type but were not formatted appropriately. Finally,
+    any fields that are expected to appear only once but are repeated are
+    included in this dict.
+
+    """
+    raw: dict[str, str | list[str] | dict[str, str]] = {}
+    unparsed: dict[str, list[str]] = {}
+
+    if isinstance(data, str):
+        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
+    else:
+        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
+
+    # We have to wrap parsed.keys() in a set, because in the case of multiple
+    # values for a key (a list), the key will appear multiple times in the
+    # list of keys, but we're avoiding that by using get_all().
+    for name in frozenset(parsed.keys()):
+        # Header names in RFC are case insensitive, so we'll normalize to all
+        # lower case to make comparisons easier.
+        name = name.lower()
+
+        # We use get_all() here, even for fields that aren't multiple use,
+        # because otherwise someone could have e.g. two Name fields, and we
+        # would just silently ignore it rather than doing something about it.
+        headers = parsed.get_all(name) or []
+
+        # The way the email module works when parsing bytes is that it
+        # unconditionally decodes the bytes as ascii using the surrogateescape
+        # handler. When you pull that data back out (such as with get_all()),
+        # it looks to see if the str has any surrogate escapes, and if it does
+        # it wraps it in a Header object instead of returning the string.
+        #
+        # As such, we'll look for those Header objects, and fix up the encoding.
+        value = []
+        # Flag if we have run into any issues processing the headers, thus
+        # signalling that the data belongs in 'unparsed'.
+        valid_encoding = True
+        for h in headers:
+            # It's unclear if this can return more types than just a Header or
+            # a str, so we'll just assert here to make sure.
+            assert isinstance(h, (email.header.Header, str))
+
+            # If it's a header object, we need to do our little dance to get
+            # the real data out of it. In cases where there is invalid data
+            # we're going to end up with mojibake, but there's no obvious, good
+            # way around that without reimplementing parts of the Header object
+            # ourselves.
+            #
+            # That should be fine since, if mojibake happens, this key is
+            # going into the unparsed dict anyways.
+            if isinstance(h, email.header.Header):
+                # The Header object stores its data as chunks, and each chunk
+                # can be independently encoded, so we'll need to check each
+                # of them.
+                chunks: list[tuple[bytes, str | None]] = []
+                for bin, encoding in email.header.decode_header(h):
+                    try:
+                        bin.decode("utf8", "strict")
+                    except UnicodeDecodeError:
+                        # Enable mojibake.
+                        encoding = "latin1"
+                        valid_encoding = False
+                    else:
+                        encoding = "utf8"
+                    chunks.append((bin, encoding))
+
+                # Turn our chunks back into a Header object, then let that
+                # Header object do the right thing to turn them into a
+                # string for us.
+                value.append(str(email.header.make_header(chunks)))
+            # This is already a string, so just add it.
+            else:
+                value.append(h)
+
+        # We've processed all of our values to get them into a list of str,
+        # but we may have mojibake data, in which case this is an unparsed
+        # field.
+        if not valid_encoding:
+            unparsed[name] = value
+            continue
+
+        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
+        if raw_name is None:
+            # This is a bit of a weird situation, we've encountered a key that
+            # we don't know what it means, so we don't know whether it's meant
+            # to be a list or not.
+            #
+            # Since we can't really tell one way or another, we'll just leave it
+            # as a list, even though it may be a single item list, because that's
+            # what makes the most sense for email headers.
+            unparsed[name] = value
+            continue
+
+        # If this is one of our string fields, then we'll check to see if our
+        # value is a list of a single item. If it is then we'll assume that
+        # it was emitted as a single string, and unwrap the str from inside
+        # the list.
+        #
+        # If it's any other kind of data, then we haven't the faintest clue
+        # what we should parse it as, and we have to just add it to our list
+        # of unparsed stuff.
+        if raw_name in _STRING_FIELDS and len(value) == 1:
+            raw[raw_name] = value[0]
+        # If this is one of our list of string fields, then we can just assign
+        # the value, since email *only* has strings, and our get_all() call
+        # above ensures that this is a list.
+        elif raw_name in _LIST_FIELDS:
+            raw[raw_name] = value
+        # Special Case: Keywords
+        # The keywords field is implemented in the metadata spec as a str,
+        # but it conceptually is a list of strings, and is serialized using
+        # ", ".join(keywords), so we'll do some light data massaging to turn
+        # this into what it logically is.
+        elif raw_name == "keywords" and len(value) == 1:
+            raw[raw_name] = _parse_keywords(value[0])
+        # Special Case: Project-URL
+        # The project urls is implemented in the metadata spec as a list of
+        # specially-formatted strings that represent a key and a value, which
+        # is fundamentally a mapping, however the email format doesn't support
+        # mappings in a sane way, so it was crammed into a list of strings
+        # instead.
+        #
+        # We will do a little light data massaging to turn this into a map as
+        # it logically should be.
+        elif raw_name == "project_urls":
+            try:
+                raw[raw_name] = _parse_project_urls(value)
+            except KeyError:
+                unparsed[name] = value
+        # Nothing that we've done has managed to parse this, so it'll just
+        # throw it in our unparseable data and move on.
+        else:
+            unparsed[name] = value
+
+    # We need to support getting the Description from the message payload in
+    # addition to getting it from the headers. This does mean, though, there
+    # is the possibility of it being set both ways, in which case we put both
+    # in 'unparsed' since we don't know which is right.
+    try:
+        payload = _get_payload(parsed, data)
+    except ValueError:
+        unparsed.setdefault("description", []).append(
+            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
+        )
+    else:
+        if payload:
+            # Check to see if we've already got a description, if so then both
+            # it, and this body move to unparseable.
+            if "description" in raw:
+                description_header = cast(str, raw.pop("description"))
+                unparsed.setdefault("description", []).extend(
+                    [description_header, payload]
+                )
+            elif "description" in unparsed:
+                unparsed["description"].append(payload)
+            else:
+                raw["description"] = payload
+
+    # We need to cast our `raw` to a metadata, because a TypedDict only supports
+    # literal key names, but we're computing our key names on purpose, but the
+    # way this function is implemented, our `TypedDict` can only have valid key
+    # names.
+    return cast(RawMetadata, raw), unparsed
+
+
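Aside (editor's usage sketch, not part of the upstream file): how parse_email() splits recognized fields from everything else. The METADATA text is invented.

    METADATA = """\
    Metadata-Version: 2.1
    Name: example-project
    Version: 1.0.0
    Keywords: packaging, metadata
    Unknown-Field: kept around
    """
    raw, unparsed = parse_email(METADATA)
    assert raw["name"] == "example-project"
    assert raw["keywords"] == ["packaging", "metadata"]   # comma-split and stripped
    assert unparsed == {"unknown-field": ["kept around"]}  # unrecognized -> unparsed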
+_NOT_FOUND = object()
+
+
+# Keep the two values in sync.
+_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+
+_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
+
+
+class _Validator(Generic[T]):
+    """Validate a metadata field.
+
+    All _process_*() methods correspond to a core metadata field. The method is
+    called with the field's raw value. If the raw value is valid it is returned
+    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
+    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
+    as appropriate).
+    """
+
+    name: str
+    raw_name: str
+    added: _MetadataVersion
+
+    def __init__(
+        self,
+        *,
+        added: _MetadataVersion = "1.0",
+    ) -> None:
+        self.added = added
+
+    def __set_name__(self, _owner: Metadata, name: str) -> None:
+        self.name = name
+        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
+
+    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
+        # With Python 3.8, the caching can be replaced with functools.cached_property().
+        # No need to check the cache as attribute lookup will resolve into the
+        # instance's __dict__ before __get__ is called.
+        cache = instance.__dict__
+        value = instance._raw.get(self.name)
+
+        # To make the _process_* methods easier, we'll check if the value is None
+        # and if this field is NOT a required attribute, and if both of those
+        # things are true, we'll skip the converter. This will mean that the
+        # converters never have to deal with the None union.
+        if self.name in _REQUIRED_ATTRS or value is not None:
+            try:
+                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
+            except AttributeError:
+                pass
+            else:
+                value = converter(value)
+
+        cache[self.name] = value
+        try:
+            del instance._raw[self.name]  # type: ignore[misc]
+        except KeyError:
+            pass
+
+        return cast(T, value)
+
+    def _invalid_metadata(
+        self, msg: str, cause: Exception | None = None
+    ) -> InvalidMetadata:
+        exc = InvalidMetadata(
+            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
+        )
+        exc.__cause__ = cause
+        return exc
+
+    def _process_metadata_version(self, value: str) -> _MetadataVersion:
+        # Implicitly makes Metadata-Version required.
+        if value not in _VALID_METADATA_VERSIONS:
+            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
+        return cast(_MetadataVersion, value)
+
+    def _process_name(self, value: str) -> str:
+        if not value:
+            raise self._invalid_metadata("{field} is a required field")
+        # Validate the name as a side-effect.
+        try:
+            utils.canonicalize_name(value, validate=True)
+        except utils.InvalidName as exc:
+            raise self._invalid_metadata(
+                f"{value!r} is invalid for {{field}}", cause=exc
+            ) from exc
+        else:
+            return value
+
+    def _process_version(self, value: str) -> version_module.Version:
+        if not value:
+            raise self._invalid_metadata("{field} is a required field")
+        try:
+            return version_module.parse(value)
+        except version_module.InvalidVersion as exc:
+            raise self._invalid_metadata(
+                f"{value!r} is invalid for {{field}}", cause=exc
+            ) from exc
+
+    def _process_summary(self, value: str) -> str:
+        """Check the field contains no newlines."""
+        if "\n" in value:
+            raise self._invalid_metadata("{field} must be a single line")
+        return value
+
+    def _process_description_content_type(self, value: str) -> str:
+        content_types = {"text/plain", "text/x-rst", "text/markdown"}
+        message = email.message.EmailMessage()
+        message["content-type"] = value
+
+        content_type, parameters = (
+            # Defaults to `text/plain` if parsing failed.
+            message.get_content_type().lower(),
+            message["content-type"].params,
+        )
+        # Check if content-type is valid or defaulted to `text/plain` and thus was
+        # not parseable.
+        if content_type not in content_types or content_type not in value.lower():
+            raise self._invalid_metadata(
+                f"{{field}} must be one of {list(content_types)}, not {value!r}"
+            )
+
+        charset = parameters.get("charset", "UTF-8")
+        if charset != "UTF-8":
+            raise self._invalid_metadata(
+                f"{{field}} can only specify the UTF-8 charset, not {list(charset)}"
+            )
+
+        markdown_variants = {"GFM", "CommonMark"}
+        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
+        if content_type == "text/markdown" and variant not in markdown_variants:
+            raise self._invalid_metadata(
+                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
+                f"not {variant!r}",
+            )
+        return value
+
+    def _process_dynamic(self, value: list[str]) -> list[str]:
+        for dynamic_field in map(str.lower, value):
+            if dynamic_field in {"name", "version", "metadata-version"}:
+                raise self._invalid_metadata(
+                    f"{dynamic_field!r} is not allowed as a dynamic field"
+                )
+            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
+                raise self._invalid_metadata(
+                    f"{dynamic_field!r} is not a valid dynamic field"
+                )
+        return list(map(str.lower, value))
+
+    def _process_provides_extra(
+        self,
+        value: list[str],
+    ) -> list[utils.NormalizedName]:
+        normalized_names = []
+        try:
+            for name in value:
+                normalized_names.append(utils.canonicalize_name(name, validate=True))
+        except utils.InvalidName as exc:
+            raise self._invalid_metadata(
+                f"{name!r} is invalid for {{field}}", cause=exc
+            ) from exc
+        else:
+            return normalized_names
+
+    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
+        try:
+            return specifiers.SpecifierSet(value)
+        except specifiers.InvalidSpecifier as exc:
+            raise self._invalid_metadata(
+                f"{value!r} is invalid for {{field}}", cause=exc
+            ) from exc
+
+    def _process_requires_dist(
+        self,
+        value: list[str],
+    ) -> list[requirements.Requirement]:
+        reqs = []
+        try:
+            for req in value:
+                reqs.append(requirements.Requirement(req))
+        except requirements.InvalidRequirement as exc:
+            raise self._invalid_metadata(
+                f"{req!r} is invalid for {{field}}", cause=exc
+            ) from exc
+        else:
+            return reqs
+
+    def _process_license_expression(
+        self, value: str
+    ) -> NormalizedLicenseExpression | None:
+        try:
+            return licenses.canonicalize_license_expression(value)
+        except ValueError as exc:
+            raise self._invalid_metadata(
+                f"{value!r} is invalid for {{field}}", cause=exc
+            ) from exc
+
+    def _process_license_files(self, value: list[str]) -> list[str]:
+        paths = []
+        for path in value:
+            if ".." in path:
+                raise self._invalid_metadata(
+                    f"{path!r} is invalid for {{field}}, "
+                    "parent directory indicators are not allowed"
+                )
+            if "*" in path:
+                raise self._invalid_metadata(
+                    f"{path!r} is invalid for {{field}}, paths must be resolved"
+                )
+            if (
+                pathlib.PurePosixPath(path).is_absolute()
+                or pathlib.PureWindowsPath(path).is_absolute()
+            ):
+                raise self._invalid_metadata(
+                    f"{path!r} is invalid for {{field}}, paths must be relative"
+                )
+            if pathlib.PureWindowsPath(path).as_posix() != path:
+                raise self._invalid_metadata(
+                    f"{path!r} is invalid for {{field}}, paths must use '/' delimiter"
+                )
+            paths.append(path)
+        return paths
+
+
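Aside (editor's sketch of the caching trick _Validator relies on; class names here are made up for illustration): _Validator is a non-data descriptor, so once __get__ stores the converted value in the instance's __dict__, normal attribute lookup finds the cached value first and the descriptor is never consulted again.

    class _Cached:
        def __set_name__(self, owner, name):
            self.name = name

        def __get__(self, instance, owner):
            # Stand-in "conversion"; _Validator dispatches to _process_* methods.
            value = instance._raw.get(self.name, "").upper()
            instance.__dict__[self.name] = value  # shadows this non-data descriptor
            return value

    class Demo:
        field = _Cached()

        def __init__(self, raw):
            self._raw = raw

    d = Demo({"field": "hi"})
    assert d.field == "HI"          # first access invokes __get__
    assert "field" in d.__dict__    # cached; later lookups skip the descriptor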
+class Metadata:
+    """Representation of distribution metadata.
+
+    Compared to :class:`RawMetadata`, this class provides objects representing
+    metadata fields instead of only using built-in types. Any invalid metadata
+    will cause :exc:`InvalidMetadata` to be raised (with a
+    :py:attr:`~BaseException.__cause__` attribute as appropriate).
+    """
+
+    _raw: RawMetadata
+
+    @classmethod
+    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
+        """Create an instance from :class:`RawMetadata`.
+
+        If *validate* is true, all metadata will be validated. All exceptions
+        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+        """
+        ins = cls()
+        ins._raw = data.copy()  # Mutations occur due to caching enriched values.
+
+        if validate:
+            exceptions: list[Exception] = []
+            try:
+                metadata_version = ins.metadata_version
+                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
+            except InvalidMetadata as metadata_version_exc:
+                exceptions.append(metadata_version_exc)
+                metadata_version = None
+
+            # Make sure to check for the fields that are present, the required
+            # fields (so their absence can be reported).
+            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
+            # Remove fields that have already been checked.
+            fields_to_check -= {"metadata_version"}
+
+            for key in fields_to_check:
+                try:
+                    if metadata_version:
+                        # Can't use getattr() as that triggers descriptor protocol which
+                        # will fail due to no value for the instance argument.
+                        try:
+                            field_metadata_version = cls.__dict__[key].added
+                        except KeyError:
+                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
+                            exceptions.append(exc)
+                            continue
+                        field_age = _VALID_METADATA_VERSIONS.index(
+                            field_metadata_version
+                        )
+                        if field_age > metadata_age:
+                            field = _RAW_TO_EMAIL_MAPPING[key]
+                            exc = InvalidMetadata(
+                                field,
+                                f"{field} introduced in metadata version "
+                                f"{field_metadata_version}, not {metadata_version}",
+                            )
+                            exceptions.append(exc)
+                            continue
+                    getattr(ins, key)
+                except InvalidMetadata as exc:
+                    exceptions.append(exc)
+
+            if exceptions:
+                raise ExceptionGroup("invalid metadata", exceptions)
+
+        return ins
+
+    @classmethod
+    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
+        """Parse metadata from email headers.
+
+        If *validate* is true, the metadata will be validated. All exceptions
+        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+        """
+        raw, unparsed = parse_email(data)
+
+        if validate:
+            exceptions: list[Exception] = []
+            for unparsed_key in unparsed:
+                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
+                    message = f"{unparsed_key!r} has invalid data"
+                else:
+                    message = f"unrecognized field: {unparsed_key!r}"
+                exceptions.append(InvalidMetadata(unparsed_key, message))
+
+            if exceptions:
+                raise ExceptionGroup("unparsed", exceptions)
+
+        try:
+            return cls.from_raw(raw, validate=validate)
+        except ExceptionGroup as exc_group:
+            raise ExceptionGroup(
+                "invalid or unparsed metadata", exc_group.exceptions
+            ) from None
+
+    metadata_version: _Validator[_MetadataVersion] = _Validator()
+    """:external:ref:`core-metadata-metadata-version`
+    (required; validated to be a valid metadata version)"""
+    # `name` is not normalized/typed to NormalizedName so as to provide access to
+    # the original/raw name.
+    name: _Validator[str] = _Validator()
+    """:external:ref:`core-metadata-name`
+    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
+    *validate* parameter)"""
+    version: _Validator[version_module.Version] = _Validator()
+    """:external:ref:`core-metadata-version` (required)"""
+    dynamic: _Validator[list[str] | None] = _Validator(
+        added="2.2",
+    )
+    """:external:ref:`core-metadata-dynamic`
+    (validated against core metadata field names and lowercased)"""
+    platforms: _Validator[list[str] | None] = _Validator()
+    """:external:ref:`core-metadata-platform`"""
+    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
+    """:external:ref:`core-metadata-supported-platform`"""
+    summary: _Validator[str | None] = _Validator()
+    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
+    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
+    """:external:ref:`core-metadata-description`"""
+    description_content_type: _Validator[str | None] = _Validator(added="2.1")
+    """:external:ref:`core-metadata-description-content-type` (validated)"""
+    keywords: _Validator[list[str] | None] = _Validator()
+    """:external:ref:`core-metadata-keywords`"""
+    home_page: _Validator[str | None] = _Validator()
+    """:external:ref:`core-metadata-home-page`"""
+    download_url: _Validator[str | None] = _Validator(added="1.1")
+    """:external:ref:`core-metadata-download-url`"""
+    author: _Validator[str | None] = _Validator()
+    """:external:ref:`core-metadata-author`"""
+    author_email: _Validator[str | None] = _Validator()
+    """:external:ref:`core-metadata-author-email`"""
+    maintainer: _Validator[str | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-maintainer`"""
+    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-maintainer-email`"""
+    license: _Validator[str | None] = _Validator()
+    """:external:ref:`core-metadata-license`"""
+    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
+        added="2.4"
+    )
+    """:external:ref:`core-metadata-license-expression`"""
+    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
+    """:external:ref:`core-metadata-license-file`"""
+    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
+    """:external:ref:`core-metadata-classifier`"""
+    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
+        added="1.2"
+    )
+    """:external:ref:`core-metadata-requires-dist`"""
+    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
+        added="1.2"
+    )
+    """:external:ref:`core-metadata-requires-python`"""
+    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
+    # don't do any processing on the values.
+    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-requires-external`"""
+    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-project-url`"""
+    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
+    # regardless of metadata version.
+    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
+        added="2.1",
+    )
+    """:external:ref:`core-metadata-provides-extra`"""
+    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-provides-dist`"""
+    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+    """:external:ref:`core-metadata-obsoletes-dist`"""
+    requires: _Validator[list[str] | None] = _Validator(added="1.1")
+    """``Requires`` (deprecated)"""
+    provides: _Validator[list[str] | None] = _Validator(added="1.1")
+    """``Provides`` (deprecated)"""
+    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
+    """``Obsoletes`` (deprecated)"""
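Aside (editor's usage sketch, not part of the upstream file): the Metadata class yields enriched objects and gathers validation failures into an ExceptionGroup. The header strings are invented.

    meta = Metadata.from_email(
        "Metadata-Version: 2.1\nName: example-project\nVersion: 1.0\n"
    )
    print(type(meta.version))  # packaging.version.Version, not a plain str

    try:
        Metadata.from_email("Metadata-Version: 2.1\nName: ???\nVersion: bogus\n")
    except ExceptionGroup as eg:  # one InvalidMetadata per failing field
        print([str(exc) for exc in eg.exceptions])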
venv/lib/python3.13/site-packages/packaging/py.typed
ADDED
File without changes
venv/lib/python3.13/site-packages/packaging/requirements.py
ADDED
@@ -0,0 +1,91 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import annotations
+
+from typing import Any, Iterator
+
+from ._parser import parse_requirement as _parse_requirement
+from ._tokenizer import ParserSyntaxError
+from .markers import Marker, _normalize_extra_values
+from .specifiers import SpecifierSet
+from .utils import canonicalize_name
+
+
+class InvalidRequirement(ValueError):
+    """
+    An invalid requirement was found; users should refer to PEP 508.
+    """
+
+
+class Requirement:
+    """Parse a requirement.
+
+    Parse a given requirement string into its parts, such as name, specifier,
+    URL, and extras. Raises InvalidRequirement on a badly-formed requirement
+    string.
+    """
+
+    # TODO: Can we test whether something is contained within a requirement?
+    #       If so how do we do that? Do we need to test against the _name_ of
+    #       the thing as well as the version? What about the markers?
+    # TODO: Can we normalize the name and extra name?
+
+    def __init__(self, requirement_string: str) -> None:
+        try:
+            parsed = _parse_requirement(requirement_string)
+        except ParserSyntaxError as e:
+            raise InvalidRequirement(str(e)) from e
+
+        self.name: str = parsed.name
+        self.url: str | None = parsed.url or None
+        self.extras: set[str] = set(parsed.extras or [])
+        self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
+        self.marker: Marker | None = None
+        if parsed.marker is not None:
+            self.marker = Marker.__new__(Marker)
+            self.marker._markers = _normalize_extra_values(parsed.marker)
+
+    def _iter_parts(self, name: str) -> Iterator[str]:
+        yield name
+
+        if self.extras:
+            formatted_extras = ",".join(sorted(self.extras))
+            yield f"[{formatted_extras}]"
+
+        if self.specifier:
+            yield str(self.specifier)
+
+        if self.url:
+            yield f"@ {self.url}"
+            if self.marker:
+                yield " "
+
+        if self.marker:
+            yield f"; {self.marker}"
+
+    def __str__(self) -> str:
+        return "".join(self._iter_parts(self.name))
+
+    def __repr__(self) -> str:
+        return f"<Requirement('{self}')>"
+
+    def __hash__(self) -> int:
+        return hash(
+            (
+                self.__class__.__name__,
+                *self._iter_parts(canonicalize_name(self.name)),
+            )
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Requirement):
+            return NotImplemented
+
+        return (
+            canonicalize_name(self.name) == canonicalize_name(other.name)
+            and self.extras == other.extras
+            and self.specifier == other.specifier
+            and self.url == other.url
+            and self.marker == other.marker
+        )
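Aside (editor's usage sketch, not part of the upstream file): parsing a PEP 508 requirement string with the Requirement class above. The requirement string is invented.

    req = Requirement('example-project[cli]>=1.0; python_version >= "3.9"')
    print(req.name)       # example-project
    print(req.extras)     # {'cli'}
    print(req.specifier)  # >=1.0
    print(req.marker)     # python_version >= "3.9"
    print(str(req))       # round-trips the parts via _iter_parts()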