dheeena committed
Commit 7cb8c9d · verified · 1 Parent(s): aabd464

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed (50)
  1. venv/lib/python3.13/site-packages/_yaml/__init__.py +33 -0
  2. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER +1 -0
  3. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA +764 -0
  4. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD +35 -0
  5. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL +7 -0
  6. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt +2 -0
  7. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt +1 -0
  8. venv/lib/python3.13/site-packages/filelock/__init__.py +70 -0
  9. venv/lib/python3.13/site-packages/filelock/_api.py +403 -0
  10. venv/lib/python3.13/site-packages/filelock/_error.py +30 -0
  11. venv/lib/python3.13/site-packages/filelock/_soft.py +47 -0
  12. venv/lib/python3.13/site-packages/filelock/_unix.py +70 -0
  13. venv/lib/python3.13/site-packages/filelock/_util.py +52 -0
  14. venv/lib/python3.13/site-packages/filelock/_windows.py +65 -0
  15. venv/lib/python3.13/site-packages/filelock/asyncio.py +344 -0
  16. venv/lib/python3.13/site-packages/filelock/py.typed +0 -0
  17. venv/lib/python3.13/site-packages/filelock/version.py +34 -0
  18. venv/lib/python3.13/site-packages/fsspec/__init__.py +71 -0
  19. venv/lib/python3.13/site-packages/fsspec/_version.py +34 -0
  20. venv/lib/python3.13/site-packages/fsspec/caching.py +1004 -0
  21. venv/lib/python3.13/site-packages/fsspec/compression.py +182 -0
  22. venv/lib/python3.13/site-packages/fsspec/config.py +131 -0
  23. venv/lib/python3.13/site-packages/fsspec/conftest.py +125 -0
  24. venv/lib/python3.13/site-packages/fsspec/core.py +743 -0
  25. venv/lib/python3.13/site-packages/fsspec/dircache.py +98 -0
  26. venv/lib/python3.13/site-packages/fsspec/fuse.py +324 -0
  27. venv/lib/python3.13/site-packages/fsspec/generic.py +396 -0
  28. venv/lib/python3.13/site-packages/fsspec/gui.py +417 -0
  29. venv/lib/python3.13/site-packages/fsspec/json.py +117 -0
  30. venv/lib/python3.13/site-packages/fsspec/mapping.py +251 -0
  31. venv/lib/python3.13/site-packages/fsspec/parquet.py +541 -0
  32. venv/lib/python3.13/site-packages/fsspec/registry.py +330 -0
  33. venv/lib/python3.13/site-packages/fsspec/spec.py +2281 -0
  34. venv/lib/python3.13/site-packages/fsspec/transaction.py +90 -0
  35. venv/lib/python3.13/site-packages/hf_xet/__init__.py +5 -0
  36. venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER +1 -0
  37. venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA +209 -0
  38. venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD +22 -0
  39. venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL +4 -0
  40. venv/lib/python3.13/site-packages/packaging/__init__.py +15 -0
  41. venv/lib/python3.13/site-packages/packaging/_elffile.py +109 -0
  42. venv/lib/python3.13/site-packages/packaging/_manylinux.py +262 -0
  43. venv/lib/python3.13/site-packages/packaging/_musllinux.py +85 -0
  44. venv/lib/python3.13/site-packages/packaging/_parser.py +353 -0
  45. venv/lib/python3.13/site-packages/packaging/_structures.py +61 -0
  46. venv/lib/python3.13/site-packages/packaging/_tokenizer.py +195 -0
  47. venv/lib/python3.13/site-packages/packaging/markers.py +362 -0
  48. venv/lib/python3.13/site-packages/packaging/metadata.py +862 -0
  49. venv/lib/python3.13/site-packages/packaging/py.typed +0 -0
  50. venv/lib/python3.13/site-packages/packaging/requirements.py +91 -0
venv/lib/python3.13/site-packages/_yaml/__init__.py ADDED
@@ -0,0 +1,33 @@
+ # This is a stub package designed to roughly emulate the _yaml
+ # extension module, which previously existed as a standalone module
+ # and has been moved into the `yaml` package namespace.
+ # It does not perfectly mimic its old counterpart, but should get
+ # close enough for anyone who's relying on it even when they shouldn't.
+ import yaml
+
+ # In some circumstances, the yaml module we imported may be from a different version, so we need
+ # to tread carefully when poking at it here (it may not have the attributes we expect).
+ if not getattr(yaml, '__with_libyaml__', False):
+     from sys import version_info
+
+     exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
+     raise exc("No module named '_yaml'")
+ else:
+     from yaml._yaml import *
+     import warnings
+     warnings.warn(
+         'The _yaml extension module is now located at yaml._yaml'
+         ' and its location is subject to change. To use the'
+         ' LibYAML-based parser and emitter, import from `yaml`:'
+         ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
+         DeprecationWarning
+     )
+     del warnings
+     # Don't `del yaml` here because yaml is actually an existing
+     # namespace member of _yaml.
+
+ __name__ = '_yaml'
+ # If the module is top-level (i.e. not a part of any specific package)
+ # then the attribute should be set to ''.
+ # https://docs.python.org/3.8/library/types.html
+ __package__ = ''
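The deprecation warning in this stub spells out the supported import path. A minimal sketch of the recommended usage, assuming PyYAML was built with LibYAML (otherwise `CLoader` and `CDumper` do not exist):

```python
import yaml

# Preferred replacement for `import _yaml`: take the C-accelerated classes
# from the public `yaml` namespace, as the deprecation message suggests.
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:  # PyYAML built without LibYAML
    from yaml import SafeLoader as Loader, SafeDumper as Dumper

data = yaml.load("a: 1\nb: [2, 3]\n", Loader=Loader)
print(data)                          # {'a': 1, 'b': [2, 3]}
print(yaml.dump(data, Dumper=Dumper))
```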
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ pip
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA ADDED
@@ -0,0 +1,764 @@
+ Metadata-Version: 2.4
+ Name: charset-normalizer
+ Version: 3.4.4
+ Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
+ Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+ Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+ License: MIT
+ Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
+ Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
+ Project-URL: Code, https://github.com/jawah/charset_normalizer
+ Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
+ Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Topic :: Utilities
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: unicode-backport
+ Dynamic: license-file
+
+ <h1 align="center">Charset Detection, for Everyone 👋</h1>
+
+ <p align="center">
+   <sup>The Real First Universal Charset Detector</sup><br>
+   <a href="https://pypi.org/project/charset-normalizer">
+     <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
+   </a>
+   <a href="https://pepy.tech/project/charset-normalizer/">
+     <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
+   </a>
+   <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
+     <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
+   </a>
+ </p>
+ <p align="center">
+   <sup><i>Featured Packages</i></sup><br>
+   <a href="https://github.com/jawah/niquests">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
+   </a>
+   <a href="https://github.com/jawah/wassima">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
+   </a>
+ </p>
+ <p align="center">
+   <sup><i>In other languages (unofficial ports, by the community)</i></sup><br>
+   <a href="https://github.com/nickspring/charset-normalizer-rs">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
+   </a>
+ </p>
+
+ > A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
+ > I'm trying to resolve the issue by taking a new approach.
+ > All IANA character set names for which the Python core library provides codecs are supported.
+
+ <p align="center">
+   >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
+ </p>
+
+ This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
+
+ | Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
+ |--------------------------------------------------|:---------------------------------------------:|:------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
+ | `Fast`                                           | ❌ | ✅ | ✅ |
+ | `Universal**`                                    | ❌ | ✅ | ❌ |
+ | `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
+ | `Reliable` **with** distinguishable standards    | ✅ | ✅ | ✅ |
+ | `License`                                        | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
+ | `Native Python`                                  | ✅ | ✅ | ❌ |
+ | `Detect spoken language`                         | ❌ | ✅ | N/A |
+ | `UnicodeDecodeError Safety`                      | ❌ | ✅ | ❌ |
+ | `Whl Size (min)`                                 | 193.6 kB | 42 kB | ~200 kB |
+ | `Supported Encoding`                             | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
+
+ <p align="center">
+   <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
+ </p>
+
+ *\*\*: They clearly use code written for specific encodings, even if those cover most of the encodings in common use.*<br>
+
+ ## ⚡ Performance
+
+ This package offers better performance than its counterpart Chardet. Here are some numbers.
+
+ | Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
+ |-----------------------------------------------|:--------:|:------------------:|:------------------:|
+ | [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
+ | charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |
+
+ | Package                                       | 99th percentile | 95th percentile | 50th percentile |
+ |-----------------------------------------------|:---------------:|:---------------:|:---------------:|
+ | [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
+ | charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |
+
+ _updated as of December 2024 using CPython 3.12_
+
+ Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
+
+ > Stats are generated using 400+ files with the default parameters. For details on the files used, see the GHA workflows.
+ > And yes, these results might change at any time. The dataset can be updated to include more files.
+ > The actual delays depend heavily on your CPU capabilities, but the ratios should remain the same.
+ > Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capability
+ > (e.g. supported encodings). Challenge them if you want.
+
+ ## ✨ Installation
+
+ Using pip:
+
+ ```sh
+ pip install charset-normalizer -U
+ ```
+
+ ## 🚀 Basic Usage
+
+ ### CLI
+ This package comes with a CLI.
+
+ ```
+ usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
+                   file [file ...]
+
+ The Real First Universal Charset Detector. Discover originating encoding used
+ on text file. Normalize text to unicode.
+
+ positional arguments:
+   files                 File(s) to be analysed
+
+ optional arguments:
+   -h, --help            show this help message and exit
+   -v, --verbose         Display complementary information about file if any.
+                         Stdout will contain logs about the detection process.
+   -a, --with-alternative
+                         Output complementary possibilities if any. Top-level
+                         JSON WILL be a list.
+   -n, --normalize       Permit to normalize input file. If not set, program
+                         does not write anything.
+   -m, --minimal         Only output the charset detected to STDOUT. Disabling
+                         JSON output.
+   -r, --replace         Replace file when trying to normalize it instead of
+                         creating a new one.
+   -f, --force           Replace file without asking if you are sure, use this
+                         flag with caution.
+   -t THRESHOLD, --threshold THRESHOLD
+                         Define a custom maximum amount of chaos allowed in
+                         decoded content. 0. <= chaos <= 1.
+   --version             Show version information and exit.
+ ```
+
+ ```bash
+ normalizer ./data/sample.1.fr.srt
+ ```
+
+ or
+
+ ```bash
+ python -m charset_normalizer ./data/sample.1.fr.srt
+ ```
+
+ 🎉 Since version 1.4.0 the CLI produces easily usable stdout results in JSON format.
+
+ ```json
+ {
+     "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
+     "encoding": "cp1252",
+     "encoding_aliases": [
+         "1252",
+         "windows_1252"
+     ],
+     "alternative_encodings": [
+         "cp1254",
+         "cp1256",
+         "cp1258",
+         "iso8859_14",
+         "iso8859_15",
+         "iso8859_16",
+         "iso8859_3",
+         "iso8859_9",
+         "latin_1",
+         "mbcs"
+     ],
+     "language": "French",
+     "alphabets": [
+         "Basic Latin",
+         "Latin-1 Supplement"
+     ],
+     "has_sig_or_bom": false,
+     "chaos": 0.149,
+     "coherence": 97.152,
+     "unicode_path": null,
+     "is_preferred": true
+ }
+ ```
+
+ ### Python
+ *Just print out normalized text*
+ ```python
+ from charset_normalizer import from_path
+
+ results = from_path('./my_subtitle.srt')
+
+ print(str(results.best()))
+ ```
+
+ *Upgrade your code without effort*
+ ```python
+ from charset_normalizer import detect
+ ```
+
+ The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
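As a point of reference, a minimal sketch of what that drop-in replacement looks like in practice; the result keys (`encoding`, `language`, `confidence`) mirror chardet's shape, and the exact values shown in the comment are illustrative, not guaranteed:

```python
from charset_normalizer import detect

# Same call shape as chardet.detect(); accepts raw bytes.
payload = "Bonjour, où êtes-vous ?".encode("cp1252")
result = detect(payload)

# A dict in chardet's format, e.g.:
# {'encoding': 'cp1252', 'language': 'French', 'confidence': 0.85}
print(result["encoding"], result["confidence"])
```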
+
+ See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
+
+ ## 😇 Why
+
+ When I started using Chardet, I noticed that it did not suit my expectations, and I wanted to propose a
+ reliable alternative using a completely different method. Also! I never back down from a good challenge!
+
+ I **don't care** about the **originating charset** encoding, because **two different tables** can
+ produce **two identical rendered strings.**
+ What I want is to get readable text, the best I can.
+
+ In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
+
+ Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
+
+ ## 🍰 How
+
+ - Discard every charset encoding table that could not fit the binary content.
+ - Measure the noise, or mess, once the content is opened (in chunks) with a candidate charset encoding.
+ - Extract the matches with the lowest mess detected.
+ - Additionally, measure coherence / probe for a language.
+
+ **Wait a minute**, what are noise/mess and coherence according to **YOU?**
+
+ *Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
+ **I established** some ground rules about **what is obvious** when **it seems like** a mess (i.e., defining noise in rendered text).
+ I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
+ improve or rewrite it.
+
+ *Coherence:* For each language there is on earth, we have computed ranked letter-appearance occurrences (the best we can). I thought
+ that intel is worth something here, so I use those records against decoded text to check if I can detect intelligent design.
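Those two measurements surface on each match, as the JSON output earlier shows (`chaos` and `coherence`). A minimal sketch of inspecting them through the Python API; the attribute names are taken from that output, and the sample string is made up:

```python
from charset_normalizer import from_bytes

payload = "Héllo wörld, ценность".encode("utf-8")
matches = from_bytes(payload)

best = matches.best()
if best is not None:
    # Lower chaos (mess) and higher coherence are better, per the
    # selection rules described above.
    print(best.encoding, best.chaos, best.coherence, best.language)
```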
+
+ ## ⚡ Known limitations
+
+ - Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML with English tags plus Turkish content, both sharing Latin characters).
+ - Every charset detector depends heavily on having sufficient content. In common cases, do not bother running detection on very tiny content.
+
+ ## ⚠️ About Python EOLs
+
+ **If you are running:**
+
+ - Python >=2.7,<3.5: Unsupported
+ - Python 3.5: charset-normalizer < 2.1
+ - Python 3.6: charset-normalizer < 3.1
+ - Python 3.7: charset-normalizer < 4.0
+
+ Upgrade your Python interpreter as soon as possible.
+
+ ## 👤 Contributing
+
+ Contributions, issues and feature requests are very much welcome.<br />
+ Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
+
+ ## 📝 License
+
+ Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
+ This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
+
+ Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
+
+ ## 💼 For Enterprise
+
+ Professional support for charset-normalizer is available as part of the [Tidelift
+ Subscription][1]. Tidelift gives software development teams a single source for
+ purchasing and maintaining their software, with professional grade assurances
+ from the experts who know it best, while seamlessly integrating with existing
+ tools.
+
+ [1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
+
+ [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
+
+ # Changelog
+ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+
+ ## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
+
+ ### Changed
+ - Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
+ - Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2
+
+ ### Removed
+ - `setuptools-scm` as a build dependency.
+
+ ### Misc
+ - Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
+ - Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
+ - Restored `multiple.intoto.jsonl` in GitHub releases in addition to an individual attestation file per wheel.
+
+ ## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
+
+ ### Changed
+ - mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
+ - Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output. (#391)
+
+ ### Added
+ - Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
+ - Support for Python 3.14
+
+ ### Fixed
+ - The sdist archive contained useless directories.
+ - Automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
+
+ ### Misc
+ - SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
+   Each published wheel comes with its SBOM. We chose CycloneDX as the format.
+ - Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
+
+ ## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
+
+ ### Fixed
+ - Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
+ - Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
+
+ ### Changed
+ - Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
+
+ ## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
+
+ ### Changed
+ - Project metadata is now stored using `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
+ - Enforce delayed annotation loading for simpler and consistent types in the project.
+ - Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
+
+ ### Added
+ - pre-commit configuration.
+ - noxfile.
+
+ ### Removed
+ - `build-requirements.txt` as per using `pyproject.toml` native build configuration.
+ - `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
+ - `setup.cfg` in favor of `pyproject.toml` metadata configuration.
+ - Unused `utils.range_scan` function.
+
+ ### Fixed
+ - Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
+ - Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
+
+ ## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
+
+ ### Added
+ - Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
+ - Support for Python 3.13 (#512)
+
+ ### Fixed
+ - Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
+ - Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
+ - Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
+
+ ## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
+
+ ### Fixed
+ - Unintentional memory usage regression when using a large payload that matches several encodings (#376)
+ - Regression on some detection cases showcased in the documentation (#371)
+
+ ### Added
+ - Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
+
+ ## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
+
+ ### Changed
+ - Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
+ - Improved the general detection reliability based on reports from the community
+
+ ## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
+
+ ### Added
+ - Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
+ - Support for 9 forgotten encodings that are supported by Python but unlisted in `encodings.aliases` as they have no alias (#323)
+
+ ### Removed
+ - (internal) Redundant utils.is_ascii function and unused function is_private_use_only
+ - (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
+
+ ### Changed
+ - (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
+ - Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
+
+ ### Fixed
+ - Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
+
+ ## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
+
+ ### Changed
+ - Typehint for function `from_path` no longer enforces `PathLike` as its first argument
+ - Minor improvement over the global detection reliability
+
+ ### Added
+ - Introduce function `is_binary` that relies on main capabilities, and is optimized to detect binaries
+ - Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
+ - Explicit support for Python 3.12
+
+ ### Fixed
+ - Edge case detection failure where a file would contain a 'very-long' camel-cased word (Issue #289)
+
+ ## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
+
+ ### Added
+ - Argument `should_rename_legacy` for the legacy function `detect`; disregard any new arguments without errors (PR #262)
+
+ ### Removed
+ - Support for Python 3.6 (PR #260)
+
+ ### Changed
+ - Optional speedup provided by mypy/c 1.0.1
+
+ ## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
+
+ ### Fixed
+ - Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
+
+ ### Changed
+ - Speedup provided by mypy/c 0.990 on Python >= 3.7
+
+ ## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
+
+ ### Added
+ - Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log details of the mess-detector results
+ - Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
+ - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+ - `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)
+
+ ### Changed
+ - Build with static metadata using the 'build' frontend
+ - Make the language detection stricter
+ - Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1
+
+ ### Fixed
+ - CLI with opt --normalize failed when using a full path for files
+ - TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
+ - Sphinx warnings when generating the documentation
+
+ ### Removed
+ - Coherence detector no longer returns 'Simple English'; it returns 'English' instead
+ - Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
+ - Breaking: Methods `first()` and `best()` from CharsetMatch
+ - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
+ - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+ - Breaking: Top-level function `normalize`
+ - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+ - Support for the backport `unicodedata2`
+
+ ## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
+
+ ### Added
+ - Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log details of the mess-detector results
+ - Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
+ - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+
+ ### Changed
+ - Build with static metadata using the 'build' frontend
+ - Make the language detection stricter
+
+ ### Fixed
+ - CLI with opt --normalize failed when using a full path for files
+ - TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
+
+ ### Removed
+ - Coherence detector no longer returns 'Simple English'; it returns 'English' instead
+ - Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
+
+ ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
+
+ ### Added
+ - `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)
+
+ ### Removed
+ - Breaking: Methods `first()` and `best()` from CharsetMatch
+ - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
+
+ ### Fixed
+ - Sphinx warnings when generating the documentation
+
+ ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+
+ ### Changed
+ - Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1
+
+ ### Removed
+ - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+ - Breaking: Top-level function `normalize`
+ - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+ - Support for the backport `unicodedata2`
+
+ ## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
+
+ ### Deprecated
+ - Function `normalize` scheduled for removal in 3.0
+
+ ### Changed
+ - Removed a useless call to decode in fn is_unprintable (#206)
+
+ ### Fixed
+ - Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
+
+ ## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
+
+ ### Added
+ - Output the Unicode table version when running the CLI with `--version` (PR #194)
+
+ ### Changed
+ - Re-use decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel) (PR #175)
+ - Fixing some performance bottlenecks, from [@deedy5](https://github.com/deedy5) (PR #183)
+
+ ### Fixed
+ - Workaround for a potential bug in cpython where Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, is not acknowledged as a space (PR #175)
+ - CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
+
+ ### Removed
+ - Support for Python 3.5 (PR #192)
+
+ ### Deprecated
+ - Use of backport unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)
+
+ ## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
+
+ ### Fixed
+ - ASCII mis-detection in rare cases (PR #170)
+
+ ## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
+
+ ### Added
+ - Explicit support for Python 3.11 (PR #164)
+
+ ### Changed
+ - The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
+
+ ## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
+
+ ### Fixed
+ - Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
+
+ ### Changed
+ - Skipping the language-detection (CD) on ASCII (PR #155)
+
+ ## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
+
+ ### Changed
+ - Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
+
+ ### Fixed
+ - Wrong logging level applied when setting kwarg `explain` to True (PR #146)
+
+ ## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
+ ### Changed
+ - Improvement over Vietnamese detection (PR #126)
+ - MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
+ - Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar) (PR #122)
+ - Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
+ - Code style as refactored by Sourcery-AI (PR #131)
+ - Minor adjustment on the MD around European words (PR #133)
+ - Remove and replace SRTs from assets / tests (PR #139)
+ - Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes) (PR #135)
+ - Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
+
+ ### Fixed
+ - Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
+ - Avoid using too-insignificant chunks (PR #137)
+
+ ### Added
+ - Add and expose function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes) (PR #135)
+ - Add `CHANGELOG.md` entries; format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
+
+ ## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
+ ### Added
+ - Add support for Kazakh (Cyrillic) language detection (PR #109)
+
+ ### Changed
+ - Further improve inferring the language from a given single-byte code page (PR #112)
+ - Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
+ - Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar) (PR #113)
+ - Various detection improvements (MD+CD) (PR #117)
+
+ ### Removed
+ - Remove redundant logging entry about detected language(s) (PR #115)
+
+ ### Fixed
+ - Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
+
+ ## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
+ ### Fixed
+ - Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
+ - Fix CLI crash when using --minimal output in certain cases (PR #103)
+
+ ### Changed
+ - Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
+
+ ## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
+ ### Changed
+ - The project now complies with flake8, mypy, isort and black to ensure better overall quality (PR #81)
+ - The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
+ - The Unicode detection is slightly improved (PR #93)
+ - Add syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)
+
+ ### Removed
+ - The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
+
+ ### Fixed
+ - In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
+ - Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
+ - The MANIFEST.in was not exhaustive (PR #78)
+
+ ## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
+ ### Fixed
+ - The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
+ - Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
+ - The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
+ - Submatch factoring could be wrong in rare edge cases (PR #72)
+ - Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
+ - Fix line endings from CRLF to LF for certain project files (PR #67)
+
+ ### Changed
+ - Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
+ - Allow fallback on a specified encoding if any (PR #71)
+
+ ## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
+ ### Changed
+ - Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
+ - According to the community's wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)
+
+ ## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
+ ### Fixed
+ - Empty/too-small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
+
+ ### Changed
+ - Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)
+
+ ## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
+ ### Fixed
+ - Make it work where there isn't a filesystem available by dropping the assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
+ - Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
+ - One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
+ - Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
+
+ ### Changed
+ - Public function normalize default argument values were not aligned with from_bytes (PR #53)
+
+ ### Added
+ - You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
+
+ ## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
+ ### Changed
+ - 4x to 5x faster than the previous 1.4.0 release. At least 2x faster than Chardet.
+ - Emphasis has been put on UTF-8 detection; it should perform nearly instantaneously.
+ - The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
+ - The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
+ - The program has been rewritten to ease readability and maintainability (now using static typing).
+ - utf_7 detection has been reinstated.
+
+ ### Removed
+ - This package no longer requires anything when used with Python 3.5 (dropped cached_property)
+ - Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
+ - The exception hook on UnicodeDecodeError has been removed.
+
+ ### Deprecated
+ - Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
+
+ ### Fixed
+ - The CLI output used the relative path of the file(s). It should be absolute.
+
+ ## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
+ ### Fixed
+ - Logger configuration/usage no longer conflicts with others (PR #44)
+
+ ## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
+ ### Removed
+ - Using standard logging instead of the loguru package.
+ - Dropping the nose test framework in favor of the maintained pytest.
+ - Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
+ - Require cached_property only for Python 3.5 due to a constraint. Dropping it for every other interpreter version.
+ - Stop supporting UTF-7 that does not contain a SIG.
+ - Dropping PrettyTable, replaced with pure JSON output in the CLI.
+
+ ### Fixed
+ - The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
+ - Not searching properly for the BOM when trying the utf32/16 parent codec.
+
+ ### Changed
+ - Improving the package's final size by compressing frequencies.json.
+ - Huge improvement on the largest payloads.
+
+ ### Added
+ - CLI now produces JSON-consumable output.
+ - Return ASCII if the given sequences fit, given reasonable confidence.
+
+ ## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
+
+ ### Fixed
+ - In some very rare cases, you might end up getting encode/decode errors due to a bad bytes payload (PR #40)
+
+ ## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
+
+ ### Fixed
+ - An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)
+
+ ## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
+
+ ### Fixed
+ - The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)
+
+ ## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
+
+ ### Changed
+ - Amend the previous release to allow prettytable 2.0 (PR #35)
+
+ ## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
+
+ ### Fixed
+ - Fix an error when using the package with a Python pre-release interpreter (PR #33)
+
+ ### Changed
+ - Dependencies refactoring, constraints revised.
+
+ ### Added
+ - Add Python 3.9 and 3.10 to the supported interpreters
+
+ MIT License
+
+ Copyright (c) 2025 TAHRI Ahmed R.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD ADDED
@@ -0,0 +1,35 @@
+ ../../../bin/normalizer,sha256=0NCCWHGXwNJFGXe9vG0dHrG67nHnzOFp4ZWd0RQ0qoI,225
+ charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+ charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303
+ charset_normalizer-3.4.4.dist-info/RECORD,,
+ charset_normalizer-3.4.4.dist-info/WHEEL,sha256=2iHh9e2o6T3nHtu_NVT7Cs7pebIqF94rZK8zrQfgoJI,190
+ charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
+ charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
+ charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
+ charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
+ charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
+ charset_normalizer/__pycache__/__init__.cpython-313.pyc,,
+ charset_normalizer/__pycache__/__main__.cpython-313.pyc,,
+ charset_normalizer/__pycache__/api.cpython-313.pyc,,
+ charset_normalizer/__pycache__/cd.cpython-313.pyc,,
+ charset_normalizer/__pycache__/constant.cpython-313.pyc,,
+ charset_normalizer/__pycache__/legacy.cpython-313.pyc,,
+ charset_normalizer/__pycache__/md.cpython-313.pyc,,
+ charset_normalizer/__pycache__/models.cpython-313.pyc,,
+ charset_normalizer/__pycache__/utils.cpython-313.pyc,,
+ charset_normalizer/__pycache__/version.cpython-313.pyc,,
+ charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668
+ charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
+ charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
+ charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646
+ charset_normalizer/cli/__pycache__/__init__.cpython-313.pyc,,
+ charset_normalizer/cli/__pycache__/__main__.cpython-313.pyc,,
+ charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713
+ charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731
+ charset_normalizer/md.cpython-313-x86_64-linux-gnu.so,sha256=sZ7umtJLjKfA83NFJ7npkiDyr06zDT8cWtl6uIx2MsM,15912
+ charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145
+ charset_normalizer/md__mypyc.cpython-313-x86_64-linux-gnu.so,sha256=i-yavqPJtZwjTKvP9hBLZ8CLZD88rVtguaSoLHso_Oc,291056
+ charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
+ charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170
+ charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL ADDED
@@ -0,0 +1,7 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: false
+ Tag: cp313-cp313-manylinux_2_17_x86_64
+ Tag: cp313-cp313-manylinux2014_x86_64
+ Tag: cp313-cp313-manylinux_2_28_x86_64
+
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ normalizer = charset_normalizer.cli:cli_detect
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ charset_normalizer
venv/lib/python3.13/site-packages/filelock/__init__.py ADDED
@@ -0,0 +1,70 @@
+ """
+ A platform independent file lock that supports the with-statement.
+
+ .. autodata:: filelock.__version__
+    :no-value:
+
+ """
+
+ from __future__ import annotations
+
+ import sys
+ import warnings
+ from typing import TYPE_CHECKING
+
+ from ._api import AcquireReturnProxy, BaseFileLock
+ from ._error import Timeout
+ from ._soft import SoftFileLock
+ from ._unix import UnixFileLock, has_fcntl
+ from ._windows import WindowsFileLock
+ from .asyncio import (
+     AsyncAcquireReturnProxy,
+     AsyncSoftFileLock,
+     AsyncUnixFileLock,
+     AsyncWindowsFileLock,
+     BaseAsyncFileLock,
+ )
+ from .version import version
+
+ #: version of the project as a string
+ __version__: str = version
+
+
+ if sys.platform == "win32":  # pragma: win32 cover
+     _FileLock: type[BaseFileLock] = WindowsFileLock
+     _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock
+ else:  # pragma: win32 no cover  # noqa: PLR5501
+     if has_fcntl:
+         _FileLock: type[BaseFileLock] = UnixFileLock
+         _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock
+     else:
+         _FileLock = SoftFileLock
+         _AsyncFileLock = AsyncSoftFileLock
+         if warnings is not None:
+             warnings.warn("only soft file lock is available", stacklevel=2)
+
+ if TYPE_CHECKING:
+     FileLock = SoftFileLock
+     AsyncFileLock = AsyncSoftFileLock
+ else:
+     #: Alias for the lock, which should be used for the current platform.
+     FileLock = _FileLock
+     AsyncFileLock = _AsyncFileLock
+
+
+ __all__ = [
+     "AcquireReturnProxy",
+     "AsyncAcquireReturnProxy",
+     "AsyncFileLock",
+     "AsyncSoftFileLock",
+     "AsyncUnixFileLock",
+     "AsyncWindowsFileLock",
+     "BaseAsyncFileLock",
+     "BaseFileLock",
+     "FileLock",
+     "SoftFileLock",
+     "Timeout",
+     "UnixFileLock",
+     "WindowsFileLock",
+     "__version__",
+ ]
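The module docstring promises with-statement support; a minimal usage sketch of the exported `FileLock` alias and `Timeout` exception (the file paths here are made up for illustration):

```python
from filelock import FileLock, Timeout

lock = FileLock("/tmp/demo.txt.lock", timeout=5)  # hypothetical lock-file path

try:
    with lock:  # blocks for up to 5 seconds, then raises Timeout
        # Critical section: only one process at a time gets here.
        with open("/tmp/demo.txt", "a") as f:
            f.write("exclusive write\n")
except Timeout:
    print("another process is holding demo.txt.lock")
```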
venv/lib/python3.13/site-packages/filelock/_api.py ADDED
@@ -0,0 +1,403 @@
+ from __future__ import annotations
+
+ import contextlib
+ import inspect
+ import logging
+ import os
+ import time
+ import warnings
+ from abc import ABCMeta, abstractmethod
+ from dataclasses import dataclass
+ from threading import local
+ from typing import TYPE_CHECKING, Any, cast
+ from weakref import WeakValueDictionary
+
+ from ._error import Timeout
+
+ if TYPE_CHECKING:
+     import sys
+     from types import TracebackType
+
+     if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+         from typing import Self
+     else:  # pragma: no cover (<py311)
+         from typing_extensions import Self
+
+
+ _LOGGER = logging.getLogger("filelock")
+
+
+ # This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+ # is not called twice when entering the with statement. If we simply returned *self*, the lock would be acquired
+ # again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
+ class AcquireReturnProxy:
+     """A context-aware object that will release the lock file when exiting."""
+
+     def __init__(self, lock: BaseFileLock) -> None:
+         self.lock = lock
+
+     def __enter__(self) -> BaseFileLock:
+         return self.lock
+
+     def __exit__(
+         self,
+         exc_type: type[BaseException] | None,
+         exc_value: BaseException | None,
+         traceback: TracebackType | None,
+     ) -> None:
+         self.lock.release()
+
+
+ @dataclass
+ class FileLockContext:
+     """A dataclass which holds the context for a ``BaseFileLock`` object."""
+
+     # The context is held in a separate class to allow optional use of thread local storage via the
+     # ThreadLocalFileContext class.
+
+     #: The path to the lock file.
+     lock_file: str
+
+     #: The default timeout value.
+     timeout: float
+
+     #: The mode for the lock files
+     mode: int
+
+     #: Whether the lock should be blocking or not
+     blocking: bool
+
+     #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
+     lock_file_fd: int | None = None
+
+     #: The lock counter is used for implementing the nested locking mechanism.
+     lock_counter: int = 0  # Increased when the lock is acquired; the lock is only released when this value is 0
+
+
+ class ThreadLocalFileContext(FileLockContext, local):
+     """A thread local version of the ``FileLockContext`` class."""
+
+
+ class FileLockMeta(ABCMeta):
+     def __call__(  # noqa: PLR0913
+         cls,
+         lock_file: str | os.PathLike[str],
+         timeout: float = -1,
+         mode: int = 0o644,
+         thread_local: bool = True,  # noqa: FBT001, FBT002
+         *,
+         blocking: bool = True,
+         is_singleton: bool = False,
+         **kwargs: Any,  # capture remaining kwargs for subclasses  # noqa: ANN401
+     ) -> BaseFileLock:
+         if is_singleton:
+             instance = cls._instances.get(str(lock_file))  # type: ignore[attr-defined]
+             if instance:
+                 params_to_check = {
+                     "thread_local": (thread_local, instance.is_thread_local()),
+                     "timeout": (timeout, instance.timeout),
+                     "mode": (mode, instance.mode),
+                     "blocking": (blocking, instance.blocking),
+                 }
+
+                 non_matching_params = {
+                     name: (passed_param, set_param)
+                     for name, (passed_param, set_param) in params_to_check.items()
+                     if passed_param != set_param
+                 }
+                 if not non_matching_params:
+                     return cast("BaseFileLock", instance)
+
+                 # parameters do not match; raise error
+                 msg = "Singleton lock instances cannot be initialized with differing arguments"
+                 msg += "\nNon-matching arguments: "
+                 for param_name, (passed_param, set_param) in non_matching_params.items():
+                     msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)"
+                 raise ValueError(msg)
+
+         # Workaround to make `__init__`'s params optional in subclasses
+         # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant
+         # (https://github.com/tox-dev/filelock/pull/340)
+
+         all_params = {
+             "timeout": timeout,
+             "mode": mode,
+             "thread_local": thread_local,
+             "blocking": blocking,
+             "is_singleton": is_singleton,
+             **kwargs,
+         }
+
+         present_params = inspect.signature(cls.__init__).parameters  # type: ignore[misc]
+         init_params = {key: value for key, value in all_params.items() if key in present_params}
+
+         instance = super().__call__(lock_file, **init_params)
+
+         if is_singleton:
+             cls._instances[str(lock_file)] = instance  # type: ignore[attr-defined]
+
+         return cast("BaseFileLock", instance)
+
+
+ class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta):
+     """Abstract base class for a file lock object."""
+
+     _instances: WeakValueDictionary[str, BaseFileLock]
+
+     def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None:
+         """Set up unique state for lock subclasses."""
+         super().__init_subclass__(**kwargs)
+         cls._instances = WeakValueDictionary()
+
+     def __init__(  # noqa: PLR0913
+         self,
+         lock_file: str | os.PathLike[str],
+         timeout: float = -1,
+         mode: int = 0o644,
+         thread_local: bool = True,  # noqa: FBT001, FBT002
+         *,
+         blocking: bool = True,
+         is_singleton: bool = False,
+     ) -> None:
+         """
+         Create a new lock object.
+
+         :param lock_file: path to the file
+         :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+             the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+             to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
+         :param mode: file permissions for the lockfile
+         :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+             ``False`` then the lock will be reentrant across threads.
+         :param blocking: whether the lock should be blocking or not
+         :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+             per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+             to pass the same object around.
+
+         """
+         self._is_thread_local = thread_local
+         self._is_singleton = is_singleton
+
+         # Create the context. Note that external code should not work with the context directly and should instead use
+         # properties of this class.
+         kwargs: dict[str, Any] = {
+             "lock_file": os.fspath(lock_file),
+             "timeout": timeout,
+             "mode": mode,
+             "blocking": blocking,
+         }
+         self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
+
+     def is_thread_local(self) -> bool:
+         """:return: a flag indicating if this lock is thread local or not"""
+         return self._is_thread_local
+
+     @property
+     def is_singleton(self) -> bool:
+         """:return: a flag indicating if this lock is singleton or not"""
+         return self._is_singleton
+
+     @property
+     def lock_file(self) -> str:
+         """:return: path to the lock file"""
+         return self._context.lock_file
+
+     @property
+     def timeout(self) -> float:
+         """
+         :return: the default timeout value, in seconds
+
+         .. versionadded:: 2.0.0
+         """
+         return self._context.timeout
+
+     @timeout.setter
+     def timeout(self, value: float | str) -> None:
+         """
+         Change the default timeout value.
+
+         :param value: the new value, in seconds
+
+         """
+         self._context.timeout = float(value)
+
224
+ @property
225
+ def blocking(self) -> bool:
226
+ """:return: whether the locking is blocking or not"""
227
+ return self._context.blocking
228
+
229
+ @blocking.setter
230
+ def blocking(self, value: bool) -> None:
231
+ """
232
+ Change the default blocking value.
233
+
234
+ :param value: the new value as bool
235
+
236
+ """
237
+ self._context.blocking = value
238
+
239
+ @property
240
+ def mode(self) -> int:
241
+ """:return: the file permissions for the lockfile"""
242
+ return self._context.mode
243
+
244
+ @abstractmethod
245
+ def _acquire(self) -> None:
246
+ """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
247
+ raise NotImplementedError
248
+
249
+ @abstractmethod
250
+ def _release(self) -> None:
251
+ """Releases the lock and sets self._context.lock_file_fd to None."""
252
+ raise NotImplementedError
253
+
254
+ @property
255
+ def is_locked(self) -> bool:
256
+ """
257
+
258
+ :return: A boolean indicating if the lock file is holding the lock currently.
259
+
260
+ .. versionchanged:: 2.0.0
261
+
262
+ This was previously a method and is now a property.
263
+ """
264
+ return self._context.lock_file_fd is not None
265
+
266
+ @property
267
+ def lock_counter(self) -> int:
268
+ """:return: The number of times this lock has been acquired (but not yet released)."""
269
+ return self._context.lock_counter
270
+
271
+ def acquire(
272
+ self,
273
+ timeout: float | None = None,
274
+ poll_interval: float = 0.05,
275
+ *,
276
+ poll_intervall: float | None = None,
277
+ blocking: bool | None = None,
278
+ ) -> AcquireReturnProxy:
279
+ """
280
+ Try to acquire the file lock.
281
+
282
+ :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout`, and
283
+ if ``timeout < 0``, there is no timeout and this method will block until the lock can be acquired
284
+ :param poll_interval: interval of trying to acquire the lock file
285
+ :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
286
+ :param blocking: defaults to True. If False, the method raises :exc:`Timeout` immediately if it cannot obtain a lock on the
287
+ first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
288
+ :raises Timeout: if fails to acquire lock within the timeout period
289
+ :return: a context object that will unlock the file when the context is exited
290
+
291
+ .. code-block:: python
292
+
293
+ # You can use this method as a context manager (recommended)
294
+ with lock.acquire():
295
+ pass
296
+
297
+ # Or use an equivalent try-finally construct:
298
+ lock.acquire()
299
+ try:
300
+ pass
301
+ finally:
302
+ lock.release()
303
+
304
+ .. versionchanged:: 2.0.0
305
+
306
+ This method now returns a *proxy* object instead of *self*,
307
+ so that it can be used in a with statement without side effects.
308
+
309
+ """
310
+ # Use the default timeout, if no timeout is provided.
311
+ if timeout is None:
312
+ timeout = self._context.timeout
313
+
314
+ if blocking is None:
315
+ blocking = self._context.blocking
316
+
317
+ if poll_intervall is not None:
318
+ msg = "use poll_interval instead of poll_intervall"
319
+ warnings.warn(msg, DeprecationWarning, stacklevel=2)
320
+ poll_interval = poll_intervall
321
+
322
+ # Increment the number right at the beginning. We can still undo it, if something fails.
323
+ self._context.lock_counter += 1
324
+
325
+ lock_id = id(self)
326
+ lock_filename = self.lock_file
327
+ start_time = time.perf_counter()
328
+ try:
329
+ while True:
330
+ if not self.is_locked:
331
+ _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
332
+ self._acquire()
333
+ if self.is_locked:
334
+ _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
335
+ break
336
+ if blocking is False:
337
+ _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
338
+ raise Timeout(lock_filename) # noqa: TRY301
339
+ if 0 <= timeout < time.perf_counter() - start_time:
340
+ _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
341
+ raise Timeout(lock_filename) # noqa: TRY301
342
+ msg = "Lock %s not acquired on %s, waiting %s seconds ..."
343
+ _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
344
+ time.sleep(poll_interval)
345
+ except BaseException: # Something did go wrong, so decrement the counter.
346
+ self._context.lock_counter = max(0, self._context.lock_counter - 1)
347
+ raise
348
+ return AcquireReturnProxy(lock=self)
349
+
350
+ def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002
351
+ """
352
+ Releases the file lock. Note that the lock is only completely released if the lock counter is 0.
353
+ Also note that the lock file itself is not automatically deleted.
354
+
355
+ :param force: If true, the lock counter is ignored and the lock is released in every case.
356
+
357
+ """
358
+ if self.is_locked:
359
+ self._context.lock_counter -= 1
360
+
361
+ if self._context.lock_counter == 0 or force:
362
+ lock_id, lock_filename = id(self), self.lock_file
363
+
364
+ _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
365
+ self._release()
366
+ self._context.lock_counter = 0
367
+ _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
368
+
369
+ def __enter__(self) -> Self:
370
+ """
371
+ Acquire the lock.
372
+
373
+ :return: the lock object
374
+
375
+ """
376
+ self.acquire()
377
+ return self
378
+
379
+ def __exit__(
380
+ self,
381
+ exc_type: type[BaseException] | None,
382
+ exc_value: BaseException | None,
383
+ traceback: TracebackType | None,
384
+ ) -> None:
385
+ """
386
+ Release the lock.
387
+
388
+ :param exc_type: the exception type if raised
389
+ :param exc_value: the exception value if raised
390
+ :param traceback: the exception traceback if raised
391
+
392
+ """
393
+ self.release()
394
+
395
+ def __del__(self) -> None:
396
+ """Called when the lock object is deleted."""
397
+ self.release(force=True)
398
+
399
+
400
+ __all__ = [
401
+ "AcquireReturnProxy",
402
+ "BaseFileLock",
403
+ ]
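
Taken together, `FileLockMeta`, `FileLockContext` and `BaseFileLock` define the behaviour the concrete lock classes inherit. A minimal usage sketch of the public API (the lock-file name is illustrative):

    from filelock import FileLock

    lock = FileLock("counter.txt.lock", timeout=10)  # illustrative path

    # Locks are reentrant: nested acquires only bump lock_counter, and the
    # file is unlocked when the outermost context exits.
    with lock:
        with lock:
            assert lock.is_locked
            assert lock.lock_counter == 2
    assert not lock.is_locked
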
venv/lib/python3.13/site-packages/filelock/_error.py ADDED
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
+ class Timeout(TimeoutError): # noqa: N818
7
+ """Raised when the lock could not be acquired in *timeout* seconds."""
8
+
9
+ def __init__(self, lock_file: str) -> None:
10
+ super().__init__()
11
+ self._lock_file = lock_file
12
+
13
+ def __reduce__(self) -> str | tuple[Any, ...]:
14
+ return self.__class__, (self._lock_file,) # Properly pickle the exception
15
+
16
+ def __str__(self) -> str:
17
+ return f"The file lock '{self._lock_file}' could not be acquired."
18
+
19
+ def __repr__(self) -> str:
20
+ return f"{self.__class__.__name__}({self.lock_file!r})"
21
+
22
+ @property
23
+ def lock_file(self) -> str:
24
+ """:return: The path of the file lock."""
25
+ return self._lock_file
26
+
27
+
28
+ __all__ = [
29
+ "Timeout",
30
+ ]
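
Because `Timeout` subclasses `TimeoutError`, callers can catch either name. A short sketch (path illustrative):

    from filelock import FileLock, Timeout

    lock = FileLock("busy.lock")
    try:
        lock.acquire(timeout=0.1)
    except Timeout:  # or: except TimeoutError
        print(f"could not lock {lock.lock_file}")
    else:
        lock.release()
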
venv/lib/python3.13/site-packages/filelock/_soft.py ADDED
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import EACCES, EEXIST
7
+ from pathlib import Path
8
+
9
+ from ._api import BaseFileLock
10
+ from ._util import ensure_directory_exists, raise_on_not_writable_file
11
+
12
+
13
+ class SoftFileLock(BaseFileLock):
14
+ """Simply watches the existence of the lock file."""
15
+
16
+ def _acquire(self) -> None:
17
+ raise_on_not_writable_file(self.lock_file)
18
+ ensure_directory_exists(self.lock_file)
19
+ # first check for existence and read-only mode, as the open call would mask this case as EEXIST
20
+ flags = (
21
+ os.O_WRONLY # open for writing only
22
+ | os.O_CREAT
23
+ | os.O_EXCL # together with O_CREAT above, raises EEXIST if the file specified by filename exists
24
+ | os.O_TRUNC # truncate the file to zero bytes
25
+ )
26
+ try:
27
+ file_handler = os.open(self.lock_file, flags, self._context.mode)
28
+ except OSError as exception: # re-raise unless expected exception
29
+ if not (
30
+ exception.errno == EEXIST # lock already exists
31
+ or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock
32
+ ): # pragma: win32 no cover
33
+ raise
34
+ else:
35
+ self._context.lock_file_fd = file_handler
36
+
37
+ def _release(self) -> None:
38
+ assert self._context.lock_file_fd is not None # noqa: S101
39
+ os.close(self._context.lock_file_fd) # the lock file is definitely not None
40
+ self._context.lock_file_fd = None
41
+ with suppress(OSError): # the file is already deleted and that's what we want
42
+ Path(self.lock_file).unlink()
43
+
44
+
45
+ __all__ = [
46
+ "SoftFileLock",
47
+ ]
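
`SoftFileLock` depends only on the lock file's existence, so it also works on filesystems without `flock`/`msvcrt` support (for example some network mounts); the trade-off is that a crash between acquire and release leaves a stale lock file behind. A sketch with an illustrative path:

    from filelock import SoftFileLock

    # The lock file exists only while the lock is held; it is unlinked on release.
    with SoftFileLock("/mnt/nfs/shared.lock", timeout=30):
        ...
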
venv/lib/python3.13/site-packages/filelock/_unix.py ADDED
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import ENOSYS
7
+ from pathlib import Path
8
+ from typing import cast
9
+
10
+ from ._api import BaseFileLock
11
+ from ._util import ensure_directory_exists
12
+
13
+ #: a flag to indicate if the fcntl API is available
14
+ has_fcntl = False
15
+ if sys.platform == "win32": # pragma: win32 cover
16
+
17
+ class UnixFileLock(BaseFileLock):
18
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
19
+
20
+ def _acquire(self) -> None:
21
+ raise NotImplementedError
22
+
23
+ def _release(self) -> None:
24
+ raise NotImplementedError
25
+
26
+ else: # pragma: win32 no cover
27
+ try:
28
+ import fcntl
29
+
30
+ _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN)
31
+ except (ImportError, AttributeError):
32
+ pass
33
+ else:
34
+ has_fcntl = True
35
+
36
+ class UnixFileLock(BaseFileLock):
37
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
38
+
39
+ def _acquire(self) -> None:
40
+ ensure_directory_exists(self.lock_file)
41
+ open_flags = os.O_RDWR | os.O_TRUNC
42
+ if not Path(self.lock_file).exists():
43
+ open_flags |= os.O_CREAT
44
+ fd = os.open(self.lock_file, open_flags, self._context.mode)
45
+ with suppress(PermissionError): # This lock is not owned by this UID
46
+ os.fchmod(fd, self._context.mode)
47
+ try:
48
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
49
+ except OSError as exception:
50
+ os.close(fd)
51
+ if exception.errno == ENOSYS: # flock not implemented by this filesystem
52
+ msg = "FileSystem does not appear to support flock; use SoftFileLock instead"
53
+ raise NotImplementedError(msg) from exception
54
+ else:
55
+ self._context.lock_file_fd = fd
56
+
57
+ def _release(self) -> None:
58
+ # Do not remove the lockfile:
59
+ # https://github.com/tox-dev/py-filelock/issues/31
60
+ # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
61
+ fd = cast("int", self._context.lock_file_fd)
62
+ self._context.lock_file_fd = None
63
+ fcntl.flock(fd, fcntl.LOCK_UN)
64
+ os.close(fd)
65
+
66
+
67
+ __all__ = [
68
+ "UnixFileLock",
69
+ "has_fcntl",
70
+ ]
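
Since `has_fcntl` is computed once at import time, it can be used to pick a working lock class up front on platforms where `fcntl` is missing. A sketch; note that `filelock._unix` is a private module, so this import is illustrative rather than a stable API:

    from filelock import SoftFileLock
    from filelock._unix import UnixFileLock, has_fcntl

    # Fall back to the existence-based lock when fcntl is unavailable;
    # UnixFileLock itself raises NotImplementedError on ENOSYS filesystems.
    LockClass = UnixFileLock if has_fcntl else SoftFileLock
    lock = LockClass("data.lock")  # illustrative path
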
venv/lib/python3.13/site-packages/filelock/_util.py ADDED
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import stat
5
+ import sys
6
+ from errno import EACCES, EISDIR
7
+ from pathlib import Path
8
+
9
+
10
+ def raise_on_not_writable_file(filename: str) -> None:
11
+ """
12
+ Raise an exception if attempting to open the file for writing would fail.
13
+
14
+ This is done so files that will never be writable can be separated from files that are writable but currently
15
+ locked.
16
+
17
+ :param filename: file to check
18
+ :raises OSError: as if the file was opened for writing.
19
+
20
+ """
21
+ try: # use stat to do exists + can write to check without race condition
22
+ file_stat = os.stat(filename) # noqa: PTH116
23
+ except OSError:
24
+ return # swallow does not exist or other errors
25
+
26
+ if file_stat.st_mtime != 0: # if os.stat succeeds but the modification time is zero, the result is invalid - ignore it
27
+ if not (file_stat.st_mode & stat.S_IWUSR):
28
+ raise PermissionError(EACCES, "Permission denied", filename)
29
+
30
+ if stat.S_ISDIR(file_stat.st_mode):
31
+ if sys.platform == "win32": # pragma: win32 cover
32
+ # On Windows, this is PermissionError
33
+ raise PermissionError(EACCES, "Permission denied", filename)
34
+ else: # pragma: win32 no cover # noqa: RET506
35
+ # On linux / macOS, this is IsADirectoryError
36
+ raise IsADirectoryError(EISDIR, "Is a directory", filename)
37
+
38
+
39
+ def ensure_directory_exists(filename: Path | str) -> None:
40
+ """
41
+ Ensure the directory containing the file exists (create it if necessary).
42
+
43
+ :param filename: file.
44
+
45
+ """
46
+ Path(filename).parent.mkdir(parents=True, exist_ok=True)
47
+
48
+
49
+ __all__ = [
50
+ "ensure_directory_exists",
51
+ "raise_on_not_writable_file",
52
+ ]
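
A quick sketch of what these helpers do, with illustrative paths (`filelock._util` is private, so this is for illustration only):

    import os
    import stat
    from filelock._util import ensure_directory_exists, raise_on_not_writable_file

    ensure_directory_exists("/tmp/demo/locks/app.lock")  # creates /tmp/demo/locks
    path = "/tmp/demo/locks/app.lock"
    open(path, "w").close()
    os.chmod(path, stat.S_IRUSR)  # owner read-only: never writable
    try:
        raise_on_not_writable_file(path)
    except PermissionError:
        print("file can never be opened for writing, it is not merely locked")
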
venv/lib/python3.13/site-packages/filelock/_windows.py ADDED
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import EACCES
7
+ from pathlib import Path
8
+ from typing import cast
9
+
10
+ from ._api import BaseFileLock
11
+ from ._util import ensure_directory_exists, raise_on_not_writable_file
12
+
13
+ if sys.platform == "win32": # pragma: win32 cover
14
+ import msvcrt
15
+
16
+ class WindowsFileLock(BaseFileLock):
17
+ """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
18
+
19
+ def _acquire(self) -> None:
20
+ raise_on_not_writable_file(self.lock_file)
21
+ ensure_directory_exists(self.lock_file)
22
+ flags = (
23
+ os.O_RDWR # open for read and write
24
+ | os.O_CREAT # create file if not exists
25
+ | os.O_TRUNC # truncate file if not empty
26
+ )
27
+ try:
28
+ fd = os.open(self.lock_file, flags, self._context.mode)
29
+ except OSError as exception:
30
+ if exception.errno != EACCES: # has no access to this lock
31
+ raise
32
+ else:
33
+ try:
34
+ msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
35
+ except OSError as exception:
36
+ os.close(fd) # close file first
37
+ if exception.errno != EACCES: # file is already locked
38
+ raise
39
+ else:
40
+ self._context.lock_file_fd = fd
41
+
42
+ def _release(self) -> None:
43
+ fd = cast("int", self._context.lock_file_fd)
44
+ self._context.lock_file_fd = None
45
+ msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
46
+ os.close(fd)
47
+
48
+ with suppress(OSError): # Probably another instance of the application has acquired the file lock.
49
+ Path(self.lock_file).unlink()
50
+
51
+ else: # pragma: win32 no cover
52
+
53
+ class WindowsFileLock(BaseFileLock):
54
+ """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
55
+
56
+ def _acquire(self) -> None:
57
+ raise NotImplementedError
58
+
59
+ def _release(self) -> None:
60
+ raise NotImplementedError
61
+
62
+
63
+ __all__ = [
64
+ "WindowsFileLock",
65
+ ]
venv/lib/python3.13/site-packages/filelock/asyncio.py ADDED
@@ -0,0 +1,344 @@
1
+ """An asyncio-based implementation of the file lock."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import contextlib
7
+ import logging
8
+ import os
9
+ import time
10
+ from dataclasses import dataclass
11
+ from inspect import iscoroutinefunction
12
+ from threading import local
13
+ from typing import TYPE_CHECKING, Any, NoReturn, cast
14
+
15
+ from ._api import BaseFileLock, FileLockContext, FileLockMeta
16
+ from ._error import Timeout
17
+ from ._soft import SoftFileLock
18
+ from ._unix import UnixFileLock
19
+ from ._windows import WindowsFileLock
20
+
21
+ if TYPE_CHECKING:
22
+ import sys
23
+ from collections.abc import Callable
24
+ from concurrent import futures
25
+ from types import TracebackType
26
+
27
+ if sys.version_info >= (3, 11): # pragma: no cover (py311+)
28
+ from typing import Self
29
+ else: # pragma: no cover (<py311)
30
+ from typing_extensions import Self
31
+
32
+
33
+ _LOGGER = logging.getLogger("filelock")
34
+
35
+
36
+ @dataclass
37
+ class AsyncFileLockContext(FileLockContext):
38
+ """A dataclass which holds the context for a ``BaseAsyncFileLock`` object."""
39
+
40
+ #: Whether to run in an executor
41
+ run_in_executor: bool = True
42
+
43
+ #: The executor
44
+ executor: futures.Executor | None = None
45
+
46
+ #: The loop
47
+ loop: asyncio.AbstractEventLoop | None = None
48
+
49
+
50
+ class AsyncThreadLocalFileContext(AsyncFileLockContext, local):
51
+ """A thread local version of the ``FileLockContext`` class."""
52
+
53
+
54
+ class AsyncAcquireReturnProxy:
55
+ """A context-aware object that will release the lock file when exiting."""
56
+
57
+ def __init__(self, lock: BaseAsyncFileLock) -> None: # noqa: D107
58
+ self.lock = lock
59
+
60
+ async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105
61
+ return self.lock
62
+
63
+ async def __aexit__( # noqa: D105
64
+ self,
65
+ exc_type: type[BaseException] | None,
66
+ exc_value: BaseException | None,
67
+ traceback: TracebackType | None,
68
+ ) -> None:
69
+ await self.lock.release()
70
+
71
+
72
+ class AsyncFileLockMeta(FileLockMeta):
73
+ def __call__( # type: ignore[override] # noqa: PLR0913
74
+ cls, # noqa: N805
75
+ lock_file: str | os.PathLike[str],
76
+ timeout: float = -1,
77
+ mode: int = 0o644,
78
+ thread_local: bool = False, # noqa: FBT001, FBT002
79
+ *,
80
+ blocking: bool = True,
81
+ is_singleton: bool = False,
82
+ loop: asyncio.AbstractEventLoop | None = None,
83
+ run_in_executor: bool = True,
84
+ executor: futures.Executor | None = None,
85
+ ) -> BaseAsyncFileLock:
86
+ if thread_local and run_in_executor:
87
+ msg = "run_in_executor is not supported when thread_local is True"
88
+ raise ValueError(msg)
89
+ instance = super().__call__(
90
+ lock_file=lock_file,
91
+ timeout=timeout,
92
+ mode=mode,
93
+ thread_local=thread_local,
94
+ blocking=blocking,
95
+ is_singleton=is_singleton,
96
+ loop=loop,
97
+ run_in_executor=run_in_executor,
98
+ executor=executor,
99
+ )
100
+ return cast("BaseAsyncFileLock", instance)
101
+
102
+
103
+ class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta):
104
+ """Base class for asynchronous file locks."""
105
+
106
+ def __init__( # noqa: PLR0913
107
+ self,
108
+ lock_file: str | os.PathLike[str],
109
+ timeout: float = -1,
110
+ mode: int = 0o644,
111
+ thread_local: bool = False, # noqa: FBT001, FBT002
112
+ *,
113
+ blocking: bool = True,
114
+ is_singleton: bool = False,
115
+ loop: asyncio.AbstractEventLoop | None = None,
116
+ run_in_executor: bool = True,
117
+ executor: futures.Executor | None = None,
118
+ ) -> None:
119
+ """
120
+ Create a new lock object.
121
+
122
+ :param lock_file: path to the file
123
+ :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
124
+ the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
125
+ to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
126
+ :param mode: file permissions for the lockfile
127
+ :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
128
+ ``False`` then the lock will be reentrant across threads.
129
+ :param blocking: whether the lock should be blocking or not
130
+ :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
131
+ per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
132
+ to pass the same object around.
133
+ :param loop: The event loop to use. If not specified, the running event loop will be used.
134
+ :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor.
135
+ :param executor: The executor to use. If not specified, the default executor will be used.
136
+
137
+ """
138
+ self._is_thread_local = thread_local
139
+ self._is_singleton = is_singleton
140
+
141
+ # Create the context. Note that external code should not work with the context directly and should instead use
142
+ # properties of this class.
143
+ kwargs: dict[str, Any] = {
144
+ "lock_file": os.fspath(lock_file),
145
+ "timeout": timeout,
146
+ "mode": mode,
147
+ "blocking": blocking,
148
+ "loop": loop,
149
+ "run_in_executor": run_in_executor,
150
+ "executor": executor,
151
+ }
152
+ self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)(
153
+ **kwargs
154
+ )
155
+
156
+ @property
157
+ def run_in_executor(self) -> bool:
158
+ """:return: whether to run in an executor."""
159
+ return self._context.run_in_executor
160
+
161
+ @property
162
+ def executor(self) -> futures.Executor | None:
163
+ """:return: the executor."""
164
+ return self._context.executor
165
+
166
+ @executor.setter
167
+ def executor(self, value: futures.Executor | None) -> None: # pragma: no cover
168
+ """
169
+ Change the executor.
170
+
171
+ :param value: the new executor or ``None``
172
+ :type value: futures.Executor | None
173
+
174
+ """
175
+ self._context.executor = value
176
+
177
+ @property
178
+ def loop(self) -> asyncio.AbstractEventLoop | None:
179
+ """:return: the event loop."""
180
+ return self._context.loop
181
+
182
+ async def acquire( # type: ignore[override]
183
+ self,
184
+ timeout: float | None = None,
185
+ poll_interval: float = 0.05,
186
+ *,
187
+ blocking: bool | None = None,
188
+ ) -> AsyncAcquireReturnProxy:
189
+ """
190
+ Try to acquire the file lock.
191
+
192
+ :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default
193
+ :attr:`~BaseFileLock.timeout`, and if ``timeout < 0``, there is no timeout and
194
+ this method will block until the lock can be acquired
195
+ :param poll_interval: interval of trying to acquire the lock file
196
+ :param blocking: defaults to True. If False, the method raises :exc:`Timeout` immediately if it cannot obtain a lock on the
197
+ first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
198
+ :raises Timeout: if fails to acquire lock within the timeout period
199
+ :return: a context object that will unlock the file when the context is exited
200
+
201
+ .. code-block:: python
202
+
203
+ # You can use this method as an async context manager (recommended)
204
+ async with lock.acquire():
205
+ pass
206
+
207
+ # Or use an equivalent try-finally construct:
208
+ await lock.acquire()
209
+ try:
210
+ pass
211
+ finally:
212
+ await lock.release()
213
+
214
+ """
215
+ # Use the default timeout, if no timeout is provided.
216
+ if timeout is None:
217
+ timeout = self._context.timeout
218
+
219
+ if blocking is None:
220
+ blocking = self._context.blocking
221
+
222
+ # Increment the number right at the beginning. We can still undo it, if something fails.
223
+ self._context.lock_counter += 1
224
+
225
+ lock_id = id(self)
226
+ lock_filename = self.lock_file
227
+ start_time = time.perf_counter()
228
+ try:
229
+ while True:
230
+ if not self.is_locked:
231
+ _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
232
+ await self._run_internal_method(self._acquire)
233
+ if self.is_locked:
234
+ _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
235
+ break
236
+ if blocking is False:
237
+ _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
238
+ raise Timeout(lock_filename) # noqa: TRY301
239
+ if 0 <= timeout < time.perf_counter() - start_time:
240
+ _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
241
+ raise Timeout(lock_filename) # noqa: TRY301
242
+ msg = "Lock %s not acquired on %s, waiting %s seconds ..."
243
+ _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
244
+ await asyncio.sleep(poll_interval)
245
+ except BaseException: # Something did go wrong, so decrement the counter.
246
+ self._context.lock_counter = max(0, self._context.lock_counter - 1)
247
+ raise
248
+ return AsyncAcquireReturnProxy(lock=self)
249
+
250
+ async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002
251
+ """
252
+ Releases the file lock. Note that the lock is only completely released if the lock counter is 0.
253
+ Also note that the lock file itself is not automatically deleted.
254
+
255
+ :param force: If true, the lock counter is ignored and the lock is released in every case.
256
+
257
+ """
258
+ if self.is_locked:
259
+ self._context.lock_counter -= 1
260
+
261
+ if self._context.lock_counter == 0 or force:
262
+ lock_id, lock_filename = id(self), self.lock_file
263
+
264
+ _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
265
+ await self._run_internal_method(self._release)
266
+ self._context.lock_counter = 0
267
+ _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
268
+
269
+ async def _run_internal_method(self, method: Callable[[], Any]) -> None:
270
+ if iscoroutinefunction(method):
271
+ await method()
272
+ elif self.run_in_executor:
273
+ loop = self.loop or asyncio.get_running_loop()
274
+ await loop.run_in_executor(self.executor, method)
275
+ else:
276
+ method()
277
+
278
+ def __enter__(self) -> NoReturn:
279
+ """
280
+ Replace old __enter__ method to avoid using it.
281
+
282
+ NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD.
283
+
284
+ :return: none
285
+ :rtype: NoReturn
286
+ """
287
+ msg = "Do not use `with` for asyncio locks, use `async with` instead."
288
+ raise NotImplementedError(msg)
289
+
290
+ async def __aenter__(self) -> Self:
291
+ """
292
+ Acquire the lock.
293
+
294
+ :return: the lock object
295
+
296
+ """
297
+ await self.acquire()
298
+ return self
299
+
300
+ async def __aexit__(
301
+ self,
302
+ exc_type: type[BaseException] | None,
303
+ exc_value: BaseException | None,
304
+ traceback: TracebackType | None,
305
+ ) -> None:
306
+ """
307
+ Release the lock.
308
+
309
+ :param exc_type: the exception type if raised
310
+ :param exc_value: the exception value if raised
311
+ :param traceback: the exception traceback if raised
312
+
313
+ """
314
+ await self.release()
315
+
316
+ def __del__(self) -> None:
317
+ """Called when the lock object is deleted."""
318
+ with contextlib.suppress(RuntimeError):
319
+ loop = self.loop or asyncio.get_running_loop()
320
+ if not loop.is_running(): # pragma: no cover
321
+ loop.run_until_complete(self.release(force=True))
322
+ else:
323
+ loop.create_task(self.release(force=True))
324
+
325
+
326
+ class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock):
327
+ """Simply watches the existence of the lock file."""
328
+
329
+
330
+ class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock):
331
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
332
+
333
+
334
+ class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock):
335
+ """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems."""
336
+
337
+
338
+ __all__ = [
339
+ "AsyncAcquireReturnProxy",
340
+ "AsyncSoftFileLock",
341
+ "AsyncUnixFileLock",
342
+ "AsyncWindowsFileLock",
343
+ "BaseAsyncFileLock",
344
+ ]
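
A sketch of the async API, assuming the package-level `AsyncFileLock` alias that filelock exposes for the platform's async lock class (path illustrative):

    import asyncio
    from filelock import AsyncFileLock

    async def main() -> None:
        lock = AsyncFileLock("async.lock", timeout=5)
        async with lock:  # _acquire/_release run in the default executor
            ...

    asyncio.run(main())
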
venv/lib/python3.13/site-packages/filelock/py.typed ADDED
File without changes
venv/lib/python3.13/site-packages/filelock/version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '3.20.0'
32
+ __version_tuple__ = version_tuple = (3, 20, 0)
33
+
34
+ __commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,71 @@
1
+ from . import caching
2
+ from ._version import __version__ # noqa: F401
3
+ from .callbacks import Callback
4
+ from .compression import available_compressions
5
+ from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
6
+ from .exceptions import FSTimeoutError
7
+ from .mapping import FSMap, get_mapper
8
+ from .registry import (
9
+ available_protocols,
10
+ filesystem,
11
+ get_filesystem_class,
12
+ register_implementation,
13
+ registry,
14
+ )
15
+ from .spec import AbstractFileSystem
16
+
17
+ __all__ = [
18
+ "AbstractFileSystem",
19
+ "FSTimeoutError",
20
+ "FSMap",
21
+ "filesystem",
22
+ "register_implementation",
23
+ "get_filesystem_class",
24
+ "get_fs_token_paths",
25
+ "get_mapper",
26
+ "open",
27
+ "open_files",
28
+ "open_local",
29
+ "registry",
30
+ "caching",
31
+ "Callback",
32
+ "available_protocols",
33
+ "available_compressions",
34
+ "url_to_fs",
35
+ ]
36
+
37
+
38
+ def process_entries():
39
+ try:
40
+ from importlib.metadata import entry_points
41
+ except ImportError:
42
+ return
43
+ if entry_points is not None:
44
+ try:
45
+ eps = entry_points()
46
+ except TypeError:
47
+ pass # importlib-metadata < 0.8
48
+ else:
49
+ if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0
50
+ specs = eps.select(group="fsspec.specs")
51
+ else:
52
+ specs = eps.get("fsspec.specs", [])
53
+ registered_names = {}
54
+ for spec in specs:
55
+ err_msg = f"Unable to load filesystem from {spec}"
56
+ name = spec.name
57
+ if name in registered_names:
58
+ continue
59
+ registered_names[name] = True
60
+ register_implementation(
61
+ name,
62
+ spec.value.replace(":", "."),
63
+ errtxt=err_msg,
64
+ # Take our implementation as the one to use, overwriting any
65
+ # already-registered (possibly identical) implementation we
66
+ # may encounter
67
+ clobber=True,
68
+ )
69
+
70
+
71
+ process_entries()
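
`process_entries` is what lets third-party backends be addressed by protocol name alone; the same registry is also reachable directly. A sketch using the built-in memory backend:

    import fsspec

    fs = fsspec.filesystem("memory")  # resolved through the registry
    with fs.open("/demo.txt", "wb") as f:
        f.write(b"hello")

    # includes any backends registered via the fsspec.specs entry point
    print(fsspec.available_protocols())
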
venv/lib/python3.13/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '2025.10.0'
32
+ __version_tuple__ = version_tuple = (2025, 10, 0)
33
+
34
+ __commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/caching.py ADDED
@@ -0,0 +1,1004 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+ import math
7
+ import os
8
+ import threading
9
+ import warnings
10
+ from collections import OrderedDict
11
+ from concurrent.futures import Future, ThreadPoolExecutor
12
+ from itertools import groupby
13
+ from operator import itemgetter
14
+ from typing import (
15
+ TYPE_CHECKING,
16
+ Any,
17
+ Callable,
18
+ ClassVar,
19
+ Generic,
20
+ NamedTuple,
21
+ TypeVar,
22
+ )
23
+
24
+ if TYPE_CHECKING:
25
+ import mmap
26
+
27
+ from typing_extensions import ParamSpec
28
+
29
+ P = ParamSpec("P")
30
+ else:
31
+ P = TypeVar("P")
32
+
33
+ T = TypeVar("T")
34
+
35
+
36
+ logger = logging.getLogger("fsspec")
37
+
38
+ Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
39
+ MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes
40
+
41
+
42
+ class BaseCache:
43
+ """Pass-through cache: doesn't keep anything, calls the fetcher every time
44
+
45
+ Acts as base class for other cachers
46
+
47
+ Parameters
48
+ ----------
49
+ blocksize: int
50
+ How far to read ahead, in bytes
51
+ fetcher: func
52
+ Function of the form f(start, end) which gets bytes from remote as
53
+ specified
54
+ size: int
55
+ How big this file is
56
+ """
57
+
58
+ name: ClassVar[str] = "none"
59
+
60
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
61
+ self.blocksize = blocksize
62
+ self.nblocks = 0
63
+ self.fetcher = fetcher
64
+ self.size = size
65
+ self.hit_count = 0
66
+ self.miss_count = 0
67
+ # the bytes that we actually requested
68
+ self.total_requested_bytes = 0
69
+
70
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
71
+ if start is None:
72
+ start = 0
73
+ if stop is None:
74
+ stop = self.size
75
+ if start >= self.size or start >= stop:
76
+ return b""
77
+ return self.fetcher(start, stop)
78
+
79
+ def _reset_stats(self) -> None:
80
+ """Reset hit and miss counts for a more granular report, e.g. by file."""
81
+ self.hit_count = 0
82
+ self.miss_count = 0
83
+ self.total_requested_bytes = 0
84
+
85
+ def _log_stats(self) -> str:
86
+ """Return a formatted string of the cache statistics."""
87
+ if self.hit_count == 0 and self.miss_count == 0:
88
+ # a cache that does nothing, this is for logs only
89
+ return ""
90
+ return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
91
+
92
+ def __repr__(self) -> str:
93
+ # TODO: use rich for better formatting
94
+ return f"""
95
+ <{self.__class__.__name__}:
96
+ block size : {self.blocksize}
97
+ block count : {self.nblocks}
98
+ file size : {self.size}
99
+ cache hits : {self.hit_count}
100
+ cache misses: {self.miss_count}
101
+ total requested bytes: {self.total_requested_bytes}>
102
+ """
103
+
104
+
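
The concrete subclasses below register themselves under a short `name`, and buffered file objects select one via the `cache_type` open argument. A hedged sketch (URL illustrative; requires a backend whose files go through fsspec's buffering):

    import fsspec

    # cache_type picks a class from this module by its ``name``;
    # block_size sets the read-ahead granularity.
    with fsspec.open(
        "https://example.com/data.bin", "rb", cache_type="readahead", block_size=2**20
    ) as f:
        header = f.read(1024)
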
105
+ class MMapCache(BaseCache):
106
+ """memory-mapped sparse file cache
107
+
108
+ Opens a temporary file, which is filled block-wise as data is requested.
109
+ Ensure there is enough disk space in the temporary location.
110
+
111
+ This cache method might only work on POSIX systems.
112
+
113
+ Parameters
114
+ ----------
115
+ blocksize: int
116
+ How far to read ahead, in bytes
117
+ fetcher: Fetcher
118
+ Function of the form f(start, end) which gets bytes from remote as
119
+ specified
120
+ size: int
121
+ How big this file is
122
+ location: str
123
+ Where to create the temporary file. If None, a temporary file is
124
+ created using tempfile.TemporaryFile().
125
+ blocks: set[int]
126
+ Set of block numbers that have already been fetched. If None, an empty
127
+ set is created.
128
+ multi_fetcher: MultiFetcher
129
+ Function of the form f([(start, end)]) which gets bytes from remote
130
+ as specified. This function is used to fetch multiple blocks at once.
131
+ If not specified, the fetcher function is used instead.
132
+ """
133
+
134
+ name = "mmap"
135
+
136
+ def __init__(
137
+ self,
138
+ blocksize: int,
139
+ fetcher: Fetcher,
140
+ size: int,
141
+ location: str | None = None,
142
+ blocks: set[int] | None = None,
143
+ multi_fetcher: MultiFetcher | None = None,
144
+ ) -> None:
145
+ super().__init__(blocksize, fetcher, size)
146
+ self.blocks = set() if blocks is None else blocks
147
+ self.location = location
148
+ self.multi_fetcher = multi_fetcher
149
+ self.cache = self._makefile()
150
+
151
+ def _makefile(self) -> mmap.mmap | bytearray:
152
+ import mmap
153
+ import tempfile
154
+
155
+ if self.size == 0:
156
+ return bytearray()
157
+
158
+ # posix version
159
+ if self.location is None or not os.path.exists(self.location):
160
+ if self.location is None:
161
+ fd = tempfile.TemporaryFile()
162
+ self.blocks = set()
163
+ else:
164
+ fd = open(self.location, "wb+")
165
+ fd.seek(self.size - 1)
166
+ fd.write(b"1")
167
+ fd.flush()
168
+ else:
169
+ fd = open(self.location, "r+b")
170
+
171
+ return mmap.mmap(fd.fileno(), self.size)
172
+
173
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
174
+ logger.debug(f"MMap cache fetching {start}-{end}")
175
+ if start is None:
176
+ start = 0
177
+ if end is None:
178
+ end = self.size
179
+ if start >= self.size or start >= end:
180
+ return b""
181
+ start_block = start // self.blocksize
182
+ end_block = end // self.blocksize
183
+ block_range = range(start_block, end_block + 1)
184
+ # Determine which blocks need to be fetched. This sequence is sorted by construction.
185
+ need = (i for i in block_range if i not in self.blocks)
186
+ # Count the number of blocks already cached
187
+ self.hit_count += sum(1 for i in block_range if i in self.blocks)
188
+
189
+ ranges = []
190
+
191
+ # Consolidate needed blocks.
192
+ # Algorithm adapted from Python 2.x itertools documentation.
193
+ # We are grouping an enumerated sequence of blocks. The key computes the difference
194
+ # between an ascending counter (provided by enumerate) and the needed block numbers,
195
+ # which stays constant across a run of consecutive blocks (see the standalone sketch after this class).
196
+ # Whenever the difference changes, we know that we have previously cached block(s),
197
+ # and a new group is started. In other words, this algorithm neatly groups
198
+ # runs of consecutive block numbers so they can be fetched together.
199
+ for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
200
+ # Extract the blocks from the enumerated sequence
201
+ _blocks = tuple(map(itemgetter(1), _blocks))
202
+ # Compute start of first block
203
+ sstart = _blocks[0] * self.blocksize
204
+ # Compute the end of the last block. Last block may not be full size.
205
+ send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
206
+
207
+ # Fetch bytes (could be multiple consecutive blocks)
208
+ self.total_requested_bytes += send - sstart
209
+ logger.debug(
210
+ f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
211
+ )
212
+ ranges.append((sstart, send))
213
+
214
+ # Update set of cached blocks
215
+ self.blocks.update(_blocks)
216
+ # Update cache statistics with number of blocks we had to cache
217
+ self.miss_count += len(_blocks)
218
+
219
+ if not ranges:
220
+ return self.cache[start:end]
221
+
222
+ if self.multi_fetcher:
223
+ logger.debug(f"MMap get blocks {ranges}")
224
+ for idx, r in enumerate(self.multi_fetcher(ranges)):
225
+ (sstart, send) = ranges[idx]
226
+ logger.debug(f"MMap copy block ({sstart}-{send})")
227
+ self.cache[sstart:send] = r
228
+ else:
229
+ for sstart, send in ranges:
230
+ logger.debug(f"MMap get block ({sstart}-{send})")
231
+ self.cache[sstart:send] = self.fetcher(sstart, send)
232
+
233
+ return self.cache[start:end]
234
+
235
+ def __getstate__(self) -> dict[str, Any]:
236
+ state = self.__dict__.copy()
237
+ # Remove the unpicklable entries.
238
+ del state["cache"]
239
+ return state
240
+
241
+ def __setstate__(self, state: dict[str, Any]) -> None:
242
+ # Restore instance attributes
243
+ self.__dict__.update(state)
244
+ self.cache = self._makefile()
245
+
246
+
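
The run-consolidation idiom used by `MMapCache._fetch` is easiest to see in isolation; a standalone sketch:

    from itertools import groupby
    from operator import itemgetter

    need = [3, 4, 5, 9, 10, 14]  # block numbers still missing from the cache
    runs = []
    for _, group in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
        blocks = list(map(itemgetter(1), group))
        runs.append((blocks[0], blocks[-1]))

    print(runs)  # [(3, 5), (9, 10), (14, 14)] -- one fetch per consecutive run
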
247
+ class ReadAheadCache(BaseCache):
248
+ """Cache which reads only when we get beyond a block of data
249
+
250
+ This is a much simpler version of BytesCache, and does not attempt to
251
+ fill holes in the cache or keep fragments alive. It is best suited to
252
+ many small reads in a sequential order (e.g., reading lines from a file).
253
+ """
254
+
255
+ name = "readahead"
256
+
257
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
258
+ super().__init__(blocksize, fetcher, size)
259
+ self.cache = b""
260
+ self.start = 0
261
+ self.end = 0
262
+
263
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
264
+ if start is None:
265
+ start = 0
266
+ if end is None or end > self.size:
267
+ end = self.size
268
+ if start >= self.size or start >= end:
269
+ return b""
270
+ l = end - start
271
+ if start >= self.start and end <= self.end:
272
+ # cache hit
273
+ self.hit_count += 1
274
+ return self.cache[start - self.start : end - self.start]
275
+ elif self.start <= start < self.end:
276
+ # partial hit
277
+ self.miss_count += 1
278
+ part = self.cache[start - self.start :]
279
+ l -= len(part)
280
+ start = self.end
281
+ else:
282
+ # miss
283
+ self.miss_count += 1
284
+ part = b""
285
+ end = min(self.size, end + self.blocksize)
286
+ self.total_requested_bytes += end - start
287
+ self.cache = self.fetcher(start, end) # new block replaces old
288
+ self.start = start
289
+ self.end = self.start + len(self.cache)
290
+ return part + self.cache[:l]
291
+
292
+
293
+ class FirstChunkCache(BaseCache):
294
+ """Caches the first block of a file only
295
+
296
+ This may be useful for file types where the metadata is stored in the header,
297
+ but is randomly accessed.
298
+ """
299
+
300
+ name = "first"
301
+
302
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
303
+ if blocksize > size:
304
+ # this will buffer the whole thing
305
+ blocksize = size
306
+ super().__init__(blocksize, fetcher, size)
307
+ self.cache: bytes | None = None
308
+
309
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
310
+ start = start or 0
311
+ if start > self.size:
312
+ logger.debug("FirstChunkCache: requested start > file size")
313
+ return b""
314
+
315
+ end = min(end, self.size)
316
+
317
+ if start < self.blocksize:
318
+ if self.cache is None:
319
+ self.miss_count += 1
320
+ if end > self.blocksize:
321
+ self.total_requested_bytes += end
322
+ data = self.fetcher(0, end)
323
+ self.cache = data[: self.blocksize]
324
+ return data[start:]
325
+ self.cache = self.fetcher(0, self.blocksize)
326
+ self.total_requested_bytes += self.blocksize
327
+ part = self.cache[start:end]
328
+ if end > self.blocksize:
329
+ self.total_requested_bytes += end - self.blocksize
330
+ part += self.fetcher(self.blocksize, end)
331
+ self.hit_count += 1
332
+ return part
333
+ else:
334
+ self.miss_count += 1
335
+ self.total_requested_bytes += end - start
336
+ return self.fetcher(start, end)
337
+
338
+
339
+ class BlockCache(BaseCache):
340
+ """
341
+ Cache holding memory as a set of blocks.
342
+
343
+ Requests are only ever made ``blocksize`` at a time, and are
344
+ stored in an LRU cache. The least recently accessed block is
345
+ discarded when more than ``maxblocks`` are stored.
346
+
347
+ Parameters
348
+ ----------
349
+ blocksize : int
350
+ The number of bytes to store in each block.
351
+ Requests are only ever made for ``blocksize``, so this
352
+ should balance the overhead of making a request against
353
+ the granularity of the blocks.
354
+ fetcher : Callable
355
+ size : int
356
+ The total size of the file being cached.
357
+ maxblocks : int
358
+ The maximum number of blocks to cache for. The maximum memory
359
+ use for this cache is then ``blocksize * maxblocks``.
360
+ """
361
+
362
+ name = "blockcache"
363
+
364
+ def __init__(
365
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
366
+ ) -> None:
367
+ super().__init__(blocksize, fetcher, size)
368
+ self.nblocks = math.ceil(size / blocksize)
369
+ self.maxblocks = maxblocks
370
+ self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
371
+
372
+ def cache_info(self):
373
+ """
374
+ The statistics on the block cache.
375
+
376
+ Returns
377
+ -------
378
+ NamedTuple
379
+ Returned directly from the LRU Cache used internally.
380
+ """
381
+ return self._fetch_block_cached.cache_info()
382
+
383
+ def __getstate__(self) -> dict[str, Any]:
384
+ state = self.__dict__
385
+ del state["_fetch_block_cached"]
386
+ return state
387
+
388
+ def __setstate__(self, state: dict[str, Any]) -> None:
389
+ self.__dict__.update(state)
390
+ self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
391
+ self._fetch_block
392
+ )
393
+
394
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
395
+ if start is None:
396
+ start = 0
397
+ if end is None:
398
+ end = self.size
399
+ if start >= self.size or start >= end:
400
+ return b""
401
+
402
+ # byte position -> block numbers
403
+ start_block_number = start // self.blocksize
404
+ end_block_number = end // self.blocksize
405
+
406
+ # these are cached, so safe to do multiple calls for the same start and end.
407
+ for block_number in range(start_block_number, end_block_number + 1):
408
+ self._fetch_block_cached(block_number)
409
+
410
+ return self._read_cache(
411
+ start,
412
+ end,
413
+ start_block_number=start_block_number,
414
+ end_block_number=end_block_number,
415
+ )
416
+
417
+ def _fetch_block(self, block_number: int) -> bytes:
418
+ """
419
+ Fetch the block of data for `block_number`.
420
+ """
421
+ if block_number > self.nblocks:
422
+ raise ValueError(
423
+ f"'block_number={block_number}' is greater than "
424
+ f"the number of blocks ({self.nblocks})"
425
+ )
426
+
427
+ start = block_number * self.blocksize
428
+ end = start + self.blocksize
429
+ self.total_requested_bytes += end - start
430
+ self.miss_count += 1
431
+ logger.info("BlockCache fetching block %d", block_number)
432
+ block_contents = super()._fetch(start, end)
433
+ return block_contents
434
+
435
+ def _read_cache(
436
+ self, start: int, end: int, start_block_number: int, end_block_number: int
437
+ ) -> bytes:
438
+ """
439
+ Read from our block cache.
440
+
441
+ Parameters
442
+ ----------
443
+ start, end : int
444
+ The start and end byte positions.
445
+ start_block_number, end_block_number : int
446
+ The start and end block numbers.
447
+ """
448
+ start_pos = start % self.blocksize
449
+ end_pos = end % self.blocksize
450
+
451
+ self.hit_count += 1
452
+ if start_block_number == end_block_number:
453
+ block: bytes = self._fetch_block_cached(start_block_number)
454
+ return block[start_pos:end_pos]
455
+
456
+ else:
457
+ # read from the initial
458
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
459
+
460
+ # intermediate blocks
461
+ # Note: it'd be nice to combine these into one big request. However
462
+ # that doesn't play nicely with our LRU cache.
463
+ out.extend(
464
+ map(
465
+ self._fetch_block_cached,
466
+ range(start_block_number + 1, end_block_number),
467
+ )
468
+ )
469
+
470
+ # final block
471
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
472
+
473
+ return b"".join(out)
474
+
475
+
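
`BlockCache` above can be exercised directly by handing it a fetcher closure; `_fetch` is the internal entry point that buffered files call. A sketch:

    from fsspec.caching import BlockCache

    data = bytes(range(256)) * 64  # 16 KiB of stand-in "remote" data

    def fetcher(start: int, end: int) -> bytes:
        return data[start:end]

    cache = BlockCache(blocksize=1024, fetcher=fetcher, size=len(data), maxblocks=4)
    assert cache._fetch(10, 20) == data[10:20]  # one miss for block 0, then hits
    print(cache.cache_info())  # hits/misses reported by the underlying lru_cache
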
476
+ class BytesCache(BaseCache):
477
+ """Cache which holds data in an in-memory bytes object
478
+
479
+ Implements read-ahead by the block size, for semi-random reads progressing
480
+ through the file.
481
+
482
+ Parameters
483
+ ----------
484
+ trim: bool
485
+ As we read more data, whether to discard the start of the buffer when
486
+ we are more than a blocksize ahead of it.
487
+ """
488
+
489
+ name: ClassVar[str] = "bytes"
490
+
491
+ def __init__(
492
+ self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
493
+ ) -> None:
494
+ super().__init__(blocksize, fetcher, size)
495
+ self.cache = b""
496
+ self.start: int | None = None
497
+ self.end: int | None = None
498
+ self.trim = trim
499
+
500
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
501
+ # TODO: only set start/end after fetch, in case it fails?
502
+ # is this where retry logic might go?
503
+ if start is None:
504
+ start = 0
505
+ if end is None:
506
+ end = self.size
507
+ if start >= self.size or start >= end:
508
+ return b""
509
+ if (
510
+ self.start is not None
511
+ and start >= self.start
512
+ and self.end is not None
513
+ and end < self.end
514
+ ):
515
+ # cache hit: we have all the required data
516
+ offset = start - self.start
517
+ self.hit_count += 1
518
+ return self.cache[offset : offset + end - start]
519
+
520
+ if self.blocksize:
521
+ bend = min(self.size, end + self.blocksize)
522
+ else:
523
+ bend = end
524
+
525
+ if bend == start or start > self.size:
526
+ return b""
527
+
528
+ if (self.start is None or start < self.start) and (
529
+ self.end is None or end > self.end
530
+ ):
531
+ # First read, or extending both before and after
532
+ self.total_requested_bytes += bend - start
533
+ self.miss_count += 1
534
+ self.cache = self.fetcher(start, bend)
535
+ self.start = start
536
+ else:
537
+ assert self.start is not None
538
+ assert self.end is not None
539
+ self.miss_count += 1
540
+
541
+ if start < self.start:
542
+ if self.end is None or self.end - end > self.blocksize:
543
+ self.total_requested_bytes += bend - start
544
+ self.cache = self.fetcher(start, bend)
545
+ self.start = start
546
+ else:
547
+ self.total_requested_bytes += self.start - start
548
+ new = self.fetcher(start, self.start)
549
+ self.start = start
550
+ self.cache = new + self.cache
551
+ elif self.end is not None and bend > self.end:
552
+ if self.end > self.size:
553
+ pass
554
+ elif end - self.end > self.blocksize:
555
+ self.total_requested_bytes += bend - start
556
+ self.cache = self.fetcher(start, bend)
557
+ self.start = start
558
+ else:
559
+ self.total_requested_bytes += bend - self.end
560
+ new = self.fetcher(self.end, bend)
561
+ self.cache = self.cache + new
562
+
563
+ self.end = self.start + len(self.cache)
564
+ offset = start - self.start
565
+ out = self.cache[offset : offset + end - start]
566
+ if self.trim:
567
+ num = (self.end - self.start) // (self.blocksize + 1)
568
+ if num > 1:
569
+ self.start += self.blocksize * num
570
+ self.cache = self.cache[self.blocksize * num :]
571
+ return out
572
+
573
+ def __len__(self) -> int:
574
+ return len(self.cache)
575
+
576
+
577
+ class AllBytes(BaseCache):
578
+ """Cache entire contents of the file"""
579
+
580
+ name: ClassVar[str] = "all"
581
+
582
+ def __init__(
583
+ self,
584
+ blocksize: int | None = None,
585
+ fetcher: Fetcher | None = None,
586
+ size: int | None = None,
587
+ data: bytes | None = None,
588
+ ) -> None:
589
+ super().__init__(blocksize, fetcher, size) # type: ignore[arg-type]
590
+ if data is None:
591
+ self.miss_count += 1
592
+ self.total_requested_bytes += self.size
593
+ data = self.fetcher(0, self.size)
594
+ self.data = data
595
+
596
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
597
+ self.hit_count += 1
598
+ return self.data[start:stop]
599
+
600
+
601
+ class KnownPartsOfAFile(BaseCache):
602
+ """
603
+ Cache holding known file parts.
604
+
605
+ Parameters
606
+ ----------
607
+ blocksize: int
608
+ How far to read ahead in numbers of bytes
609
+ fetcher: func
610
+ Function of the form f(start, end) which gets bytes from remote as
611
+ specified
612
+ size: int
613
+ How big this file is
614
+ data: dict
615
+ A dictionary mapping explicit `(start, stop)` file-offset tuples
616
+ to known bytes.
617
+ strict: bool, default True
618
+ Whether to fetch reads that go beyond a known byte-range boundary.
619
+ If `False`, any read that ends outside a known part will be zero
620
+ padded. Note that zero padding will not be used for reads that
621
+ begin outside a known byte-range.
622
+ """
623
+
624
+ name: ClassVar[str] = "parts"
625
+
626
+ def __init__(
627
+ self,
628
+ blocksize: int,
629
+ fetcher: Fetcher,
630
+ size: int,
631
+ data: dict[tuple[int, int], bytes] | None = None,
632
+ strict: bool = True,
633
+ **_: Any,
634
+ ):
635
+ super().__init__(blocksize, fetcher, size)
636
+ self.strict = strict
637
+
638
+ # simple consolidation of contiguous blocks
639
+ if data:
640
+ old_offsets = sorted(data.keys())
641
+ offsets = [old_offsets[0]]
642
+ blocks = [data.pop(old_offsets[0])]
643
+ for start, stop in old_offsets[1:]:
644
+ start0, stop0 = offsets[-1]
645
+ if start == stop0:
646
+ offsets[-1] = (start0, stop)
647
+ blocks[-1] += data.pop((start, stop))
648
+ else:
649
+ offsets.append((start, stop))
650
+ blocks.append(data.pop((start, stop)))
651
+
652
+ self.data = dict(zip(offsets, blocks))
653
+ else:
654
+ self.data = {}
655
+
656
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
657
+ if start is None:
658
+ start = 0
659
+ if stop is None:
660
+ stop = self.size
661
+
662
+ out = b""
663
+ for (loc0, loc1), data in self.data.items():
664
+ # If self.strict=False, use zero-padded data
665
+ # for reads beyond the end of a "known" buffer
666
+ if loc0 <= start < loc1:
667
+ off = start - loc0
668
+ out = data[off : off + stop - start]
669
+ if not self.strict or loc0 <= stop <= loc1:
670
+ # The request is within a known range, or
671
+ # it begins within a known range, and we
672
+ # are allowed to pad reads beyond the
673
+ # buffer with zero
674
+ out += b"\x00" * (stop - start - len(out))
675
+ self.hit_count += 1
676
+ return out
677
+ else:
678
+ # The request ends outside a known range,
679
+ # and we are being "strict" about reads
680
+ # beyond the buffer
681
+ start = loc1
682
+ break
683
+
684
+ # We only get here if there is a request outside the
685
+ # known parts of the file. In an ideal world, this
686
+ # should never happen
687
+ if self.fetcher is None:
688
+ # We cannot fetch the data, so raise an error
689
+ raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
690
+ # We can fetch the data, but should warn the user
691
+ # that this may be slow
692
+ warnings.warn(
693
+ f"Read is outside the known file parts: {(start, stop)}. "
694
+ f"IO/caching performance may be poor!"
695
+ )
696
+ logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
697
+ self.total_requested_bytes += stop - start
698
+ self.miss_count += 1
699
+ return out + super()._fetch(start, stop)
700
+
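A sketch of the ``strict`` semantics described above: with ``strict=False`` and no fetcher, a read that begins inside a known part is zero-padded past its end.

    from fsspec.caching import KnownPartsOfAFile

    cache = KnownPartsOfAFile(
        blocksize=0, fetcher=None, size=10,
        data={(0, 4): b"abcd"}, strict=False,
    )
    assert cache._fetch(0, 4) == b"abcd"
    assert cache._fetch(2, 8) == b"cd\x00\x00\x00\x00"  # padded past the part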
701
+
702
+ class UpdatableLRU(Generic[P, T]):
703
+ """
704
+ Custom implementation of LRU cache that allows updating keys
705
+
706
+ Used by BackgroundBlockCache
707
+ """
708
+
709
+ class CacheInfo(NamedTuple):
710
+ hits: int
711
+ misses: int
712
+ maxsize: int
713
+ currsize: int
714
+
715
+ def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
716
+ self._cache: OrderedDict[Any, T] = collections.OrderedDict()
717
+ self._func = func
718
+ self._max_size = max_size
719
+ self._hits = 0
720
+ self._misses = 0
721
+ self._lock = threading.Lock()
722
+
723
+ def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
724
+ if kwargs:
725
+ raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
726
+ with self._lock:
727
+ if args in self._cache:
728
+ self._cache.move_to_end(args)
729
+ self._hits += 1
730
+ return self._cache[args]
731
+
732
+ result = self._func(*args, **kwargs)
733
+
734
+ with self._lock:
735
+ self._cache[args] = result
736
+ self._misses += 1
737
+ if len(self._cache) > self._max_size:
738
+ self._cache.popitem(last=False)
739
+
740
+ return result
741
+
742
+ def is_key_cached(self, *args: Any) -> bool:
743
+ with self._lock:
744
+ return args in self._cache
745
+
746
+ def add_key(self, result: T, *args: Any) -> None:
747
+ with self._lock:
748
+ self._cache[args] = result
749
+ if len(self._cache) > self._max_size:
750
+ self._cache.popitem(last=False)
751
+
752
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
753
+ with self._lock:
754
+ return self.CacheInfo(
755
+ maxsize=self._max_size,
756
+ currsize=len(self._cache),
757
+ hits=self._hits,
758
+ misses=self._misses,
759
+ )
760
+
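A sketch of the update-in-place behaviour that distinguishes this class from ``functools.lru_cache``:

    from fsspec.caching import UpdatableLRU

    lru = UpdatableLRU(lambda x: x * 2, max_size=2)
    assert lru(3) == 6        # miss: computed and stored
    assert lru(3) == 6        # hit
    lru.add_key(99, 3)        # overwrite the cached value for key (3,)
    assert lru(3) == 99
    print(lru.cache_info())   # CacheInfo(hits=2, misses=1, maxsize=2, currsize=1)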
761
+
762
+ class BackgroundBlockCache(BaseCache):
763
+ """
764
+ Cache holding memory as a set of blocks with pre-loading of
765
+ the next block in the background.
766
+
767
+ Requests are only ever made ``blocksize`` at a time, and are
768
+ stored in an LRU cache. The least recently accessed block is
769
+ discarded when more than ``maxblocks`` are stored. If the
770
+ next block is not in cache, it is loaded in a separate thread
771
+ in non-blocking way.
772
+
773
+ Parameters
774
+ ----------
775
+ blocksize : int
776
+ The number of bytes to store in each block.
777
+ Requests are only ever made for ``blocksize``, so this
778
+ should balance the overhead of making a request against
779
+ the granularity of the blocks.
780
+ fetcher : Callable
781
+ size : int
782
+ The total size of the file being cached.
783
+ maxblocks : int
784
+ The maximum number of blocks to cache for. The maximum memory
785
+ use for this cache is then ``blocksize * maxblocks``.
786
+ """
787
+
788
+ name: ClassVar[str] = "background"
789
+
790
+ def __init__(
791
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
792
+ ) -> None:
793
+ super().__init__(blocksize, fetcher, size)
794
+ self.nblocks = math.ceil(size / blocksize)
795
+ self.maxblocks = maxblocks
796
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)
797
+
798
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
799
+ self._fetch_future_block_number: int | None = None
800
+ self._fetch_future: Future[bytes] | None = None
801
+ self._fetch_future_lock = threading.Lock()
802
+
803
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
804
+ """
805
+ The statistics on the block cache.
806
+
807
+ Returns
808
+ -------
809
+ NamedTuple
810
+ Returned directly from the LRU Cache used internally.
811
+ """
812
+ return self._fetch_block_cached.cache_info()
813
+
814
+ def __getstate__(self) -> dict[str, Any]:
815
+ state = self.__dict__
816
+ del state["_fetch_block_cached"]
817
+ del state["_thread_executor"]
818
+ del state["_fetch_future_block_number"]
819
+ del state["_fetch_future"]
820
+ del state["_fetch_future_lock"]
821
+ return state
822
+
823
+ def __setstate__(self, state) -> None:
824
+ self.__dict__.update(state)
825
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
826
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
827
+ self._fetch_future_block_number = None
828
+ self._fetch_future = None
829
+ self._fetch_future_lock = threading.Lock()
830
+
831
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
832
+ if start is None:
833
+ start = 0
834
+ if end is None:
835
+ end = self.size
836
+ if start >= self.size or start >= end:
837
+ return b""
838
+
839
+ # byte position -> block numbers
840
+ start_block_number = start // self.blocksize
841
+ end_block_number = end // self.blocksize
842
+
843
+ fetch_future_block_number = None
844
+ fetch_future = None
845
+ with self._fetch_future_lock:
846
+ # Background thread is running. Check whether we can or must join it.
847
+ if self._fetch_future is not None:
848
+ assert self._fetch_future_block_number is not None
849
+ if self._fetch_future.done():
850
+ logger.info("BlockCache joined background fetch without waiting.")
851
+ self._fetch_block_cached.add_key(
852
+ self._fetch_future.result(), self._fetch_future_block_number
853
+ )
854
+ # Cleanup the fetch variables. Done with fetching the block.
855
+ self._fetch_future_block_number = None
856
+ self._fetch_future = None
857
+ else:
858
+ # Must join if we need the block for the current fetch
859
+ must_join = bool(
860
+ start_block_number
861
+ <= self._fetch_future_block_number
862
+ <= end_block_number
863
+ )
864
+ if must_join:
865
+ # Copy to the local variables to release lock
866
+ # before waiting for result
867
+ fetch_future_block_number = self._fetch_future_block_number
868
+ fetch_future = self._fetch_future
869
+
870
+ # Cleanup the fetch variables. Have a local copy.
871
+ self._fetch_future_block_number = None
872
+ self._fetch_future = None
873
+
874
+ # Need to wait for the future for the current read
875
+ if fetch_future is not None:
876
+ logger.info("BlockCache waiting for background fetch.")
877
+ # Wait until result and put it in cache
878
+ self._fetch_block_cached.add_key(
879
+ fetch_future.result(), fetch_future_block_number
880
+ )
881
+
882
+ # these are cached, so safe to do multiple calls for the same start and end.
883
+ for block_number in range(start_block_number, end_block_number + 1):
884
+ self._fetch_block_cached(block_number)
885
+
886
+ # fetch next block in the background if nothing is running in the background,
887
+ # the block is within file and it is not already cached
888
+ end_block_plus_1 = end_block_number + 1
889
+ with self._fetch_future_lock:
890
+ if (
891
+ self._fetch_future is None
892
+ and end_block_plus_1 <= self.nblocks
893
+ and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
894
+ ):
895
+ self._fetch_future_block_number = end_block_plus_1
896
+ self._fetch_future = self._thread_executor.submit(
897
+ self._fetch_block, end_block_plus_1, "async"
898
+ )
899
+
900
+ return self._read_cache(
901
+ start,
902
+ end,
903
+ start_block_number=start_block_number,
904
+ end_block_number=end_block_number,
905
+ )
906
+
907
+ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
908
+ """
909
+ Fetch the block of data for `block_number`.
910
+ """
911
+ if block_number > self.nblocks:
912
+ raise ValueError(
913
+ f"'block_number={block_number}' is greater than "
914
+ f"the number of blocks ({self.nblocks})"
915
+ )
916
+
917
+ start = block_number * self.blocksize
918
+ end = start + self.blocksize
919
+ logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
920
+ self.total_requested_bytes += end - start
921
+ self.miss_count += 1
922
+ block_contents = super()._fetch(start, end)
923
+ return block_contents
924
+
925
+ def _read_cache(
926
+ self, start: int, end: int, start_block_number: int, end_block_number: int
927
+ ) -> bytes:
928
+ """
929
+ Read from our block cache.
930
+
931
+ Parameters
932
+ ----------
933
+ start, end : int
934
+ The start and end byte positions.
935
+ start_block_number, end_block_number : int
936
+ The start and end block numbers.
937
+ """
938
+ start_pos = start % self.blocksize
939
+ end_pos = end % self.blocksize
940
+
941
+ # kind of pointless to count this as a hit, but it is
942
+ self.hit_count += 1
943
+
944
+ if start_block_number == end_block_number:
945
+ block = self._fetch_block_cached(start_block_number)
946
+ return block[start_pos:end_pos]
947
+
948
+ else:
949
+ # read from the initial
950
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
951
+
952
+ # intermediate blocks
953
+ # Note: it'd be nice to combine these into one big request. However
954
+ # that doesn't play nicely with our LRU cache.
955
+ out.extend(
956
+ map(
957
+ self._fetch_block_cached,
958
+ range(start_block_number + 1, end_block_number),
959
+ )
960
+ )
961
+
962
+ # final block
963
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
964
+
965
+ return b"".join(out)
966
+
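A sketch of the prefetch behaviour: reading block 0 schedules block 1 on the single background worker. The lambda fetcher stands in for a remote range request.

    from fsspec.caching import BackgroundBlockCache

    data = b"x" * 1000
    cache = BackgroundBlockCache(
        blocksize=100, fetcher=lambda s, e: data[s:e], size=len(data)
    )
    out = cache._fetch(0, 50)  # block 0 now; block 1 prefetched in background
    assert out == b"x" * 50
    print(cache.cache_info())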
967
+
968
+ caches: dict[str | None, type[BaseCache]] = {
969
+ # one custom case
970
+ None: BaseCache,
971
+ }
972
+
973
+
974
+ def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
975
+ """'Register' cache implementation.
976
+
977
+ Parameters
978
+ ----------
979
+ clobber: bool, optional
980
+ If set to True (default is False) - allow to overwrite existing
981
+ entry.
982
+
983
+ Raises
984
+ ------
985
+ ValueError
+ If a cache with this name is already registered and ``clobber``
+ is False.
986
+ """
987
+ name = cls.name
988
+ if not clobber and name in caches:
989
+ raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
990
+ caches[name] = cls
991
+
992
+
993
+ for c in (
994
+ BaseCache,
995
+ MMapCache,
996
+ BytesCache,
997
+ ReadAheadCache,
998
+ BlockCache,
999
+ FirstChunkCache,
1000
+ AllBytes,
1001
+ KnownPartsOfAFile,
1002
+ BackgroundBlockCache,
1003
+ ):
1004
+ register_cache(c)
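A minimal usage sketch of the registry above; ``NoopCache`` and the name "noop" are illustrative only, not part of fsspec:

    from fsspec.caching import BaseCache, caches, register_cache

    class NoopCache(BaseCache):
        name = "noop"  # hypothetical name; any unused string works

    register_cache(NoopCache)                # selectable as cache_type="noop"
    register_cache(NoopCache, clobber=True)  # re-registering needs clobber
    assert caches["noop"] is NoopCache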
venv/lib/python3.13/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,182 @@
1
+ """Helper functions for a standard streaming compression API"""
2
+
3
+ from zipfile import ZipFile
4
+
5
+ import fsspec.utils
6
+ from fsspec.spec import AbstractBufferedFile
7
+
8
+
9
+ def noop_file(file, mode, **kwargs):
10
+ return file
11
+
12
+
13
+ # TODO: files should also be available as contexts
14
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
15
+ compr = {None: noop_file}
16
+
17
+
18
+ def register_compression(name, callback, extensions, force=False):
19
+ """Register an "inferable" file compression type.
20
+
21
+ Registers transparent file compression type for use with fsspec.open.
22
+ Compression can be specified by name in open, or "infer"-ed for any files
23
+ ending with the given extensions.
24
+
25
+ Args:
26
+ name: (str) The compression type name. Eg. "gzip".
27
+ callback: A callable of form (infile, mode, **kwargs) -> file-like.
28
+ Accepts an input file-like object, the target mode and kwargs.
29
+ Returns a wrapped file-like object.
30
+ extensions: (str, Iterable[str]) A file extension, or list of file
31
+ extensions for which to infer this compression scheme. Eg. "gz".
32
+ force: (bool) Force re-registration of compression type or extensions.
33
+
34
+ Raises:
35
+ ValueError: If name or extensions already registered, and not force.
36
+
37
+ """
38
+ if isinstance(extensions, str):
39
+ extensions = [extensions]
40
+
41
+ # Validate registration
42
+ if name in compr and not force:
43
+ raise ValueError(f"Duplicate compression registration: {name}")
44
+
45
+ for ext in extensions:
46
+ if ext in fsspec.utils.compressions and not force:
47
+ raise ValueError(f"Duplicate compression file extension: {ext} ({name})")
48
+
49
+ compr[name] = callback
50
+
51
+ for ext in extensions:
52
+ fsspec.utils.compressions[ext] = name
53
+
54
+
55
+ def unzip(infile, mode="rb", filename=None, **kwargs):
56
+ if "r" not in mode:
57
+ filename = filename or "file"
58
+ z = ZipFile(infile, mode="w", **kwargs)
59
+ fo = z.open(filename, mode="w")
60
+ fo.close = lambda closer=fo.close: closer() or z.close()
61
+ return fo
62
+ z = ZipFile(infile)
63
+ if filename is None:
64
+ filename = z.namelist()[0]
65
+ return z.open(filename, mode="r", **kwargs)
66
+
67
+
68
+ register_compression("zip", unzip, "zip")
69
+
70
+ try:
71
+ from bz2 import BZ2File
72
+ except ImportError:
73
+ pass
74
+ else:
75
+ register_compression("bz2", BZ2File, "bz2")
76
+
77
+ try: # pragma: no cover
78
+ from isal import igzip
79
+
80
+ def isal(infile, mode="rb", **kwargs):
81
+ return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)
82
+
83
+ register_compression("gzip", isal, "gz")
84
+ except ImportError:
85
+ from gzip import GzipFile
86
+
87
+ register_compression(
88
+ "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
89
+ )
90
+
91
+ try:
92
+ from lzma import LZMAFile
93
+
94
+ register_compression("lzma", LZMAFile, "lzma")
95
+ register_compression("xz", LZMAFile, "xz")
96
+ except ImportError:
97
+ pass
98
+
99
+ try:
100
+ import lzmaffi
101
+
102
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
103
+ register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
104
+ except ImportError:
105
+ pass
106
+
107
+
108
+ class SnappyFile(AbstractBufferedFile):
109
+ def __init__(self, infile, mode, **kwargs):
110
+ import snappy
111
+
112
+ super().__init__(
113
+ fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
114
+ )
115
+ self.infile = infile
116
+ if "r" in mode:
117
+ self.codec = snappy.StreamDecompressor()
118
+ else:
119
+ self.codec = snappy.StreamCompressor()
120
+
121
+ def _upload_chunk(self, final=False):
122
+ self.buffer.seek(0)
123
+ out = self.codec.add_chunk(self.buffer.read())
124
+ self.infile.write(out)
125
+ return True
126
+
127
+ def seek(self, loc, whence=0):
128
+ raise NotImplementedError("SnappyFile is not seekable")
129
+
130
+ def seekable(self):
131
+ return False
132
+
133
+ def _fetch_range(self, start, end):
134
+ """Get the specified set of bytes from remote"""
135
+ data = self.infile.read(end - start)
136
+ return self.codec.decompress(data)
137
+
138
+
139
+ try:
140
+ import snappy
141
+
142
+ snappy.compress(b"")
143
+ # Snappy may use the .sz file extension, but this is not part of the
144
+ # standard implementation.
145
+ register_compression("snappy", SnappyFile, [])
146
+
147
+ except (ImportError, NameError, AttributeError):
148
+ pass
149
+
150
+ try:
151
+ import lz4.frame
152
+
153
+ register_compression("lz4", lz4.frame.open, "lz4")
154
+ except ImportError:
155
+ pass
156
+
157
+ try:
158
+ # zstd in the standard library for python >= 3.14
159
+ from compression.zstd import ZstdFile
160
+
161
+ register_compression("zstd", ZstdFile, "zst")
162
+
163
+ except ImportError:
164
+ try:
165
+ import zstandard as zstd
166
+
167
+ def zstandard_file(infile, mode="rb"):
168
+ if "r" in mode:
169
+ cctx = zstd.ZstdDecompressor()
170
+ return cctx.stream_reader(infile)
171
+ else:
172
+ cctx = zstd.ZstdCompressor(level=10)
173
+ return cctx.stream_writer(infile)
174
+
175
+ register_compression("zstd", zstandard_file, "zst")
176
+ except ImportError:
177
+ pass
178
+
179
+
180
+ def available_compressions():
181
+ """Return a list of the implemented compressions."""
182
+ return list(compr)
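A sketch of registering a custom codec with the API above; the "raw" name and extension are hypothetical and the callback is the identity:

    import fsspec
    from fsspec.compression import register_compression

    register_compression("raw", lambda f, mode="rb", **kw: f, "raw")

    with fsspec.open("memory://demo.txt.raw", "wb", compression="infer") as f:
        f.write(b"hello")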
venv/lib/python3.13/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
+ def set_conf_env(conf_dict, envdict=os.environ):
15
+ """Set config values from environment variables
16
+
17
+ Looks for variables of the form ``FSSPEC_<protocol>`` and
18
+ ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
19
+ as a json dictionary and used to ``update`` the config of the
20
+ corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
21
+ attempt to convert the string value, but the kwarg keys will be lower-cased.
22
+
23
+ The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
24
+ ``FSSPEC_<protocol>`` ones.
25
+
26
+ Parameters
27
+ ----------
28
+ conf_dict : dict(str, dict)
29
+ This dict will be mutated
30
+ envdict : dict-like(str, str)
31
+ Source for the values - usually the real environment
32
+ """
33
+ kwarg_keys = []
34
+ for key in envdict:
35
+ if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
36
+ if key.count("_") > 1:
37
+ kwarg_keys.append(key)
38
+ continue
39
+ try:
40
+ value = json.loads(envdict[key])
41
+ except json.decoder.JSONDecodeError as ex:
42
+ warnings.warn(
43
+ f"Ignoring environment variable {key} due to a parse failure: {ex}"
44
+ )
45
+ else:
46
+ if isinstance(value, dict):
47
+ _, proto = key.split("_", 1)
48
+ conf_dict.setdefault(proto.lower(), {}).update(value)
49
+ else:
50
+ warnings.warn(
51
+ f"Ignoring environment variable {key} due to not being a dict:"
52
+ f" {type(value)}"
53
+ )
54
+ elif key.startswith("FSSPEC"):
55
+ warnings.warn(
56
+ f"Ignoring environment variable {key} due to having an unexpected name"
57
+ )
58
+
59
+ for key in kwarg_keys:
60
+ _, proto, kwarg = key.split("_", 2)
61
+ conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
62
+
63
+
64
+ def set_conf_files(cdir, conf_dict):
65
+ """Set config values from files
66
+
67
+ Scans for INI and JSON files in the given dictionary, and uses their
68
+ contents to set the config. In case of repeated values, later values
69
+ win.
70
+
71
+ In the case of INI files, all values are strings, and these will not
72
+ be converted.
73
+
74
+ Parameters
75
+ ----------
76
+ cdir : str
77
+ Directory to search
78
+ conf_dict : dict(str, dict)
79
+ This dict will be mutated
80
+ """
81
+ if not os.path.isdir(cdir):
82
+ return
83
+ allfiles = sorted(os.listdir(cdir))
84
+ for fn in allfiles:
85
+ if fn.endswith(".ini"):
86
+ ini = configparser.ConfigParser()
87
+ ini.read(os.path.join(cdir, fn))
88
+ for key in ini:
89
+ if key == "DEFAULT":
90
+ continue
91
+ conf_dict.setdefault(key, {}).update(dict(ini[key]))
92
+ if fn.endswith(".json"):
93
+ with open(os.path.join(cdir, fn)) as f:
94
+ js = json.load(f)
95
+ for key in js:
96
+ conf_dict.setdefault(key, {}).update(dict(js[key]))
97
+
98
+
99
+ def apply_config(cls, kwargs, conf_dict=None):
100
+ """Supply default values for kwargs when instantiating class
101
+
102
+ Augments the passed kwargs, by finding entries in the config dict
103
+ which match the classes ``.protocol`` attribute (one or more str)
104
+
105
+ Parameters
106
+ ----------
107
+ cls : file system implementation
108
+ kwargs : dict
109
+ conf_dict : dict of dict
110
+ Typically this is the global configuration
111
+
112
+ Returns
113
+ -------
114
+ dict : the modified set of kwargs
115
+ """
116
+ if conf_dict is None:
117
+ conf_dict = conf
118
+ protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
119
+ kw = {}
120
+ for proto in protos:
121
+ # default kwargs from the current state of the config
122
+ if proto in conf_dict:
123
+ kw.update(conf_dict[proto])
124
+ # explicit kwargs always win
125
+ kw.update(**kwargs)
126
+ kwargs = kw
127
+ return kwargs
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
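A sketch of the two environment-variable forms handled by ``set_conf_env``; "gcs" is just an example protocol name:

    import fsspec.config

    env = {
        "FSSPEC_GCS": '{"project": "demo"}',  # JSON-dict form
        "FSSPEC_GCS_TOKEN": "anon",           # single-kwarg form, key lower-cased
    }
    conf = {}
    fsspec.config.set_conf_env(conf, envdict=env)
    assert conf == {"gcs": {"project": "demo", "token": "anon"}}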
venv/lib/python3.13/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,125 @@
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import sys
5
+ import time
6
+ from collections import deque
7
+ from collections.abc import Generator, Sequence
8
+
9
+ import pytest
10
+
11
+ import fsspec
12
+
13
+
14
+ @pytest.fixture()
15
+ def m():
16
+ """
17
+ Fixture providing a memory filesystem.
18
+ """
19
+ m = fsspec.filesystem("memory")
20
+ m.store.clear()
21
+ m.pseudo_dirs.clear()
22
+ m.pseudo_dirs.append("")
23
+ try:
24
+ yield m
25
+ finally:
26
+ m.store.clear()
27
+ m.pseudo_dirs.clear()
28
+ m.pseudo_dirs.append("")
29
+
30
+
31
+ class InstanceCacheInspector:
32
+ """
33
+ Helper class to inspect instance caches of filesystem classes in tests.
34
+ """
35
+
36
+ def clear(self) -> None:
37
+ """
38
+ Clear instance caches of all currently imported filesystem classes.
39
+ """
40
+ classes = deque([fsspec.spec.AbstractFileSystem])
41
+ while classes:
42
+ cls = classes.popleft()
43
+ cls.clear_instance_cache()
44
+ classes.extend(cls.__subclasses__())
45
+
46
+ def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
47
+ """
48
+ Gather counts of filesystem instances in the instance caches
49
+ of all currently imported filesystem classes.
50
+
51
+ Parameters
52
+ ----------
53
+ omit_zero:
54
+ Whether to omit instance types with no cached instances.
55
+ """
56
+ out: dict[str, int] = {}
57
+ classes = deque([fsspec.spec.AbstractFileSystem])
58
+ while classes:
59
+ cls = classes.popleft()
60
+ count = len(cls._cache) # there is no public interface for the cache
61
+ # note: skip intermediate AbstractFileSystem subclasses
62
+ # if they proxy the protocol attribute via a property.
63
+ if isinstance(cls.protocol, (Sequence, str)):
64
+ key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
65
+ if count or not omit_zero:
66
+ out[key] = count
67
+ classes.extend(cls.__subclasses__())
68
+ return out
69
+
70
+
71
+ @pytest.fixture(scope="function", autouse=True)
72
+ def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
73
+ """
74
+ Fixture to ensure empty filesystem instance caches before and after a test.
75
+
76
+ Used by default for all tests.
77
+ Clears caches of all imported filesystem classes.
78
+ Can be used to write test assertions about instance caches.
79
+
80
+ Usage:
81
+
82
+ def test_something(instance_caches):
83
+ # Test code here
84
+ fsspec.open("file://abc")
85
+ fsspec.open("memory://foo/bar")
86
+
87
+ # Test assertion
88
+ assert instance_caches.gather_counts() == {"file": 1, "memory": 1}
89
+
90
+ Returns
91
+ -------
92
+ instance_caches: An instance cache inspector for clearing and inspecting caches.
93
+ """
94
+ ic = InstanceCacheInspector()
95
+
96
+ ic.clear()
97
+ try:
98
+ yield ic
99
+ finally:
100
+ ic.clear()
101
+
102
+
103
+ @pytest.fixture(scope="function")
104
+ def ftp_writable(tmpdir):
105
+ """
106
+ Fixture providing a writable FTP filesystem.
107
+ """
108
+ pytest.importorskip("pyftpdlib")
109
+
110
+ d = str(tmpdir)
111
+ with open(os.path.join(d, "out"), "wb") as f:
112
+ f.write(b"hello" * 10000)
113
+ P = subprocess.Popen(
114
+ [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
115
+ )
116
+ try:
117
+ time.sleep(1)
118
+ yield "localhost", 2121, "user", "pass"
119
+ finally:
120
+ P.terminate()
121
+ P.wait()
122
+ try:
123
+ shutil.rmtree(tmpdir)
124
+ except Exception:
125
+ pass
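A hypothetical test consuming the fixture above, assuming pyftpdlib is installed and the FTP implementation is importable:

    import fsspec

    def test_ftp_roundtrip(ftp_writable):
        host, port, user, pw = ftp_writable
        fs = fsspec.filesystem(
            "ftp", host=host, port=port, username=user, password=pw
        )
        assert fs.cat_file("/out")[:5] == b"hello"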
venv/lib/python3.13/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,743 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from fsspec.caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from fsspec.compression import compr
20
+ from fsspec.config import conf
21
+ from fsspec.registry import filesystem, get_filesystem_class
22
+ from fsspec.utils import (
23
+ _unstrip_protocol,
24
+ build_name_function,
25
+ infer_compression,
26
+ stringify_path,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
+ class OpenFile:
33
+ """
34
+ File-like object to be used in a context
35
+
36
+ Can layer (buffered) text-mode and compression over any file-system, which
37
+ are typically binary-only.
38
+
39
+ These instances are safe to serialize, as the low-level file object
40
+ is not created until invoked using ``with``.
41
+
42
+ Parameters
43
+ ----------
44
+ fs: FileSystem
45
+ The file system to use for opening the file. Should be a subclass or duck-type
46
+ with ``fsspec.spec.AbstractFileSystem``
47
+ path: str
48
+ Location to open
49
+ mode: str like 'rb', optional
50
+ Mode of the opened file
51
+ compression: str or None, optional
52
+ Compression to apply
53
+ encoding: str or None, optional
54
+ The encoding to use if opened in text mode.
55
+ errors: str or None, optional
56
+ How to handle encoding errors if opened in text mode.
57
+ newline: None or str
58
+ Passed to TextIOWrapper in text mode, how to handle line endings.
59
+ autoopen: bool
60
+ If True, calls open() immediately. Mostly used by pickle
61
+ pos: int
62
+ If given and autoopen is True, seek to this location immediately
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ fs,
68
+ path,
69
+ mode="rb",
70
+ compression=None,
71
+ encoding=None,
72
+ errors=None,
73
+ newline=None,
74
+ ):
75
+ self.fs = fs
76
+ self.path = path
77
+ self.mode = mode
78
+ self.compression = get_compression(path, compression)
79
+ self.encoding = encoding
80
+ self.errors = errors
81
+ self.newline = newline
82
+ self.fobjects = []
83
+
84
+ def __reduce__(self):
85
+ return (
86
+ OpenFile,
87
+ (
88
+ self.fs,
89
+ self.path,
90
+ self.mode,
91
+ self.compression,
92
+ self.encoding,
93
+ self.errors,
94
+ self.newline,
95
+ ),
96
+ )
97
+
98
+ def __repr__(self):
99
+ return f"<OpenFile '{self.path}'>"
100
+
101
+ def __enter__(self):
102
+ mode = self.mode.replace("t", "").replace("b", "") + "b"
103
+
104
+ try:
105
+ f = self.fs.open(self.path, mode=mode)
106
+ except FileNotFoundError as e:
107
+ if has_magic(self.path):
108
+ raise FileNotFoundError(
109
+ "%s not found. The URL contains glob characters: you may need\n"
110
+ "to pass expand=True in fsspec.open() or the storage_options of \n"
111
+ "your library. You can also set the config value 'open_expand'\n"
112
+ "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
113
+ self.path,
114
+ ) from e
115
+ raise
116
+
117
+ self.fobjects = [f]
118
+
119
+ if self.compression is not None:
120
+ compress = compr[self.compression]
121
+ f = compress(f, mode=mode[0])
122
+ self.fobjects.append(f)
123
+
124
+ if "b" not in self.mode:
125
+ # assume, for example, that 'r' is equivalent to 'rt' as in builtin
126
+ f = PickleableTextIOWrapper(
127
+ f, encoding=self.encoding, errors=self.errors, newline=self.newline
128
+ )
129
+ self.fobjects.append(f)
130
+
131
+ return self.fobjects[-1]
132
+
133
+ def __exit__(self, *args):
134
+ self.close()
135
+
136
+ @property
137
+ def full_name(self):
138
+ return _unstrip_protocol(self.path, self.fs)
139
+
140
+ def open(self):
141
+ """Materialise this as a real open file without context
142
+
143
+ The OpenFile object should be explicitly closed to avoid enclosed file
144
+ instances persisting. You must, therefore, keep a reference to the OpenFile
145
+ during the life of the file-like it generates.
146
+ """
147
+ return self.__enter__()
148
+
149
+ def close(self):
150
+ """Close all encapsulated file objects"""
151
+ for f in reversed(self.fobjects):
152
+ if "r" not in self.mode and not f.closed:
153
+ f.flush()
154
+ f.close()
155
+ self.fobjects.clear()
156
+
157
+
158
+ class OpenFiles(list):
159
+ """List of OpenFile instances
160
+
161
+ Can be used in a single context, which opens and closes all of the
162
+ contained files. Normal list access to get the elements works as
163
+ normal.
164
+
165
+ A special case is made for caching filesystems - the files will
166
+ be down/uploaded together at the start or end of the context, and
167
+ this may happen concurrently, if the target filesystem supports it.
168
+ """
169
+
170
+ def __init__(self, *args, mode="rb", fs=None):
171
+ self.mode = mode
172
+ self.fs = fs
173
+ self.files = []
174
+ super().__init__(*args)
175
+
176
+ def __enter__(self):
177
+ if self.fs is None:
178
+ raise ValueError("Context has already been used")
179
+
180
+ fs = self.fs
181
+ while True:
182
+ if hasattr(fs, "open_many"):
183
+ # check for concurrent cache download; or set up for upload
184
+ self.files = fs.open_many(self)
185
+ return self.files
186
+ if hasattr(fs, "fs") and fs.fs is not None:
187
+ fs = fs.fs
188
+ else:
189
+ break
190
+ return [s.__enter__() for s in self]
191
+
192
+ def __exit__(self, *args):
193
+ fs = self.fs
194
+ [s.__exit__(*args) for s in self]
195
+ if "r" not in self.mode:
196
+ while True:
197
+ if hasattr(fs, "open_many"):
198
+ # check for concurrent cache upload
199
+ fs.commit_many(self.files)
200
+ return
201
+ if hasattr(fs, "fs") and fs.fs is not None:
202
+ fs = fs.fs
203
+ else:
204
+ break
205
+
206
+ def __getitem__(self, item):
207
+ out = super().__getitem__(item)
208
+ if isinstance(item, slice):
209
+ return OpenFiles(out, mode=self.mode, fs=self.fs)
210
+ return out
211
+
212
+ def __repr__(self):
213
+ return f"<List of {len(self)} OpenFile instances>"
214
+
215
+
216
+ def open_files(
217
+ urlpath,
218
+ mode="rb",
219
+ compression=None,
220
+ encoding="utf8",
221
+ errors=None,
222
+ name_function=None,
223
+ num=1,
224
+ protocol=None,
225
+ newline=None,
226
+ auto_mkdir=True,
227
+ expand=True,
228
+ **kwargs,
229
+ ):
230
+ """Given a path or paths, return a list of ``OpenFile`` objects.
231
+
232
+ For writing, a str path must contain the "*" character, which will be filled
233
+ in by increasing numbers, e.g., "part*" -> "part0", "part1" if num=2.
234
+
235
+ For either reading or writing, can instead provide explicit list of paths.
236
+
237
+ Parameters
238
+ ----------
239
+ urlpath: string or list
240
+ Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
241
+ to read from alternative filesystems. To read from multiple files you
242
+ can pass a globstring or a list of paths, with the caveat that they
243
+ must all have the same protocol.
244
+ mode: 'rb', 'wt', etc.
245
+ compression: string or None
246
+ If given, open file using compression codec. Can either be a compression
247
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
248
+ compression from the filename suffix.
249
+ encoding: str
250
+ For text mode only
251
+ errors: None or str
252
+ Passed to TextIOWrapper in text mode
253
+ name_function: function or None
254
+ if opening a set of files for writing, those files do not yet exist,
255
+ so we need to generate their names by formatting the urlpath for
256
+ each sequence number
257
+ num: int [1]
258
+ if writing mode, number of files we expect to create (passed to
259
+ name+function)
260
+ protocol: str or None
261
+ If given, overrides the protocol found in the URL.
262
+ newline: bytes or None
263
+ Used for line terminator in text mode. If None, uses system default;
264
+ if blank, uses no translation.
265
+ auto_mkdir: bool (True)
266
+ If in write mode, this will ensure the target directory exists before
267
+ writing, by calling ``fs.mkdirs(exist_ok=True)``.
268
+ expand: bool
269
+ **kwargs: dict
270
+ Extra options that make sense to a particular storage connection, e.g.
271
+ host, port, username, password, etc.
272
+
273
+ Examples
274
+ --------
275
+ >>> files = open_files('2015-*-*.csv') # doctest: +SKIP
276
+ >>> files = open_files(
277
+ ... 's3://bucket/2015-*-*.csv.gz', compression='gzip'
278
+ ... ) # doctest: +SKIP
279
+
280
+ Returns
281
+ -------
282
+ An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
283
+ be used as a single context
284
+
285
+ Notes
286
+ -----
287
+ For a full list of the available protocols and the implementations that
288
+ they map across to see the latest online documentation:
289
+
290
+ - For implementations built into ``fsspec`` see
291
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
292
+ - For implementations in separate packages see
293
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
294
+ """
295
+ fs, fs_token, paths = get_fs_token_paths(
296
+ urlpath,
297
+ mode,
298
+ num=num,
299
+ name_function=name_function,
300
+ storage_options=kwargs,
301
+ protocol=protocol,
302
+ expand=expand,
303
+ )
304
+ if fs.protocol == "file":
305
+ fs.auto_mkdir = auto_mkdir
306
+ elif "r" not in mode and auto_mkdir:
307
+ parents = {fs._parent(path) for path in paths}
308
+ for parent in parents:
309
+ try:
310
+ fs.makedirs(parent, exist_ok=True)
311
+ except PermissionError:
312
+ pass
313
+ return OpenFiles(
314
+ [
315
+ OpenFile(
316
+ fs,
317
+ path,
318
+ mode=mode,
319
+ compression=compression,
320
+ encoding=encoding,
321
+ errors=errors,
322
+ newline=newline,
323
+ )
324
+ for path in paths
325
+ ],
326
+ mode=mode,
327
+ fs=fs,
328
+ )
329
+
330
+
331
+ def _un_chain(path, kwargs):
332
+ # Avoid a circular import
333
+ from fsspec.implementations.chained import ChainedFileSystem
334
+
335
+ if "::" in path:
336
+ x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
337
+ bits = []
338
+ for p in path.split("::"):
339
+ if "://" in p or x.match(p):
340
+ bits.append(p)
341
+ else:
342
+ bits.append(p + "://")
343
+ else:
344
+ bits = [path]
345
+ # [[url, protocol, kwargs], ...]
346
+ out = []
347
+ previous_bit = None
348
+ kwargs = kwargs.copy()
349
+ for bit in reversed(bits):
350
+ protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
351
+ cls = get_filesystem_class(protocol)
352
+ extra_kwargs = cls._get_kwargs_from_urls(bit)
353
+ kws = kwargs.pop(protocol, {})
354
+ if bit is bits[0]:
355
+ kws.update(kwargs)
356
+ kw = dict(
357
+ **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
358
+ **kws,
359
+ )
360
+ bit = cls._strip_protocol(bit)
361
+ if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem):
362
+ bit = previous_bit
363
+ out.append((bit, protocol, kw))
364
+ previous_bit = bit
365
+ out.reverse()
366
+ return out
367
+
368
+
369
+ def url_to_fs(url, **kwargs):
370
+ """
371
+ Turn fully-qualified and potentially chained URL into filesystem instance
372
+
373
+ Parameters
374
+ ----------
375
+ url : str
376
+ The fsspec-compatible URL
377
+ **kwargs: dict
378
+ Extra options that make sense to a particular storage connection, e.g.
379
+ host, port, username, password, etc.
380
+
381
+ Returns
382
+ -------
383
+ filesystem : FileSystem
384
+ The new filesystem discovered from ``url`` and created with
385
+ ``**kwargs``.
386
+ urlpath : str
387
+ The file-systems-specific URL for ``url``.
388
+ """
389
+ url = stringify_path(url)
390
+ # non-FS arguments that appear in fsspec.open()
391
+ # inspect could keep this in sync with open()'s signature
392
+ known_kwargs = {
393
+ "compression",
394
+ "encoding",
395
+ "errors",
396
+ "expand",
397
+ "mode",
398
+ "name_function",
399
+ "newline",
400
+ "num",
401
+ }
402
+ kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
403
+ chain = _un_chain(url, kwargs)
404
+ inkwargs = {}
405
+ # Reverse iterate the chain, creating a nested target_* structure
406
+ for i, ch in enumerate(reversed(chain)):
407
+ urls, protocol, kw = ch
408
+ if i == len(chain) - 1:
409
+ inkwargs = dict(**kw, **inkwargs)
410
+ continue
411
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
412
+ inkwargs["target_protocol"] = protocol
413
+ inkwargs["fo"] = urls
414
+ urlpath, protocol, _ = chain[0]
415
+ fs = filesystem(protocol, **inkwargs)
416
+ return fs, urlpath
417
+
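A minimal sketch of ``url_to_fs`` on a simple (unchained) URL:

    from fsspec.core import url_to_fs

    fs, path = url_to_fs("memory://bucket/key.txt")
    print(fs.protocol, path)  # -> memory /bucket/key.txt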
418
+
419
+ DEFAULT_EXPAND = conf.get("open_expand", False)
420
+
421
+
422
+ def open(
423
+ urlpath,
424
+ mode="rb",
425
+ compression=None,
426
+ encoding="utf8",
427
+ errors=None,
428
+ protocol=None,
429
+ newline=None,
430
+ expand=None,
431
+ **kwargs,
432
+ ):
433
+ """Given a path or paths, return one ``OpenFile`` object.
434
+
435
+ Parameters
436
+ ----------
437
+ urlpath: string or list
438
+ Absolute or relative filepath. Prefix with a protocol like ``s3://``
439
+ to read from alternative filesystems. Should not include glob
440
+ character(s).
441
+ mode: 'rb', 'wt', etc.
442
+ compression: string or None
443
+ If given, open file using compression codec. Can either be a compression
444
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
445
+ compression from the filename suffix.
446
+ encoding: str
447
+ For text mode only
448
+ errors: None or str
449
+ Passed to TextIOWrapper in text mode
450
+ protocol: str or None
451
+ If given, overrides the protocol found in the URL.
452
+ newline: bytes or None
453
+ Used for line terminator in text mode. If None, uses system default;
454
+ if blank, uses no translation.
455
+ expand: bool or None
456
+ Whether to regard file paths containing special glob characters as needing
457
+ expansion (finding the first match) or absolute. Setting False allows using
458
+ paths which do embed such characters. If None (default), this argument
459
+ takes its value from the DEFAULT_EXPAND module variable, which takes
460
+ its initial value from the "open_expand" config value at startup, which will
461
+ be False if not set.
462
+ **kwargs: dict
463
+ Extra options that make sense to a particular storage connection, e.g.
464
+ host, port, username, password, etc.
465
+
466
+ Examples
467
+ --------
468
+ >>> openfile = open('2015-01-01.csv') # doctest: +SKIP
469
+ >>> openfile = open(
470
+ ... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
471
+ ... ) # doctest: +SKIP
472
+ >>> with openfile as f:
473
+ ... df = pd.read_csv(f) # doctest: +SKIP
474
+ ...
475
+
476
+ Returns
477
+ -------
478
+ ``OpenFile`` object.
479
+
480
+ Notes
481
+ -----
482
+ For a full list of the available protocols and the implementations that
483
+ they map across to see the latest online documentation:
484
+
485
+ - For implementations built into ``fsspec`` see
486
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
487
+ - For implementations in separate packages see
488
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
489
+ """
490
+ expand = DEFAULT_EXPAND if expand is None else expand
491
+ out = open_files(
492
+ urlpath=[urlpath],
493
+ mode=mode,
494
+ compression=compression,
495
+ encoding=encoding,
496
+ errors=errors,
497
+ protocol=protocol,
498
+ newline=newline,
499
+ expand=expand,
500
+ **kwargs,
501
+ )
502
+ if not out:
503
+ raise FileNotFoundError(urlpath)
504
+ return out[0]
505
+
506
+
507
+ def open_local(
508
+ url: str | list[str] | Path | list[Path],
509
+ mode: str = "rb",
510
+ **storage_options: dict,
511
+ ) -> str | list[str]:
512
+ """Open file(s) which can be resolved to local
513
+
514
+ For files which either are local, or get downloaded upon open
515
+ (e.g., by file caching)
516
+
517
+ Parameters
518
+ ----------
519
+ url: str or list(str)
520
+ mode: str
521
+ Must be read mode
522
+ storage_options:
523
+ passed on to FS for or used by open_files (e.g., compression)
524
+ """
525
+ if "r" not in mode:
526
+ raise ValueError("Can only ensure local files when reading")
527
+ of = open_files(url, mode=mode, **storage_options)
528
+ if not getattr(of[0].fs, "local_file", False):
529
+ raise ValueError(
530
+ "open_local can only be used on a filesystem which"
531
+ " has attribute local_file=True"
532
+ )
533
+ with of as files:
534
+ paths = [f.name for f in files]
535
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
536
+ return paths[0]
537
+ return paths
538
+
539
+
540
+ def get_compression(urlpath, compression):
541
+ if compression == "infer":
542
+ compression = infer_compression(urlpath)
543
+ if compression is not None and compression not in compr:
544
+ raise ValueError(f"Compression type {compression} not supported")
545
+ return compression
546
+
547
+
548
+ def split_protocol(urlpath):
549
+ """Return protocol, path pair"""
550
+ urlpath = stringify_path(urlpath)
551
+ if "://" in urlpath:
552
+ protocol, path = urlpath.split("://", 1)
553
+ if len(protocol) > 1:
554
+ # excludes Windows paths
555
+ return protocol, path
556
+ if urlpath.startswith("data:"):
557
+ return urlpath.split(":", 1)
558
+ return None, urlpath
559
+
560
+
561
+ def strip_protocol(urlpath):
562
+ """Return only path part of full URL, according to appropriate backend"""
563
+ protocol, _ = split_protocol(urlpath)
564
+ cls = get_filesystem_class(protocol)
565
+ return cls._strip_protocol(urlpath)
566
+
567
+
568
+ def expand_paths_if_needed(paths, mode, num, fs, name_function):
569
+ """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
570
+ in them (read mode).
571
+
572
+ :param paths: list of paths
573
+ mode: str
574
+ Mode in which to open files.
575
+ num: int
576
+ If opening in writing mode, number of files we expect to create.
577
+ fs: filesystem object
578
+ name_function: callable
579
+ If opening in writing mode, this callable is used to generate path
580
+ names. Names are generated for each partition by
581
+ ``urlpath.replace('*', name_function(partition_index))``.
582
+ :return: list of paths
583
+ """
584
+ expanded_paths = []
585
+ paths = list(paths)
586
+
587
+ if "w" in mode: # read mode
588
+ if sum(1 for p in paths if "*" in p) > 1:
589
+ raise ValueError(
590
+ "When writing data, only one filename mask can be specified."
591
+ )
592
+ num = max(num, len(paths))
593
+
594
+ for curr_path in paths:
595
+ if "*" in curr_path:
596
+ # expand using name_function
597
+ expanded_paths.extend(_expand_paths(curr_path, name_function, num))
598
+ else:
599
+ expanded_paths.append(curr_path)
600
+ # if we generated more paths that asked for, trim the list
601
+ if len(expanded_paths) > num:
602
+ expanded_paths = expanded_paths[:num]
603
+
604
+ else: # read mode
605
+ for curr_path in paths:
606
+ if has_magic(curr_path):
607
+ # expand using glob
608
+ expanded_paths.extend(fs.glob(curr_path))
609
+ else:
610
+ expanded_paths.append(curr_path)
611
+
612
+ return expanded_paths
613
+
614
+
615
+ def get_fs_token_paths(
616
+ urlpath,
617
+ mode="rb",
618
+ num=1,
619
+ name_function=None,
620
+ storage_options=None,
621
+ protocol=None,
622
+ expand=True,
623
+ ):
624
+ """Filesystem, deterministic token, and paths from a urlpath and options.
625
+
626
+ Parameters
627
+ ----------
628
+ urlpath: string or iterable
629
+ Absolute or relative filepath, URL (may include protocols like
630
+ ``s3://``), or globstring pointing to data.
631
+ mode: str, optional
632
+ Mode in which to open files.
633
+ num: int, optional
634
+ If opening in writing mode, number of files we expect to create.
635
+ name_function: callable, optional
636
+ If opening in writing mode, this callable is used to generate path
637
+ names. Names are generated for each partition by
638
+ ``urlpath.replace('*', name_function(partition_index))``.
639
+ storage_options: dict, optional
640
+ Additional keywords to pass to the filesystem class.
641
+ protocol: str or None
642
+ To override the protocol specifier in the URL
643
+ expand: bool
644
+ Expand string paths for writing, assuming the path is a directory
645
+ """
646
+ if isinstance(urlpath, (list, tuple, set)):
647
+ if not urlpath:
648
+ raise ValueError("empty urlpath sequence")
649
+ urlpath0 = stringify_path(next(iter(urlpath)))
650
+ else:
651
+ urlpath0 = stringify_path(urlpath)
652
+ storage_options = storage_options or {}
653
+ if protocol:
654
+ storage_options["protocol"] = protocol
655
+ chain = _un_chain(urlpath0, storage_options or {})
656
+ inkwargs = {}
657
+ # Reverse iterate the chain, creating a nested target_* structure
658
+ for i, ch in enumerate(reversed(chain)):
659
+ urls, nested_protocol, kw = ch
660
+ if i == len(chain) - 1:
661
+ inkwargs = dict(**kw, **inkwargs)
662
+ continue
663
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
664
+ inkwargs["target_protocol"] = nested_protocol
665
+ inkwargs["fo"] = urls
666
+ paths, protocol, _ = chain[0]
667
+ fs = filesystem(protocol, **inkwargs)
668
+ if isinstance(urlpath, (list, tuple, set)):
669
+ pchains = [
670
+ _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
671
+ ]
672
+ if len({pc[1] for pc in pchains}) > 1:
673
+ raise ValueError("Protocol mismatch getting fs from %s", urlpath)
674
+ paths = [pc[0] for pc in pchains]
675
+ else:
676
+ paths = fs._strip_protocol(paths)
677
+ if isinstance(paths, (list, tuple, set)):
678
+ if expand:
679
+ paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
680
+ elif not isinstance(paths, list):
681
+ paths = list(paths)
682
+ else:
683
+ if ("w" in mode or "x" in mode) and expand:
684
+ paths = _expand_paths(paths, name_function, num)
685
+ elif "*" in paths:
686
+ paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
687
+ else:
688
+ paths = [paths]
689
+
690
+ return fs, fs._fs_token, paths
691
+
692
+
693
+ def _expand_paths(path, name_function, num):
694
+ if isinstance(path, str):
695
+ if path.count("*") > 1:
696
+ raise ValueError("Output path spec must contain exactly one '*'.")
697
+ elif "*" not in path:
698
+ path = os.path.join(path, "*.part")
699
+
700
+ if name_function is None:
701
+ name_function = build_name_function(num - 1)
702
+
703
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
704
+ if paths != sorted(paths):
705
+ logger.warning(
706
+ "In order to preserve order between partitions"
707
+ " paths created with ``name_function`` should "
708
+ "sort to partition order"
709
+ )
710
+ elif isinstance(path, (tuple, list)):
711
+ assert len(path) == num
712
+ paths = list(path)
713
+ else:
714
+ raise ValueError(
715
+ "Path should be either\n"
716
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
717
+ "2. A directory: 'foo/\n"
718
+ "3. A path with a '*' in it: 'foo.*.json'"
719
+ )
720
+ return paths
721
+
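A sketch of the default numbering when no ``name_function`` is supplied:

    from fsspec.core import _expand_paths

    assert _expand_paths("out-*.csv", None, 2) == ["out-0.csv", "out-1.csv"]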
722
+
723
+ class PickleableTextIOWrapper(io.TextIOWrapper):
724
+ """TextIOWrapper cannot be pickled. This solves it.
725
+
726
+ Requires that ``buffer`` be pickleable, which all instances of
727
+ AbstractBufferedFile are.
728
+ """
729
+
730
+ def __init__(
731
+ self,
732
+ buffer,
733
+ encoding=None,
734
+ errors=None,
735
+ newline=None,
736
+ line_buffering=False,
737
+ write_through=False,
738
+ ):
739
+ self.args = buffer, encoding, errors, newline, line_buffering, write_through
740
+ super().__init__(*self.args)
741
+
742
+ def __reduce__(self):
743
+ return PickleableTextIOWrapper, self.args
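A sketch tying this module together: ``fsspec.open`` layers inferred gzip compression and text decoding over a binary file on the in-memory filesystem.

    import fsspec

    with fsspec.open("memory://demo.txt.gz", "wt", compression="infer") as f:
        f.write("line1\nline2\n")

    with fsspec.open("memory://demo.txt.gz", "rt", compression="infer") as f:
        print(f.read())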
venv/lib/python3.13/site-packages/fsspec/dircache.py ADDED
@@ -0,0 +1,98 @@
1
+ import time
2
+ from collections.abc import MutableMapping
3
+ from functools import lru_cache
4
+
5
+
6
+ class DirCache(MutableMapping):
7
+ """
8
+ Caching of directory listings, in a structure like::
9
+
10
+ {"path0": [
11
+ {"name": "path0/file0",
12
+ "size": 123,
13
+ "type": "file",
14
+ ...
15
+ },
16
+ {"name": "path0/file1",
17
+ },
18
+ ...
19
+ ],
20
+ "path1": [...]
21
+ }
22
+
23
+ Parameters to this class control listing expiry or indeed turn
24
+ caching off
25
+ """
26
+
27
+ def __init__(
28
+ self,
29
+ use_listings_cache=True,
30
+ listings_expiry_time=None,
31
+ max_paths=None,
32
+ **kwargs,
33
+ ):
34
+ """
35
+
36
+ Parameters
37
+ ----------
38
+ use_listings_cache: bool
39
+ If False, this cache never returns items, but always reports KeyError,
40
+ and setting items has no effect
41
+ listings_expiry_time: int or float (optional)
42
+ Time in seconds that a listing is considered valid. If None,
43
+ listings do not expire.
44
+ max_paths: int (optional)
45
+ The number of most recent listings that are considered valid; 'recent'
46
+ refers to when the entry was set.
47
+ """
48
+ self._cache = {}
49
+ self._times = {}
50
+ if max_paths:
51
+ self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
52
+ self.use_listings_cache = use_listings_cache
53
+ self.listings_expiry_time = listings_expiry_time
54
+ self.max_paths = max_paths
55
+
56
+ def __getitem__(self, item):
57
+ if self.listings_expiry_time is not None:
58
+ if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
59
+ del self._cache[item]
60
+ if self.max_paths:
61
+ self._q(item)
62
+ return self._cache[item] # maybe raises KeyError
63
+
64
+ def clear(self):
65
+ self._cache.clear()
66
+
67
+ def __len__(self):
68
+ return len(self._cache)
69
+
70
+ def __contains__(self, item):
71
+ try:
72
+ self[item]
73
+ return True
74
+ except KeyError:
75
+ return False
76
+
77
+ def __setitem__(self, key, value):
78
+ if not self.use_listings_cache:
79
+ return
80
+ if self.max_paths:
81
+ self._q(key)
82
+ self._cache[key] = value
83
+ if self.listings_expiry_time is not None:
84
+ self._times[key] = time.time()
85
+
86
+ def __delitem__(self, key):
87
+ del self._cache[key]
88
+
89
+ def __iter__(self):
90
+ entries = list(self._cache)
91
+
92
+ return (k for k in entries if k in self)
93
+
94
+ def __reduce__(self):
95
+ return (
96
+ DirCache,
97
+ (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
98
+ )
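A sketch of listing expiry; the half-second expiry time is arbitrary:

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.5)
    cache["/data"] = [{"name": "/data/a", "size": 1, "type": "file"}]
    assert "/data" in cache       # fresh listing is served
    time.sleep(0.6)
    assert "/data" not in cache   # expired listing is evicted on access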
venv/lib/python3.13/site-packages/fsspec/fuse.py ADDED
@@ -0,0 +1,324 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import stat
5
+ import threading
6
+ import time
7
+ from errno import EIO, ENOENT
8
+
9
+ from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
10
+
11
+ from fsspec import __version__
12
+ from fsspec.core import url_to_fs
13
+
14
+ logger = logging.getLogger("fsspec.fuse")
15
+
16
+
17
+ class FUSEr(Operations):
18
+ def __init__(self, fs, path, ready_file=False):
19
+ self.fs = fs
20
+ self.cache = {}
21
+ self.root = path.rstrip("/") + "/"
22
+ self.counter = 0
23
+ logger.info("Starting FUSE at %s", path)
24
+ self._ready_file = ready_file
25
+
26
+ def getattr(self, path, fh=None):
27
+ logger.debug("getattr %s", path)
28
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
29
+ return {"type": "file", "st_size": 5}
30
+
31
+ path = "".join([self.root, path.lstrip("/")]).rstrip("/")
32
+ try:
33
+ info = self.fs.info(path)
34
+ except FileNotFoundError as exc:
35
+ raise FuseOSError(ENOENT) from exc
36
+
37
+ data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
38
+ perm = info.get("mode", 0o777)
39
+
40
+ if info["type"] != "file":
41
+ data["st_mode"] = stat.S_IFDIR | perm
42
+ data["st_size"] = 0
43
+ data["st_blksize"] = 0
44
+ else:
45
+ data["st_mode"] = stat.S_IFREG | perm
46
+ data["st_size"] = info["size"]
47
+ data["st_blksize"] = 5 * 2**20
48
+ data["st_nlink"] = 1
49
+ data["st_atime"] = info["atime"] if "atime" in info else time.time()
50
+ data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
51
+ data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
52
+ return data
53
+
54
+ def readdir(self, path, fh):
55
+ logger.debug("readdir %s", path)
56
+ path = "".join([self.root, path.lstrip("/")])
57
+ files = self.fs.ls(path, False)
58
+ files = [os.path.basename(f.rstrip("/")) for f in files]
59
+ return [".", ".."] + files
60
+
61
+ def mkdir(self, path, mode):
62
+ path = "".join([self.root, path.lstrip("/")])
63
+ self.fs.mkdir(path)
64
+ return 0
65
+
66
+ def rmdir(self, path):
67
+ path = "".join([self.root, path.lstrip("/")])
68
+ self.fs.rmdir(path)
69
+ return 0
70
+
71
+ def read(self, path, size, offset, fh):
72
+ logger.debug("read %s", (path, size, offset))
73
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
74
+ # status indicator
75
+ return b"ready"
76
+
77
+ f = self.cache[fh]
78
+ f.seek(offset)
79
+ out = f.read(size)
80
+ return out
81
+
82
+ def write(self, path, data, offset, fh):
83
+ logger.debug("write %s", (path, offset))
84
+ f = self.cache[fh]
85
+ f.seek(offset)
86
+ f.write(data)
87
+ return len(data)
88
+
89
+ def create(self, path, flags, fi=None):
90
+ logger.debug("create %s", (path, flags))
91
+ fn = "".join([self.root, path.lstrip("/")])
92
+ self.fs.touch(fn) # OS will want to get attributes immediately
93
+ f = self.fs.open(fn, "wb")
94
+ self.cache[self.counter] = f
95
+ self.counter += 1
96
+ return self.counter - 1
97
+
98
+ def open(self, path, flags):
99
+ logger.debug("open %s", (path, flags))
100
+ fn = "".join([self.root, path.lstrip("/")])
101
+ if flags % 2 == 0:
102
+ # read
103
+ mode = "rb"
104
+ else:
105
+ # write/create
106
+ mode = "wb"
107
+ self.cache[self.counter] = self.fs.open(fn, mode)
108
+ self.counter += 1
109
+ return self.counter - 1
110
+
111
+ def truncate(self, path, length, fh=None):
112
+ fn = "".join([self.root, path.lstrip("/")])
113
+ if length != 0:
114
+ raise NotImplementedError
115
+ # maybe should be no-op since open with write sets size to zero anyway
116
+ self.fs.touch(fn)
117
+
118
+ def unlink(self, path):
119
+ fn = "".join([self.root, path.lstrip("/")])
120
+ try:
121
+ self.fs.rm(fn, False)
122
+ except (OSError, FileNotFoundError) as exc:
123
+ raise FuseOSError(EIO) from exc
124
+
125
+ def release(self, path, fh):
126
+ try:
127
+ if fh in self.cache:
128
+ f = self.cache[fh]
129
+ f.close()
130
+ self.cache.pop(fh)
131
+ except Exception as e:
132
+ print(e)
133
+ return 0
134
+
135
+ def chmod(self, path, mode):
136
+ if hasattr(self.fs, "chmod"):
137
+ path = "".join([self.root, path.lstrip("/")])
138
+ return self.fs.chmod(path, mode)
139
+ raise NotImplementedError
140
+
141
+
142
+ def run(
143
+ fs,
144
+ path,
145
+ mount_point,
146
+ foreground=True,
147
+ threads=False,
148
+ ready_file=False,
149
+ ops_class=FUSEr,
150
+ ):
151
+ """Mount stuff in a local directory
152
+
153
+ This uses fusepy to make it appear as if a given path on an fsspec
154
+ instance is in fact resident within the local file-system.
155
+
156
+ This requires that fusepy by installed, and that FUSE be available on
157
+ the system (typically requiring a package to be installed with
158
+ apt, yum, brew, etc.).
159
+
160
+ Parameters
161
+ ----------
162
+ fs: file-system instance
163
+ From one of the compatible implementations
164
+ path: str
165
+ Location on that file-system to regard as the root directory to
166
+ mount. Note that you typically should include the terminating "/"
167
+ character.
168
+ mount_point: str
169
+ An empty directory on the local file-system where the contents of
170
+ the remote path will appear.
171
+ foreground: bool
172
+ Whether or not calling this function will block. Operation will
173
+ typically be more stable if True.
174
+ threads: bool
175
+ Whether or not to create threads when responding to file operations
176
+ within the mounter directory. Operation will typically be more
177
+ stable if False.
178
+ ready_file: bool
179
+ Whether the FUSE process is ready. The ``.fuse_ready`` file will
180
+ exist in the ``mount_point`` directory if True. Debugging purpose.
181
+ ops_class: FUSEr or Subclass of FUSEr
182
+ To override the default behavior of FUSEr. For Example, logging
183
+ to file.
184
+
185
+ """
186
+ func = lambda: FUSE(
187
+ ops_class(fs, path, ready_file=ready_file),
188
+ mount_point,
189
+ nothreads=not threads,
190
+ foreground=foreground,
191
+ )
192
+ if not foreground:
193
+ th = threading.Thread(target=func)
194
+ th.daemon = True
195
+ th.start()
196
+ return th
197
+ else: # pragma: no cover
198
+ try:
199
+ func()
200
+ except KeyboardInterrupt:
201
+ pass
202
+
203
+
204
+ def main(args):
205
+ """Mount filesystem from chained URL to MOUNT_POINT.
206
+
207
+ Examples:
208
+
209
+ python3 -m fsspec.fuse memory /usr/share /tmp/mem
210
+
211
+ python3 -m fsspec.fuse local /tmp/source /tmp/local \\
212
+ -l /tmp/fsspecfuse.log
213
+
214
+ You can also mount chained-URLs and use special settings:
215
+
216
+ python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
217
+ / /tmp/zip \\
218
+ -o 'filecache-cache_storage=/tmp/simplecache'
219
+
220
+ You can specify the type of the setting by using `[int]` or `[bool]`,
221
+ (`true`, `yes`, `1` represents the Boolean value `True`):
222
+
223
+ python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
224
+ /historic/packages/RPMS /tmp/ftp \\
225
+ -o 'simplecache-cache_storage=/tmp/simplecache' \\
226
+ -o 'simplecache-check_files=false[bool]' \\
227
+ -o 'ftp-listings_expiry_time=60[int]' \\
228
+ -o 'ftp-username=anonymous' \\
229
+ -o 'ftp-password=xieyanbo'
230
+ """
231
+
232
+ class RawDescriptionArgumentParser(argparse.ArgumentParser):
233
+ def format_help(self):
234
+ usage = super().format_help()
235
+ parts = usage.split("\n\n")
236
+ parts[1] = self.description.rstrip()
237
+ return "\n\n".join(parts)
238
+
239
+ parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
240
+ parser.add_argument("--version", action="version", version=__version__)
241
+ parser.add_argument("url", type=str, help="fs url")
242
+ parser.add_argument("source_path", type=str, help="source directory in fs")
243
+ parser.add_argument("mount_point", type=str, help="local directory")
244
+ parser.add_argument(
245
+ "-o",
246
+ "--option",
247
+ action="append",
248
+ help="Any options of protocol included in the chained URL",
249
+ )
250
+ parser.add_argument(
251
+ "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
252
+ )
253
+ parser.add_argument(
254
+ "-f",
255
+ "--foreground",
256
+ action="store_false",
257
+ help="Running in foreground or not (Default: False)",
258
+ )
259
+ parser.add_argument(
260
+ "-t",
261
+ "--threads",
262
+ action="store_false",
263
+ help="Running with threads support (Default: False)",
264
+ )
265
+ parser.add_argument(
266
+ "-r",
267
+ "--ready-file",
268
+ action="store_false",
269
+ help="The `.fuse_ready` file will exist after FUSE is ready. "
270
+ "(Debugging purpose, Default: False)",
271
+ )
272
+ args = parser.parse_args(args)
273
+
274
+ kwargs = {}
275
+ for item in args.option or []:
276
+ key, sep, value = item.partition("=")
277
+ if not sep:
278
+ parser.error(message=f"Wrong option: {item!r}")
279
+ val = value.lower()
280
+ if val.endswith("[int]"):
281
+ value = int(value[: -len("[int]")])
282
+ elif val.endswith("[bool]"):
283
+ value = val[: -len("[bool]")] in ["1", "yes", "true"]
284
+
285
+ if "-" in key:
286
+ fs_name, setting_name = key.split("-", 1)
287
+ if fs_name in kwargs:
288
+ kwargs[fs_name][setting_name] = value
289
+ else:
290
+ kwargs[fs_name] = {setting_name: value}
291
+ else:
292
+ kwargs[key] = value
293
+
294
+ if args.log_file:
295
+ logging.basicConfig(
296
+ level=logging.DEBUG,
297
+ filename=args.log_file,
298
+ format="%(asctime)s %(message)s",
299
+ )
300
+
301
+ class LoggingFUSEr(FUSEr, LoggingMixIn):
302
+ pass
303
+
304
+ fuser = LoggingFUSEr
305
+ else:
306
+ fuser = FUSEr
307
+
308
+ fs, url_path = url_to_fs(args.url, **kwargs)
309
+ logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
310
+ run(
311
+ fs,
312
+ args.source_path,
313
+ args.mount_point,
314
+ foreground=args.foreground,
315
+ threads=args.threads,
316
+ ready_file=args.ready_file,
317
+ ops_class=fuser,
318
+ )
319
+
320
+
321
+ if __name__ == "__main__":
322
+ import sys
323
+
324
+ main(sys.argv[1:])
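
A hedged sketch of driving ``run`` from Python rather than the CLI; the memory filesystem and the mount path are illustrative, and fusepy plus a system FUSE install are required, per the docstring above. ``/tmp/mem`` must be an existing empty directory:

import fsspec
from fsspec.fuse import run

fs = fsspec.filesystem("memory")
fs.pipe_file("/data/hello.txt", b"hello world")

# Non-blocking: returns the daemon thread running the FUSE loop.
th = run(fs, "/data/", "/tmp/mem", foreground=False)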
venv/lib/python3.13/site-packages/fsspec/generic.py ADDED
@@ -0,0 +1,396 @@
+from __future__ import annotations
+
+import inspect
+import logging
+import os
+import shutil
+import uuid
+
+from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
+from .callbacks import DEFAULT_CALLBACK
+from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
+
+_generic_fs = {}
+logger = logging.getLogger("fsspec.generic")
+
+
+def set_generic_fs(protocol, **storage_options):
+    """Populate the dict used for method=="generic" lookups"""
+    _generic_fs[protocol] = filesystem(protocol, **storage_options)
+
+
+def _resolve_fs(url, method, protocol=None, storage_options=None):
+    """Pick instance of backend FS"""
+    url = url[0] if isinstance(url, (list, tuple)) else url
+    protocol = protocol or split_protocol(url)[0]
+    storage_options = storage_options or {}
+    if method == "default":
+        return filesystem(protocol)
+    if method == "generic":
+        return _generic_fs[protocol]
+    if method == "current":
+        cls = get_filesystem_class(protocol)
+        return cls.current()
+    if method == "options":
+        fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
+        return fs
+    raise ValueError(f"Unknown FS resolution method: {method}")
+
+
+def rsync(
+    source,
+    destination,
+    delete_missing=False,
+    source_field="size",
+    dest_field="size",
+    update_cond="different",
+    inst_kwargs=None,
+    fs=None,
+    **kwargs,
+):
+    """Sync files between two directory trees
+
+    (experimental)
+
+    Parameters
+    ----------
+    source: str
+        Root of the directory tree to take files from. This must be a directory, but
+        do not include any terminating "/" character
+    destination: str
+        Root path to copy into. The contents of this location should be
+        identical to the contents of ``source`` when done. This will be made a
+        directory, and the terminal "/" should not be included.
+    delete_missing: bool
+        If there are paths in the destination that don't exist in the
+        source and this is True, delete them. Otherwise, leave them alone.
+    source_field: str | callable
+        If ``update_cond`` is "different", this is the key in the info
+        of source files to consider for difference. May be a function of the
+        info dict.
+    dest_field: str | callable
+        If ``update_cond`` is "different", this is the key in the info
+        of destination files to consider for difference. May be a function of
+        the info dict.
+    update_cond: "different"|"always"|"never"
+        If "always", every file is copied, regardless of whether it exists in
+        the destination. If "never", files that exist in the destination are
+        not copied again. If "different" (default), only copy if the info
+        fields given by ``source_field`` and ``dest_field`` (usually "size")
+        are different. Other comparisons may be added in the future.
+    inst_kwargs: dict|None
+        If ``fs`` is None, use this set of keyword arguments to make a
+        GenericFileSystem instance
+    fs: GenericFileSystem|None
+        Instance to use if explicitly given. The instance defines how to
+        make downstream file system instances from paths.
+
+    Returns
+    -------
+    dict of the copy operations that were performed, {source: destination}
+    """
+    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
+    source = fs._strip_protocol(source)
+    destination = fs._strip_protocol(destination)
+    allfiles = fs.find(source, withdirs=True, detail=True)
+    if not fs.isdir(source):
+        raise ValueError("Can only rsync on a directory")
+    otherfiles = fs.find(destination, withdirs=True, detail=True)
+    dirs = [
+        a
+        for a, v in allfiles.items()
+        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
+    ]
+    logger.debug(f"{len(dirs)} directories to create")
+    if dirs:
+        fs.make_many_dirs(
+            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
+        )
+    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
+    logger.debug(f"{len(allfiles)} files to consider for copy")
+    to_delete = [
+        o
+        for o, v in otherfiles.items()
+        if o.replace(destination, source) not in allfiles and v["type"] == "file"
+    ]
+    for k, v in allfiles.copy().items():
+        otherfile = k.replace(source, destination)
+        if otherfile in otherfiles:
+            if update_cond == "always":
+                allfiles[k] = otherfile
+            elif update_cond == "never":
+                allfiles.pop(k)
+            elif update_cond == "different":
+                inf1 = source_field(v) if callable(source_field) else v[source_field]
+                v2 = otherfiles[otherfile]
+                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
+                if inf1 != inf2:
+                    # details mismatch, make copy
+                    allfiles[k] = otherfile
+                else:
+                    # details match, don't copy
+                    allfiles.pop(k)
+        else:
+            # file not in target yet
+            allfiles[k] = otherfile
+    logger.debug(f"{len(allfiles)} files to copy")
+    if allfiles:
+        source_files, target_files = zip(*allfiles.items())
+        fs.cp(source_files, target_files, **kwargs)
+    logger.debug(f"{len(to_delete)} files to delete")
+    if delete_missing and to_delete:
+        fs.rm(to_delete)
+    return allfiles
+
+
+class GenericFileSystem(AsyncFileSystem):
+    """Wrapper over all other FS types
+
+    <experimental!>
+
+    This implementation is a single unified interface to be able to run FS operations
+    over generic URLs, and dispatch to the specific implementations using the URL
+    protocol prefix.
+
+    Note: instances of this FS are always async, even if you never use it with any
+    async backend.
+    """
+
+    protocol = "generic"  # there is no real reason to ever use a protocol with this FS
+
+    def __init__(self, default_method="default", storage_options=None, **kwargs):
+        """
+
+        Parameters
+        ----------
+        default_method: str (optional)
+            Defines how to configure backend FS instances. Options are:
+            - "default": instantiate like FSClass(), with no
+              extra arguments; this is the default instance of that FS, and can be
+              configured via the config system
+            - "generic": takes instances from the `_generic_fs` dict in this module,
+              which you must populate before use. Keys are by protocol
+            - "options": expects storage_options, a dict mapping protocol to
+              kwargs to use when constructing the filesystem
+            - "current": takes the most recently instantiated version of each FS
+        """
+        self.method = default_method
+        self.st_opts = storage_options
+        super().__init__(**kwargs)
+
+    def _parent(self, path):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        return fs.unstrip_protocol(fs._parent(path))
+
+    def _strip_protocol(self, path):
+        # normalization only
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        return fs.unstrip_protocol(fs._strip_protocol(path))
+
+    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            out = await fs._find(
+                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
+            )
+        else:
+            out = fs.find(
+                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
+            )
+        result = {}
+        for k, v in out.items():
+            v = v.copy()  # don't corrupt target FS dircache
+            name = fs.unstrip_protocol(k)
+            v["name"] = name
+            result[name] = v
+        if detail:
+            return result
+        return list(result)
+
+    async def _info(self, url, **kwargs):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            out = await fs._info(url, **kwargs)
+        else:
+            out = fs.info(url, **kwargs)
+        out = out.copy()  # don't edit originals
+        out["name"] = fs.unstrip_protocol(out["name"])
+        return out
+
+    async def _ls(
+        self,
+        url,
+        detail=True,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            out = await fs._ls(url, detail=True, **kwargs)
+        else:
+            out = fs.ls(url, detail=True, **kwargs)
+        out = [o.copy() for o in out]  # don't edit originals
+        for o in out:
+            o["name"] = fs.unstrip_protocol(o["name"])
+        if detail:
+            return out
+        else:
+            return [o["name"] for o in out]
+
+    async def _cat_file(
+        self,
+        url,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            return await fs._cat_file(url, **kwargs)
+        else:
+            return fs.cat_file(url, **kwargs)
+
+    async def _pipe_file(
+        self,
+        path,
+        value,
+        **kwargs,
+    ):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            return await fs._pipe_file(path, value, **kwargs)
+        else:
+            return fs.pipe_file(path, value, **kwargs)
+
+    async def _rm(self, url, **kwargs):
+        urls = url
+        if isinstance(urls, str):
+            urls = [urls]
+        fs = _resolve_fs(urls[0], self.method)
+        if fs.async_impl:
+            await fs._rm(urls, **kwargs)
+        else:
+            fs.rm(url, **kwargs)
+
+    async def _makedirs(self, path, exist_ok=False):
+        logger.debug("Make dir %s", path)
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            await fs._makedirs(path, exist_ok=exist_ok)
+        else:
+            fs.makedirs(path, exist_ok=exist_ok)
+
+    def rsync(self, source, destination, **kwargs):
+        """Sync files between two directory trees
+
+        See :func:`rsync` for more details.
+        """
+        rsync(source, destination, fs=self, **kwargs)
+
+    async def _cp_file(
+        self,
+        url,
+        url2,
+        blocksize=2**20,
+        callback=DEFAULT_CALLBACK,
+        tempdir: str | None = None,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        fs2 = _resolve_fs(url2, self.method)
+        if fs is fs2:
+            # pure remote
+            if fs.async_impl:
+                return await fs._copy(url, url2, **kwargs)
+            else:
+                return fs.copy(url, url2, **kwargs)
+        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
+
+    async def _make_many_dirs(self, urls, exist_ok=True):
+        fs = _resolve_fs(urls[0], self.method)
+        if fs.async_impl:
+            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
+            await _run_coros_in_chunks(coros)
+        else:
+            for u in urls:
+                fs.makedirs(u, exist_ok=exist_ok)
+
+    make_many_dirs = sync_wrapper(_make_many_dirs)
+
+    async def _copy(
+        self,
+        path1: list[str],
+        path2: list[str],
+        recursive: bool = False,
+        on_error: str = "ignore",
+        maxdepth: int | None = None,
+        batch_size: int | None = None,
+        tempdir: str | None = None,
+        **kwargs,
+    ):
+        # TODO: special case for one FS being local, which can use get/put
+        # TODO: special case for one being memFS, which can use cat/pipe
+        if recursive:
+            raise NotImplementedError("Please use fsspec.generic.rsync")
+        path1 = [path1] if isinstance(path1, str) else path1
+        path2 = [path2] if isinstance(path2, str) else path2
+
+        fs = _resolve_fs(path1, self.method)
+        fs2 = _resolve_fs(path2, self.method)
+
+        if fs is fs2:
+            if fs.async_impl:
+                return await fs._copy(path1, path2, **kwargs)
+            else:
+                return fs.copy(path1, path2, **kwargs)
+
+        await copy_file_op(
+            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
+        )
+
+
+async def copy_file_op(
+    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
+):
+    import tempfile
+
+    tempdir = tempdir or tempfile.mkdtemp()
+    try:
+        coros = [
+            _copy_file_op(
+                fs1,
+                u1,
+                fs2,
+                u2,
+                os.path.join(tempdir, uuid.uuid4().hex),
+            )
+            for u1, u2 in zip(url1, url2)
+        ]
+        out = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, return_exceptions=True
+        )
+    finally:
+        shutil.rmtree(tempdir)
+    if on_error == "return":
+        return out
+    elif on_error == "raise":
+        for o in out:
+            if isinstance(o, Exception):
+                raise o
+
+
+async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
+    if fs1.async_impl:
+        await fs1._get_file(url1, local)
+    else:
+        fs1.get_file(url1, local)
+    if fs2.async_impl:
+        await fs2._put_file(local, url2)
+    else:
+        fs2.put_file(local, url2)
+    os.unlink(local)
+    logger.debug("Copy %s -> %s; done", url1, url2)
+
+
+async def maybe_await(cor):
+    if inspect.iscoroutine(cor):
+        return await cor
+    else:
+        return cor
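
A usage sketch of the `rsync` helper defined above, mirroring one local tree into another; the paths are placeholders and must already exist as directories:

from fsspec.generic import rsync

# Copy only files whose sizes differ (update_cond="different" is the default),
# and remove destination files that no longer exist in the source.
copied = rsync(
    "file:///tmp/source",
    "file:///tmp/backup",
    delete_missing=True,
)
print(f"{len(copied)} files copied")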
venv/lib/python3.13/site-packages/fsspec/gui.py ADDED
@@ -0,0 +1,417 @@
+import ast
+import contextlib
+import logging
+import os
+import re
+from collections.abc import Sequence
+from typing import ClassVar
+
+import panel as pn
+
+from .core import OpenFile, get_filesystem_class, split_protocol
+from .registry import known_implementations
+
+pn.extension()
+logger = logging.getLogger("fsspec.gui")
+
+
+class SigSlot:
+    """Signal-slot mixin, for Panel event passing
+
+    Include this class in a widget manager's superclasses to be able to
+    register events and callbacks on Panel widgets managed by that class.
+
+    The method ``_register`` should be called as widgets are added, and external
+    code should call ``connect`` to associate callbacks.
+
+    By default, all signals emit a DEBUG logging statement.
+    """
+
+    # names of signals that this class may emit,
+    # each of which must be set by _register for any new instance
+    signals: ClassVar[Sequence[str]] = []
+    # names of actions that this class may respond to,
+    # each of which must be a method name
+    slots: ClassVar[Sequence[str]] = []
+
+    def __init__(self):
+        self._ignoring_events = False
+        self._sigs = {}
+        self._map = {}
+        self._setup()
+
+    def _setup(self):
+        """Create GUI elements and register signals"""
+        self.panel = pn.pane.PaneBase()
+        # no signals to set up in the base class
+
+    def _register(
+        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
+    ):
+        """Watch the given attribute of a widget and assign it a named event
+
+        This is normally called at the time a widget is instantiated, in the
+        class which owns it.
+
+        Parameters
+        ----------
+        widget : pn.layout.Panel or None
+            Widget to watch. If None, an anonymous signal not associated with
+            any widget.
+        name : str
+            Name of this event
+        thing : str
+            Attribute of the given widget to watch
+        log_level : int
+            When the signal is triggered, a logging event of the given level
+            will be fired in the dfviz logger.
+        auto : bool
+            If True, automatically connects with a method in this class of the
+            same name.
+        """
+        if name not in self.signals:
+            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
+        self._sigs[name] = {
+            "widget": widget,
+            "callbacks": [],
+            "thing": thing,
+            "log": log_level,
+        }
+        wn = "-".join(
+            [
+                getattr(widget, "name", str(widget)) if widget is not None else "none",
+                thing,
+            ]
+        )
+        self._map[wn] = name
+        if widget is not None:
+            widget.param.watch(self._signal, thing, onlychanged=True)
+        if auto and hasattr(self, name):
+            self.connect(name, getattr(self, name))
+
+    def _repr_mimebundle_(self, *args, **kwargs):
+        """Display in a notebook or a server"""
+        try:
+            return self.panel._repr_mimebundle_(*args, **kwargs)
+        except (ValueError, AttributeError) as exc:
+            raise NotImplementedError(
+                "Panel does not seem to be set up properly"
+            ) from exc
+
+    def connect(self, signal, slot):
+        """Associate callback with given event
+
+        The callback must be a function which takes the "new" value of the
+        watched attribute as the only parameter. If the callback returns False,
+        this cancels any further processing of the given event.
+
+        Alternatively, the callback can be a string, in which case it means
+        emitting the correspondingly-named event (i.e., connect to self)
+        """
+        self._sigs[signal]["callbacks"].append(slot)
+
+    def _signal(self, event):
+        """This is called by an action on a widget
+
+        Within a self.ignore_events context, nothing happens.
+
+        Tests can execute this method by directly changing the values of
+        widget components.
+        """
+        if not self._ignoring_events:
+            wn = "-".join([event.obj.name, event.name])
+            if wn in self._map and self._map[wn] in self._sigs:
+                self._emit(self._map[wn], event.new)
+
+    @contextlib.contextmanager
+    def ignore_events(self):
+        """Temporarily turn off events processing in this instance
+
+        (does not propagate to children)
+        """
+        self._ignoring_events = True
+        try:
+            yield
+        finally:
+            self._ignoring_events = False
+
+    def _emit(self, sig, value=None):
+        """An event happened, call its callbacks
+
+        This method can be used in tests to simulate message passing without
+        directly changing visual elements.
+
+        Calling of callbacks will halt whenever one returns False.
+        """
+        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
+        for callback in self._sigs[sig]["callbacks"]:
+            if isinstance(callback, str):
+                self._emit(callback)
+            else:
+                try:
+                    # running callbacks should not break the interface
+                    ret = callback(value)
+                    if ret is False:
+                        break
+                except Exception as e:
+                    logger.exception(
+                        "Exception (%s) while executing callback for signal: %s",
+                        e,
+                        sig,
+                    )
+
+    def show(self, threads=False):
+        """Open a new browser tab and display this instance's interface"""
+        self.panel.show(threads=threads, verbose=False)
+        return self
+
+
+class SingleSelect(SigSlot):
+    """A multiselect which only allows you to select one item for an event"""
+
+    signals = ["_selected", "selected"]  # the first is internal
+    slots = ["set_options", "set_selection", "add", "clear", "select"]
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        super().__init__()
+
+    def _setup(self):
+        self.panel = pn.widgets.MultiSelect(**self.kwargs)
+        self._register(self.panel, "_selected", "value")
+        self._register(None, "selected")
+        self.connect("_selected", self.select_one)
+
+    def _signal(self, *args, **kwargs):
+        super()._signal(*args, **kwargs)
+
+    def select_one(self, *_):
+        with self.ignore_events():
+            val = [self.panel.value[-1]] if self.panel.value else []
+            self.panel.value = val
+        self._emit("selected", self.panel.value)
+
+    def set_options(self, options):
+        self.panel.options = options
+
+    def clear(self):
+        self.panel.options = []
+
+    @property
+    def value(self):
+        return self.panel.value
+
+    def set_selection(self, selection):
+        self.panel.value = [selection]
+
+
+class FileSelector(SigSlot):
+    """Panel-based graphical file selector widget
+
+    Instances of this widget are interactive and can be displayed in jupyter by having
+    them as the output of a cell, or in a separate browser tab using ``.show()``.
+    """
+
+    signals = [
+        "protocol_changed",
+        "selection_changed",
+        "directory_entered",
+        "home_clicked",
+        "up_clicked",
+        "go_clicked",
+        "filters_changed",
+    ]
+    slots = ["set_filters", "go_home"]
+
+    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
+        """
+
+        Parameters
+        ----------
+        url : str (optional)
+            Initial value of the URL to populate the dialog; should include protocol
+        filters : list(str) (optional)
+            File endings to include in the listings. If not included, all files are
+            allowed. Does not affect directories.
+            If given, the endings will appear as checkboxes in the interface
+        ignore : list(str) (optional)
+            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
+            hidden files on posix
+        kwargs : dict (optional)
+            To pass to file system instance
+        """
+        if url:
+            self.init_protocol, url = split_protocol(url)
+        else:
+            self.init_protocol, url = "file", os.getcwd()
+        self.init_url = url
+        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
+        self.filters = filters
+        self.ignore = [re.compile(i) for i in ignore or []]
+        self._fs = None
+        super().__init__()
+
+    def _setup(self):
+        self.url = pn.widgets.TextInput(
+            name="url",
+            value=self.init_url,
+            align="end",
+            sizing_mode="stretch_width",
+            width_policy="max",
+        )
+        self.protocol = pn.widgets.Select(
+            options=sorted(known_implementations),
+            value=self.init_protocol,
+            name="protocol",
+            align="center",
+        )
+        self.kwargs = pn.widgets.TextInput(
+            name="kwargs", value=self.init_kwargs, align="center"
+        )
+        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
+        self.main = SingleSelect(size=10)
+        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
+        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")
+
+        self._register(self.protocol, "protocol_changed", auto=True)
+        self._register(self.go, "go_clicked", "clicks", auto=True)
+        self._register(self.up, "up_clicked", "clicks", auto=True)
+        self._register(self.home, "home_clicked", "clicks", auto=True)
+        self._register(None, "selection_changed")
+        self.main.connect("selected", self.selection_changed)
+        self._register(None, "directory_entered")
+        self.prev_protocol = self.protocol.value
+        self.prev_kwargs = self.storage_options
+
+        self.filter_sel = pn.widgets.CheckBoxGroup(
+            value=[], options=[], inline=False, align="end", width_policy="min"
+        )
+        self._register(self.filter_sel, "filters_changed", auto=True)
+
+        self.panel = pn.Column(
+            pn.Row(self.protocol, self.kwargs),
+            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
+            self.main.panel,
+        )
+        self.set_filters(self.filters)
+        self.go_clicked()
+
+    def set_filters(self, filters=None):
+        self.filters = filters
+        if filters:
+            self.filter_sel.options = filters
+            self.filter_sel.value = filters
+        else:
+            self.filter_sel.options = []
+            self.filter_sel.value = []
+
+    @property
+    def storage_options(self):
+        """Value of the kwargs box as a dictionary"""
+        return ast.literal_eval(self.kwargs.value) or {}
+
+    @property
+    def fs(self):
+        """Current filesystem instance"""
+        if self._fs is None:
+            cls = get_filesystem_class(self.protocol.value)
+            self._fs = cls(**self.storage_options)
+        return self._fs
+
+    @property
+    def urlpath(self):
+        """URL of currently selected item"""
+        return (
+            (f"{self.protocol.value}://{self.main.value[0]}")
+            if self.main.value
+            else None
+        )
+
+    def open_file(self, mode="rb", compression=None, encoding=None):
+        """Create OpenFile instance for the currently selected item
+
+        For example, in a notebook you might do something like
+
+        .. code-block::
+
+            [ ]: sel = FileSelector(); sel
+
+            # user selects their file
+
+            [ ]: with sel.open_file('rb') as f:
+            ...      out = f.read()
+
+        Parameters
+        ----------
+        mode: str (optional)
+            Open mode for the file.
+        compression: str (optional)
+            Whether to interact with the file as compressed. Set to 'infer' to
+            guess compression from the file ending
+        encoding: str (optional)
+            If using text mode, use this encoding; defaults to UTF8.
+        """
+        if self.urlpath is None:
+            raise ValueError("No file selected")
+        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)
+
+    def filters_changed(self, values):
+        self.filters = values
+        self.go_clicked()
+
+    def selection_changed(self, *_):
+        if self.urlpath is None:
+            return
+        if self.fs.isdir(self.urlpath):
+            self.url.value = self.fs._strip_protocol(self.urlpath)
+            self.go_clicked()
+
+    def go_clicked(self, *_):
+        if (
+            self.prev_protocol != self.protocol.value
+            or self.prev_kwargs != self.storage_options
+        ):
+            self._fs = None  # causes fs to be recreated
+            self.prev_protocol = self.protocol.value
+            self.prev_kwargs = self.storage_options
+        listing = sorted(
+            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
+        )
+        listing = [
+            l
+            for l in listing
+            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
+        ]
+        folders = {
+            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
+            for o in listing
+            if o["type"] == "directory"
+        }
+        files = {
+            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
+            for o in listing
+            if o["type"] == "file"
+        }
+        if self.filters:
+            files = {
+                k: v
+                for k, v in files.items()
+                if any(v.endswith(ext) for ext in self.filters)
+            }
+        self.main.set_options(dict(**folders, **files))
+
+    def protocol_changed(self, *_):
+        self._fs = None
+        self.main.options = []
+        self.url.value = ""
+
+    def home_clicked(self, *_):
+        self.protocol.value = self.init_protocol
+        self.kwargs.value = self.init_kwargs
+        self.url.value = self.init_url
+        self.go_clicked()
+
+    def up_clicked(self, *_):
+        self.url.value = self.fs._parent(self.url.value)
+        self.go_clicked()
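
A short sketch of the selector's documented workflow, assuming a Jupyter session with panel installed; the path and filter are illustrative:

from fsspec.gui import FileSelector

sel = FileSelector("file:///tmp", filters=[".csv"])
sel  # display the widget as the cell output

# ... after the user picks a file interactively ...
with sel.open_file("rb") as f:
    header = f.read(100)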
venv/lib/python3.13/site-packages/fsspec/json.py ADDED
@@ -0,0 +1,117 @@
+import json
+from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import PurePath
+from typing import (
+    Any,
+    Callable,
+    ClassVar,
+    Optional,
+)
+
+from .registry import _import_class, get_filesystem_class
+from .spec import AbstractFileSystem
+
+
+class FilesystemJSONEncoder(json.JSONEncoder):
+    include_password: ClassVar[bool] = True
+
+    def default(self, o: Any) -> Any:
+        if isinstance(o, AbstractFileSystem):
+            return o.to_dict(include_password=self.include_password)
+        if isinstance(o, PurePath):
+            cls = type(o)
+            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}
+
+        return super().default(o)
+
+    def make_serializable(self, obj: Any) -> Any:
+        """
+        Recursively converts an object so that it can be JSON serialized via
+        :func:`json.dumps` and :func:`json.dump`, without actually calling
+        said functions.
+        """
+        if isinstance(obj, (str, int, float, bool)):
+            return obj
+        if isinstance(obj, Mapping):
+            return {k: self.make_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, Sequence):
+            return [self.make_serializable(v) for v in obj]
+
+        return self.default(obj)
+
+
+class FilesystemJSONDecoder(json.JSONDecoder):
+    def __init__(
+        self,
+        *,
+        object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
+        parse_float: Optional[Callable[[str], Any]] = None,
+        parse_int: Optional[Callable[[str], Any]] = None,
+        parse_constant: Optional[Callable[[str], Any]] = None,
+        strict: bool = True,
+        object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
+    ) -> None:
+        self.original_object_hook = object_hook
+
+        super().__init__(
+            object_hook=self.custom_object_hook,
+            parse_float=parse_float,
+            parse_int=parse_int,
+            parse_constant=parse_constant,
+            strict=strict,
+            object_pairs_hook=object_pairs_hook,
+        )
+
+    @classmethod
+    def try_resolve_path_cls(cls, dct: dict[str, Any]):
+        with suppress(Exception):
+            fqp = dct["cls"]
+
+            path_cls = _import_class(fqp)
+
+            if issubclass(path_cls, PurePath):
+                return path_cls
+
+        return None
+
+    @classmethod
+    def try_resolve_fs_cls(cls, dct: dict[str, Any]):
+        with suppress(Exception):
+            if "cls" in dct:
+                try:
+                    fs_cls = _import_class(dct["cls"])
+                    if issubclass(fs_cls, AbstractFileSystem):
+                        return fs_cls
+                except Exception:
+                    if "protocol" in dct:  # Fallback if cls cannot be imported
+                        return get_filesystem_class(dct["protocol"])
+
+                    raise
+
+        return None
+
+    def custom_object_hook(self, dct: dict[str, Any]):
+        if "cls" in dct:
+            if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
+                return AbstractFileSystem.from_dict(dct)
+            if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
+                return obj_cls(dct["str"])
+
+        if self.original_object_hook is not None:
+            return self.original_object_hook(dct)
+
+        return dct
+
+    def unmake_serializable(self, obj: Any) -> Any:
+        """
+        Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
+        """
+        if isinstance(obj, dict):
+            obj = self.custom_object_hook(obj)
+        if isinstance(obj, dict):
+            return {k: self.unmake_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [self.unmake_serializable(v) for v in obj]
+
+        return obj
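
A sketch of round-tripping a filesystem instance through the encoder/decoder pair above, assuming an fsspec version where `AbstractFileSystem.to_dict`/`from_dict` are available (as the encoder's `default` requires):

import json

from fsspec.implementations.memory import MemoryFileSystem
from fsspec.json import FilesystemJSONDecoder, FilesystemJSONEncoder

fs = MemoryFileSystem()
payload = json.dumps({"fs": fs}, cls=FilesystemJSONEncoder)

# The decoder's object hook resolves the "cls" entry back to a filesystem.
restored = json.loads(payload, cls=FilesystemJSONDecoder)
assert isinstance(restored["fs"], MemoryFileSystem)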
venv/lib/python3.13/site-packages/fsspec/mapping.py ADDED
@@ -0,0 +1,251 @@
+import array
+import logging
+import posixpath
+import warnings
+from collections.abc import MutableMapping
+from functools import cached_property
+
+from fsspec.core import url_to_fs
+
+logger = logging.getLogger("fsspec.mapping")
+
+
+class FSMap(MutableMapping):
+    """Wrap a FileSystem instance as a mutable mapping.
+
+    The keys of the mapping become files under the given root, and the
+    values (which must be bytes) the contents of those files.
+
+    Parameters
+    ----------
+    root: string
+        prefix for all the files
+    fs: FileSystem instance
+    check: bool (=True)
+        performs a touch at the location, to check for write access.
+
+    Examples
+    --------
+    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
+    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
+    or, more likely
+    >>> d = fs.get_mapper('my-data/path/')
+
+    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
+    >>> list(d.keys())  # doctest: +SKIP
+    ['loc1']
+    >>> d['loc1']  # doctest: +SKIP
+    b'Hello World'
+    """
+
+    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
+        self.fs = fs
+        self.root = fs._strip_protocol(root)
+        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
+        if missing_exceptions is None:
+            missing_exceptions = (
+                FileNotFoundError,
+                IsADirectoryError,
+                NotADirectoryError,
+            )
+        self.missing_exceptions = missing_exceptions
+        self.check = check
+        self.create = create
+        if create:
+            if not self.fs.exists(root):
+                self.fs.mkdir(root)
+        if check:
+            if not self.fs.exists(root):
+                raise ValueError(
+                    f"Path {root} does not exist. Create "
+                    f"with the ``create=True`` keyword"
+                )
+            self.fs.touch(root + "/a")
+            self.fs.rm(root + "/a")
+
+    @cached_property
+    def dirfs(self):
+        """dirfs instance that can be used with the same keys as the mapper"""
+        from .implementations.dirfs import DirFileSystem
+
+        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
+
+    def clear(self):
+        """Remove all keys below root - empties out mapping"""
+        logger.info("Clear mapping at %s", self.root)
+        try:
+            self.fs.rm(self.root, True)
+            self.fs.mkdir(self.root)
+        except:  # noqa: E722
+            pass
+
+    def getitems(self, keys, on_error="raise"):
+        """Fetch multiple items from the store
+
+        If the backend is async-able, this might proceed concurrently
+
+        Parameters
+        ----------
+        keys: list(str)
+            The keys to be fetched
+        on_error : "raise", "omit", "return"
+            If raise, an underlying exception will be raised (converted to KeyError
+            if the type is in self.missing_exceptions); if omit, keys with exception
+            will simply not be included in the output; if "return", all keys are
+            included in the output, but the value will be bytes or an exception
+            instance.
+
+        Returns
+        -------
+        dict(key, bytes|exception)
+        """
+        keys2 = [self._key_to_str(k) for k in keys]
+        oe = on_error if on_error == "raise" else "return"
+        try:
+            out = self.fs.cat(keys2, on_error=oe)
+            if isinstance(out, bytes):
+                out = {keys2[0]: out}
+        except self.missing_exceptions as e:
+            raise KeyError from e
+        out = {
+            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
+            for k, v in out.items()
+        }
+        return {
+            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
+            for key, k2 in zip(keys, keys2)
+            if on_error == "return" or not isinstance(out[k2], BaseException)
+        }
+
+    def setitems(self, values_dict):
+        """Set the values of multiple items in the store
+
+        Parameters
+        ----------
+        values_dict: dict(str, bytes)
+        """
+        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
+        self.fs.pipe(values)
+
+    def delitems(self, keys):
+        """Remove multiple keys from the store"""
+        self.fs.rm([self._key_to_str(k) for k in keys])
+
+    def _key_to_str(self, key):
+        """Generate full path for the key"""
+        if not isinstance(key, str):
+            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
+            warnings.warn(
+                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
+                DeprecationWarning,
+            )
+            if isinstance(key, list):
+                key = tuple(key)
+            key = str(key)
+        return f"{self._root_key_to_str}{key}".rstrip("/")
+
+    def _str_to_key(self, s):
+        """Strip path off to leave key name"""
+        return s[len(self.root) :].lstrip("/")
+
+    def __getitem__(self, key, default=None):
+        """Retrieve data"""
+        k = self._key_to_str(key)
+        try:
+            result = self.fs.cat(k)
+        except self.missing_exceptions as exc:
+            if default is not None:
+                return default
+            raise KeyError(key) from exc
+        return result
+
+    def pop(self, key, default=None):
+        """Pop data"""
+        result = self.__getitem__(key, default)
+        try:
+            del self[key]
+        except KeyError:
+            pass
+        return result
+
+    def __setitem__(self, key, value):
+        """Store value in key"""
+        key = self._key_to_str(key)
+        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
+        self.fs.pipe_file(key, maybe_convert(value))
+
+    def __iter__(self):
+        return (self._str_to_key(x) for x in self.fs.find(self.root))
+
+    def __len__(self):
+        return len(self.fs.find(self.root))
+
+    def __delitem__(self, key):
+        """Remove key"""
+        try:
+            self.fs.rm(self._key_to_str(key))
+        except Exception as exc:
+            raise KeyError from exc
+
+    def __contains__(self, key):
+        """Does key exist in mapping?"""
+        path = self._key_to_str(key)
+        return self.fs.isfile(path)
+
+    def __reduce__(self):
+        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
+
+
+def maybe_convert(value):
+    if isinstance(value, array.array) or hasattr(value, "__array__"):
+        # bytes-like things
+        if hasattr(value, "dtype") and value.dtype.kind in "Mm":
+            # The buffer interface doesn't support datetime64/timedelta64 numpy
+            # arrays
+            value = value.view("int64")
+        value = bytes(memoryview(value))
+    return value
+
+
+def get_mapper(
+    url="",
+    check=False,
+    create=False,
+    missing_exceptions=None,
+    alternate_root=None,
+    **kwargs,
+):
+    """Create key-value interface for given URL and options
+
+    The URL will be of the form "protocol://location" and point to the root
+    of the mapper required. All keys will be file-names below this location,
+    and their values the contents of each key.
+
+    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.
+
+    Parameters
+    ----------
+    url: str
+        Root URL of mapping
+    check: bool
+        Whether to attempt to read from the location before instantiation, to
+        check that the mapping does exist
+    create: bool
+        Whether to make the directory corresponding to the root before
+        instantiating
+    missing_exceptions: None or tuple
+        If given, these exception types will be regarded as missing keys and
+        return KeyError when trying to read data. By default, you get
+        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
+    alternate_root: None or str
+        In cases of complex URLs, the parser may fail to pick the correct part
+        for the mapper root, so this arg can override
+
+    Returns
+    -------
+    ``FSMap`` instance, the dict-like key-value store.
+    """
+    # Removing protocol here - could defer to each open() on the backend
+    fs, urlpath = url_to_fs(url, **kwargs)
+    root = alternate_root if alternate_root is not None else urlpath
+    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
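
A minimal sketch of the key-value interface described above, using the top-level `fsspec.get_mapper` entry point with the memory filesystem; the root path is illustrative:

import fsspec

# Key-value view onto a memory filesystem; keys map to files under the root.
m = fsspec.get_mapper("memory://mapped", create=True)
m["x/y"] = b"payload"

assert list(m) == ["x/y"]
assert m["x/y"] == b"payload"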
venv/lib/python3.13/site-packages/fsspec/parquet.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import warnings
4
+
5
+ from .core import url_to_fs
6
+ from .utils import merge_offset_ranges
7
+
8
+ # Parquet-Specific Utilities for fsspec
9
+ #
10
+ # Most of the functions defined in this module are NOT
11
+ # intended for public consumption. The only exception
12
+ # to this is `open_parquet_file`, which should be used
13
+ # place of `fs.open()` to open parquet-formatted files
14
+ # on remote file systems.
15
+
16
+
17
+ def open_parquet_file(
18
+ path,
19
+ mode="rb",
20
+ fs=None,
21
+ metadata=None,
22
+ columns=None,
23
+ row_groups=None,
24
+ storage_options=None,
25
+ strict=False,
26
+ engine="auto",
27
+ max_gap=64_000,
28
+ max_block=256_000_000,
29
+ footer_sample_size=1_000_000,
30
+ **kwargs,
31
+ ):
32
+ """
33
+ Return a file-like object for a single Parquet file.
34
+
35
+ The specified parquet `engine` will be used to parse the
36
+ footer metadata, and determine the required byte ranges
37
+ from the file. The target path will then be opened with
38
+ the "parts" (`KnownPartsOfAFile`) caching strategy.
39
+
40
+ Note that this method is intended for usage with remote
41
+ file systems, and is unlikely to improve parquet-read
42
+ performance on local file systems.
43
+
44
+ Parameters
45
+ ----------
46
+ path: str
47
+ Target file path.
48
+ mode: str, optional
49
+ Mode option to be passed through to `fs.open`. Default is "rb".
50
+ metadata: Any, optional
51
+ Parquet metadata object. Object type must be supported
52
+ by the backend parquet engine. For now, only the "fastparquet"
53
+ engine supports an explicit `ParquetFile` metadata object.
54
+ If a metadata object is supplied, the remote footer metadata
55
+ will not need to be transferred into local memory.
56
+ fs: AbstractFileSystem, optional
57
+ Filesystem object to use for opening the file. If nothing is
58
+ specified, an `AbstractFileSystem` object will be inferred.
59
+ engine : str, default "auto"
60
+ Parquet engine to use for metadata parsing. Allowed options
61
+ include "fastparquet", "pyarrow", and "auto". The specified
62
+ engine must be installed in the current environment. If
63
+ "auto" is specified, and both engines are installed,
64
+ "fastparquet" will take precedence over "pyarrow".
65
+ columns: list, optional
66
+ List of all column names that may be read from the file.
67
+ row_groups : list, optional
68
+ List of all row-groups that may be read from the file. This
69
+ may be a list of row-group indices (integers), or it may be
70
+ a list of `RowGroup` metadata objects (if the "fastparquet"
71
+ engine is used).
72
+ storage_options : dict, optional
73
+ Used to generate an `AbstractFileSystem` object if `fs` was
74
+ not specified.
75
+ strict : bool, optional
76
+ Whether the resulting `KnownPartsOfAFile` cache should
77
+ fetch reads that go beyond a known byte-range boundary.
78
+ If `False` (the default), any read that ends outside a
79
+ known part will be zero padded. Note that using
80
+ `strict=True` may be useful for debugging.
81
+ max_gap : int, optional
82
+ Neighboring byte ranges will only be merged when their
83
+ inter-range gap is <= `max_gap`. Default is 64KB.
84
+ max_block : int, optional
85
+ Neighboring byte ranges will only be merged when the size of
86
+ the aggregated range is <= `max_block`. Default is 256MB.
87
+ footer_sample_size : int, optional
88
+ Number of bytes to read from the end of the path to look
89
+ for the footer metadata. If the sampled bytes do not contain
90
+ the footer, a second read request will be required, and
91
+ performance will suffer. Default is 1MB.
92
+ **kwargs :
93
+ Optional key-word arguments to pass to `fs.open`
94
+ """
95
+
96
+ # Make sure we have an `AbstractFileSystem` object
97
+ # to work with
98
+ if fs is None:
99
+ fs = url_to_fs(path, **(storage_options or {}))[0]
100
+
101
+ # For now, `columns == []` not supported. Just use
102
+ # default `open` command with `path` input
103
+ if columns is not None and len(columns) == 0:
104
+ return fs.open(path, mode=mode)
105
+
106
+ # Set the engine
107
+ engine = _set_engine(engine)
108
+
109
+ # Fetch the known byte ranges needed to read
110
+ # `columns` and/or `row_groups`
111
+ data = _get_parquet_byte_ranges(
112
+ [path],
113
+ fs,
114
+ metadata=metadata,
115
+ columns=columns,
116
+ row_groups=row_groups,
117
+ engine=engine,
118
+ max_gap=max_gap,
119
+ max_block=max_block,
120
+ footer_sample_size=footer_sample_size,
121
+ )
122
+
123
+ # Extract file name from `data`
124
+ fn = next(iter(data)) if data else path
125
+
126
+ # Call self.open with "parts" caching
127
+ options = kwargs.pop("cache_options", {}).copy()
128
+ return fs.open(
129
+ fn,
130
+ mode=mode,
131
+ cache_type="parts",
132
+ cache_options={
133
+ **options,
134
+ "data": data.get(fn, {}),
135
+ "strict": strict,
136
+ },
137
+ **kwargs,
138
+ )
139
+
140
+
141
+ def _get_parquet_byte_ranges(
142
+ paths,
143
+ fs,
144
+ metadata=None,
145
+ columns=None,
146
+ row_groups=None,
147
+ max_gap=64_000,
148
+ max_block=256_000_000,
149
+ footer_sample_size=1_000_000,
150
+ engine="auto",
151
+ ):
152
+ """Get a dictionary of the known byte ranges needed
153
+ to read a specific column/row-group selection from a
154
+ Parquet dataset. Each value in the output dictionary
155
+ is intended for use as the `data` argument for the
156
+ `KnownPartsOfAFile` caching strategy of a single path.
157
+ """
158
+
159
+ # Set engine if necessary
160
+ if isinstance(engine, str):
161
+ engine = _set_engine(engine)
162
+
163
+ # Pass to specialized function if metadata is defined
164
+ if metadata is not None:
165
+ # Use the provided parquet metadata object
166
+ # to avoid transferring/parsing footer metadata
167
+ return _get_parquet_byte_ranges_from_metadata(
168
+ metadata,
169
+ fs,
170
+ engine,
171
+ columns=columns,
172
+ row_groups=row_groups,
173
+ max_gap=max_gap,
174
+ max_block=max_block,
175
+ )
176
+
177
+ # Get file sizes asynchronously
178
+ file_sizes = fs.sizes(paths)
179
+
180
+ # Populate global paths, starts, & ends
181
+ result = {}
182
+ data_paths = []
183
+ data_starts = []
184
+ data_ends = []
185
+ add_header_magic = True
186
+ if columns is None and row_groups is None:
187
+ # We are NOT selecting specific columns or row-groups.
188
+ #
189
+ # We can avoid sampling the footers, and just transfer
190
+ # all file data with cat_ranges
191
+ for i, path in enumerate(paths):
192
+ result[path] = {}
193
+ for b in range(0, file_sizes[i], max_block):
194
+ data_paths.append(path)
195
+ data_starts.append(b)
196
+ data_ends.append(min(b + max_block, file_sizes[i]))
197
+ add_header_magic = False # "Magic" should already be included
198
+ else:
199
+ # We ARE selecting specific columns or row-groups.
200
+ #
201
+ # Gather file footers.
202
+ # We just take the last `footer_sample_size` bytes of each
203
+ # file (or the entire file if it is smaller than that)
204
+ footer_starts = []
205
+ footer_ends = []
206
+ for i, path in enumerate(paths):
207
+ footer_ends.append(file_sizes[i])
208
+ sample_size = max(0, file_sizes[i] - footer_sample_size)
209
+ footer_starts.append(sample_size)
210
+ footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
211
+
212
+ # Check our footer samples and re-sample if necessary.
213
+ missing_footer_starts = footer_starts.copy()
214
+ large_footer = 0
215
+ for i, path in enumerate(paths):
216
+ footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
217
+ real_footer_start = file_sizes[i] - (footer_size + 8)
218
+ if real_footer_start < footer_starts[i]:
219
+ missing_footer_starts[i] = real_footer_start
220
+ large_footer = max(large_footer, (footer_size + 8))
221
+ if large_footer:
222
+ warnings.warn(
223
+ f"Not enough data was used to sample the parquet footer. "
224
+ f"Try setting footer_sample_size >= {large_footer}."
225
+ )
226
+ for i, block in enumerate(
227
+ fs.cat_ranges(
228
+ paths,
229
+ missing_footer_starts,
230
+ footer_starts,
231
+ )
232
+ ):
233
+ footer_samples[i] = block + footer_samples[i]
234
+ footer_starts[i] = missing_footer_starts[i]
235
+
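The footer-size arithmetic above follows the parquet footer convention: a file ends with a 4-byte little-endian footer length followed by the magic bytes `PAR1`. A standalone illustration with made-up tail bytes:

```python
file_size = 1_000
tail = b"\x2a\x00\x00\x00PAR1"  # hypothetical final 8 bytes of a parquet file
assert tail[-4:] == b"PAR1"                        # trailing magic
footer_size = int.from_bytes(tail[:4], "little")   # 42
real_footer_start = file_size - (footer_size + 8)  # 950
```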
236
+ # Calculate required byte ranges for each path
237
+ for i, path in enumerate(paths):
238
+ # Deal with small-file case.
239
+ # Just include all remaining bytes of the file
240
+ # in a single range.
241
+ if file_sizes[i] < max_block:
242
+ if footer_starts[i] > 0:
243
+ # Only need to transfer the data if the
244
+ # footer sample isn't already the whole file
245
+ data_paths.append(path)
246
+ data_starts.append(0)
247
+ data_ends.append(footer_starts[i])
248
+ continue
249
+
250
+ # Use "engine" to collect data byte ranges
251
+ path_data_starts, path_data_ends = engine._parquet_byte_ranges(
252
+ columns,
253
+ row_groups=row_groups,
254
+ footer=footer_samples[i],
255
+ footer_start=footer_starts[i],
256
+ )
257
+
258
+ data_paths += [path] * len(path_data_starts)
259
+ data_starts += path_data_starts
260
+ data_ends += path_data_ends
261
+
262
+ # Merge adjacent offset ranges
263
+ data_paths, data_starts, data_ends = merge_offset_ranges(
264
+ data_paths,
265
+ data_starts,
266
+ data_ends,
267
+ max_gap=max_gap,
268
+ max_block=max_block,
269
+ sort=False, # Should already be sorted
270
+ )
271
+
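For intuition, a small sketch of what `fsspec.utils.merge_offset_ranges` does with `max_gap` (the toy ranges are made up):

```python
from fsspec.utils import merge_offset_ranges

paths, starts, ends = merge_offset_ranges(
    ["f", "f", "f"], [0, 10, 500], [8, 100, 600], max_gap=16, max_block=None
)
# The 2-byte gap between (0, 8) and (10, 100) is within max_gap, so those
# two ranges merge: paths == ["f", "f"], starts == [0, 500], ends == [100, 600]
```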
272
+ # Start by populating `result` with footer samples
273
+ for i, path in enumerate(paths):
274
+ result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
275
+
276
+ # Transfer the data byte-ranges into local memory
277
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
278
+
279
+ # Add b"PAR1" to header if necessary
280
+ if add_header_magic:
281
+ _add_header_magic(result)
282
+
283
+ return result
284
+
285
+
286
+ def _get_parquet_byte_ranges_from_metadata(
287
+ metadata,
288
+ fs,
289
+ engine,
290
+ columns=None,
291
+ row_groups=None,
292
+ max_gap=64_000,
293
+ max_block=256_000_000,
294
+ ):
295
+ """Simplified version of `_get_parquet_byte_ranges` for
296
+ the case that an engine-specific `metadata` object is
297
+ provided, and the remote footer metadata does not need to
298
+ be transferred before calculating the required byte ranges.
299
+ """
300
+
301
+ # Use "engine" to collect data byte ranges
302
+ data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
303
+ columns,
304
+ row_groups=row_groups,
305
+ metadata=metadata,
306
+ )
307
+
308
+ # Merge adjacent offset ranges
309
+ data_paths, data_starts, data_ends = merge_offset_ranges(
310
+ data_paths,
311
+ data_starts,
312
+ data_ends,
313
+ max_gap=max_gap,
314
+ max_block=max_block,
315
+ sort=False, # Should be sorted
316
+ )
317
+
318
+ # Transfer the data byte-ranges into local memory
319
+ result = {fn: {} for fn in list(set(data_paths))}
320
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
321
+
322
+ # Add b"PAR1" to header
323
+ _add_header_magic(result)
324
+
325
+ return result
326
+
327
+
328
+ def _transfer_ranges(fs, blocks, paths, starts, ends):
329
+ # Use cat_ranges to gather the data byte_ranges
330
+ ranges = (paths, starts, ends)
331
+ for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
332
+ blocks[path][(start, stop)] = data
333
+
334
+
335
+ def _add_header_magic(data):
336
+ # Add b"PAR1" to file headers
337
+ for path in list(data.keys()):
338
+ add_magic = True
339
+ for k in data[path]:
340
+ if k[0] == 0 and k[1] >= 4:
341
+ add_magic = False
342
+ break
343
+ if add_magic:
344
+ data[path][(0, 4)] = b"PAR1"
345
+
346
+
347
+ def _set_engine(engine_str):
348
+ # Define a list of parquet engines to try
349
+ if engine_str == "auto":
350
+ try_engines = ("fastparquet", "pyarrow")
351
+ elif not isinstance(engine_str, str):
352
+ raise ValueError(
353
+ "Failed to set parquet engine! "
354
+ "Please pass 'fastparquet', 'pyarrow', or 'auto'"
355
+ )
356
+ elif engine_str not in ("fastparquet", "pyarrow"):
357
+ raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
358
+ else:
359
+ try_engines = [engine_str]
360
+
361
+ # Try importing the engines in `try_engines`,
362
+ # and choose the first one that succeeds
363
+ for engine in try_engines:
364
+ try:
365
+ if engine == "fastparquet":
366
+ return FastparquetEngine()
367
+ elif engine == "pyarrow":
368
+ return PyarrowEngine()
369
+ except ImportError:
370
+ pass
371
+
372
+ # Raise an error if a supported parquet engine
373
+ # was not found
374
+ raise ImportError(
375
+ f"The following parquet engines are not installed "
376
+ f"in your python environment: {try_engines}. "
377
+ f"Please install 'fastparquet' or 'pyarrow' to "
378
+ f"utilize the `fsspec.parquet` module."
379
+ )
380
+
381
+
382
+ class FastparquetEngine:
383
+ # The purpose of the FastparquetEngine class is
384
+ # to check if fastparquet can be imported (on initialization)
385
+ # and to define a `_parquet_byte_ranges` method. In the
386
+ # future, this class may also be used to define other
387
+ # methods/logic that are specific to fastparquet.
388
+
389
+ def __init__(self):
390
+ import fastparquet as fp
391
+
392
+ self.fp = fp
393
+
394
+ def _row_group_filename(self, row_group, pf):
395
+ return pf.row_group_filename(row_group)
396
+
397
+ def _parquet_byte_ranges(
398
+ self,
399
+ columns,
400
+ row_groups=None,
401
+ metadata=None,
402
+ footer=None,
403
+ footer_start=None,
404
+ ):
405
+ # Initialize offset ranges and define ParquetFile metadata
406
+ pf = metadata
407
+ data_paths, data_starts, data_ends = [], [], []
408
+ if pf is None:
409
+ pf = self.fp.ParquetFile(io.BytesIO(footer))
410
+
411
+ # Convert columns to a set and add any index columns
412
+ # specified in the pandas metadata (just in case)
413
+ column_set = None if columns is None else set(columns)
414
+ if column_set is not None and hasattr(pf, "pandas_metadata"):
415
+ md_index = [
416
+ ind
417
+ for ind in pf.pandas_metadata.get("index_columns", [])
418
+ # Ignore RangeIndex information
419
+ if not isinstance(ind, dict)
420
+ ]
421
+ column_set |= set(md_index)
422
+
423
+ # Check if row_groups is a list of integers
424
+ # or a list of row-group metadata
425
+ if row_groups and not isinstance(row_groups[0], int):
426
+ # Input row_groups contains row-group metadata
427
+ row_group_indices = None
428
+ else:
429
+ # Input row_groups contains row-group indices
430
+ row_group_indices = row_groups
431
+ row_groups = pf.row_groups
432
+
433
+ # Loop through column chunks to add required byte ranges
434
+ for r, row_group in enumerate(row_groups):
435
+ # Skip this row-group if we are targeting
436
+ # specific row-groups
437
+ if row_group_indices is None or r in row_group_indices:
438
+ # Find the target parquet-file path for `row_group`
439
+ fn = self._row_group_filename(row_group, pf)
440
+
441
+ for column in row_group.columns:
442
+ name = column.meta_data.path_in_schema[0]
443
+ # Skip this column if we are targeting a
444
+ # specific column set
445
+ if column_set is None or name in column_set:
446
+ file_offset0 = column.meta_data.dictionary_page_offset
447
+ if file_offset0 is None:
448
+ file_offset0 = column.meta_data.data_page_offset
449
+ num_bytes = column.meta_data.total_compressed_size
450
+ if footer_start is None or file_offset0 < footer_start:
451
+ data_paths.append(fn)
452
+ data_starts.append(file_offset0)
453
+ data_ends.append(
454
+ min(
455
+ file_offset0 + num_bytes,
456
+ footer_start or (file_offset0 + num_bytes),
457
+ )
458
+ )
459
+
460
+ if metadata:
461
+ # The metadata in this call may map to multiple
462
+ # file paths. Need to include `data_paths`
463
+ return data_paths, data_starts, data_ends
464
+ return data_starts, data_ends
465
+
466
+
467
+ class PyarrowEngine:
468
+ # The purpose of the PyarrowEngine class is
469
+ # to check if pyarrow can be imported (on initialization)
470
+ # and to define a `_parquet_byte_ranges` method. In the
471
+ # future, this class may also be used to define other
472
+ # methods/logic that are specific to pyarrow.
473
+
474
+ def __init__(self):
475
+ import pyarrow.parquet as pq
476
+
477
+ self.pq = pq
478
+
479
+ def _row_group_filename(self, row_group, metadata):
480
+ raise NotImplementedError
481
+
482
+ def _parquet_byte_ranges(
483
+ self,
484
+ columns,
485
+ row_groups=None,
486
+ metadata=None,
487
+ footer=None,
488
+ footer_start=None,
489
+ ):
490
+ if metadata is not None:
491
+ raise ValueError("metadata input not supported for PyarrowEngine")
492
+
493
+ data_starts, data_ends = [], []
494
+ md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
495
+
496
+ # Convert columns to a set and add any index columns
497
+ # specified in the pandas metadata (just in case)
498
+ column_set = None if columns is None else set(columns)
499
+ if column_set is not None:
500
+ schema = md.schema.to_arrow_schema()
501
+ has_pandas_metadata = (
502
+ schema.metadata is not None and b"pandas" in schema.metadata
503
+ )
504
+ if has_pandas_metadata:
505
+ md_index = [
506
+ ind
507
+ for ind in json.loads(
508
+ schema.metadata[b"pandas"].decode("utf8")
509
+ ).get("index_columns", [])
510
+ # Ignore RangeIndex information
511
+ if not isinstance(ind, dict)
512
+ ]
513
+ column_set |= set(md_index)
514
+
515
+ # Loop through column chunks to add required byte ranges
516
+ for r in range(md.num_row_groups):
517
+ # Skip this row-group if we are targeting
518
+ # specific row-groups
519
+ if row_groups is None or r in row_groups:
520
+ row_group = md.row_group(r)
521
+ for c in range(row_group.num_columns):
522
+ column = row_group.column(c)
523
+ name = column.path_in_schema
524
+ # Skip this column if we are targeting a
525
+ # specific column set
526
+ split_name = name.split(".")[0]
527
+ if (
528
+ column_set is None
529
+ or name in column_set
530
+ or split_name in column_set
531
+ ):
532
+ file_offset0 = column.dictionary_page_offset
533
+ if file_offset0 is None:
534
+ file_offset0 = column.data_page_offset
535
+ num_bytes = column.total_compressed_size
536
+ if file_offset0 < footer_start:
537
+ data_starts.append(file_offset0)
538
+ data_ends.append(
539
+ min(file_offset0 + num_bytes, footer_start)
540
+ )
541
+ return data_starts, data_ends
venv/lib/python3.13/site-packages/fsspec/registry.py ADDED
@@ -0,0 +1,330 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import types
5
+ import warnings
6
+
7
+ __all__ = ["registry", "get_filesystem_class", "default"]
8
+
9
+ # internal, mutable
10
+ _registry: dict[str, type] = {}
11
+
12
+ # external, immutable
13
+ registry = types.MappingProxyType(_registry)
14
+ default = "file"
15
+
16
+
17
+ def register_implementation(name, cls, clobber=False, errtxt=None):
18
+ """Add implementation class to the registry
19
+
20
+ Parameters
21
+ ----------
22
+ name: str
23
+ Protocol name to associate with the class
24
+ cls: class or str
25
+ if a class: fsspec-compliant implementation class (normally inherits from
26
+ ``fsspec.AbstractFileSystem``), gets added straight to the registry. If a
27
+ str, the full path to an implementation class like package.module.class,
28
+ which gets added to known_implementations,
29
+ so the import is deferred until the filesystem is actually used.
30
+ clobber: bool (optional)
31
+ Whether to overwrite a protocol with the same name; if False, will raise
32
+ instead.
33
+ errtxt: str (optional)
34
+ If given, then a failure to import the given class will result in this
35
+ text being used as the error message.
36
+ """
37
+ if isinstance(cls, str):
38
+ if name in known_implementations and clobber is False:
39
+ if cls != known_implementations[name]["class"]:
40
+ raise ValueError(
41
+ f"Name ({name}) already in the known_implementations and clobber "
42
+ f"is False"
43
+ )
44
+ else:
45
+ known_implementations[name] = {
46
+ "class": cls,
47
+ "err": errtxt or f"{cls} import failed for protocol {name}",
48
+ }
49
+
50
+ else:
51
+ if name in registry and clobber is False:
52
+ if _registry[name] is not cls:
53
+ raise ValueError(
54
+ f"Name ({name}) already in the registry and clobber is False"
55
+ )
56
+ else:
57
+ _registry[name] = cls
58
+
59
+
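A short example of registering a deferred (string-path) implementation; `mypkg.fs.MyFileSystem` is a hypothetical class:

```python
import fsspec

fsspec.register_implementation(
    "myproto",
    "mypkg.fs.MyFileSystem",  # hypothetical import path
    errtxt="Install mypkg to use the myproto:// protocol",
)
# The import of mypkg.fs is deferred until the first
# fsspec.filesystem("myproto") call.
```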
60
+ # protocols mapped to the class which implements them. This dict can be
61
+ # updated with register_implementation
62
+ known_implementations = {
63
+ "abfs": {
64
+ "class": "adlfs.AzureBlobFileSystem",
65
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
66
+ },
67
+ "adl": {
68
+ "class": "adlfs.AzureDatalakeFileSystem",
69
+ "err": "Install adlfs to access Azure Datalake Gen1",
70
+ },
71
+ "arrow_hdfs": {
72
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
+ "err": "pyarrow and local java libraries required for HDFS",
74
+ },
75
+ "asynclocal": {
76
+ "class": "morefs.asyn_local.AsyncLocalFileSystem",
77
+ "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
78
+ },
79
+ "asyncwrapper": {
80
+ "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
81
+ },
82
+ "az": {
83
+ "class": "adlfs.AzureBlobFileSystem",
84
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
85
+ },
86
+ "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
87
+ "box": {
88
+ "class": "boxfs.BoxFileSystem",
89
+ "err": "Please install boxfs to access BoxFileSystem",
90
+ },
91
+ "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
92
+ "dask": {
93
+ "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
94
+ "err": "Install dask distributed to access worker file system",
95
+ },
96
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
97
+ "dbfs": {
98
+ "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
99
+ "err": "Install the requests package to use the DatabricksFileSystem",
100
+ },
101
+ "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
102
+ "dropbox": {
103
+ "class": "dropboxdrivefs.DropboxDriveFileSystem",
104
+ "err": (
105
+ 'DropboxFileSystem requires "dropboxdrivefs", "requests" and '
106
+ '"dropbox" to be installed'
107
+ ),
108
+ },
109
+ "dvc": {
110
+ "class": "dvc.api.DVCFileSystem",
111
+ "err": "Install dvc to access DVCFileSystem",
112
+ },
113
+ "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
114
+ "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
115
+ "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
116
+ "gcs": {
117
+ "class": "gcsfs.GCSFileSystem",
118
+ "err": "Please install gcsfs to access Google Storage",
119
+ },
120
+ "gdrive": {
121
+ "class": "gdrive_fsspec.GoogleDriveFileSystem",
122
+ "err": "Please install gdrive_fs for access to Google Drive",
123
+ },
124
+ "generic": {"class": "fsspec.generic.GenericFileSystem"},
125
+ "gist": {
126
+ "class": "fsspec.implementations.gist.GistFileSystem",
127
+ "err": "Install the requests package to use the gist FS",
128
+ },
129
+ "git": {
130
+ "class": "fsspec.implementations.git.GitFileSystem",
131
+ "err": "Install pygit2 to browse local git repos",
132
+ },
133
+ "github": {
134
+ "class": "fsspec.implementations.github.GithubFileSystem",
135
+ "err": "Install the requests package to use the github FS",
136
+ },
137
+ "gs": {
138
+ "class": "gcsfs.GCSFileSystem",
139
+ "err": "Please install gcsfs to access Google Storage",
140
+ },
141
+ "hdfs": {
142
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
143
+ "err": "pyarrow and local java libraries required for HDFS",
144
+ },
145
+ "hf": {
146
+ "class": "huggingface_hub.HfFileSystem",
147
+ "err": "Install huggingface_hub to access HfFileSystem",
148
+ },
149
+ "http": {
150
+ "class": "fsspec.implementations.http.HTTPFileSystem",
151
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
152
+ },
153
+ "https": {
154
+ "class": "fsspec.implementations.http.HTTPFileSystem",
155
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
156
+ },
157
+ "jlab": {
158
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
159
+ "err": "Jupyter FS requires requests to be installed",
160
+ },
161
+ "jupyter": {
162
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
163
+ "err": "Jupyter FS requires requests to be installed",
164
+ },
165
+ "lakefs": {
166
+ "class": "lakefs_spec.LakeFSFileSystem",
167
+ "err": "Please install lakefs-spec to access LakeFSFileSystem",
168
+ },
169
+ "libarchive": {
170
+ "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
171
+ "err": "LibArchive requires libarchive to be installed",
172
+ },
173
+ "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
174
+ "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
175
+ "oci": {
176
+ "class": "ocifs.OCIFileSystem",
177
+ "err": "Install ocifs to access OCI Object Storage",
178
+ },
179
+ "ocilake": {
180
+ "class": "ocifs.OCIFileSystem",
181
+ "err": "Install ocifs to access OCI Data Lake",
182
+ },
183
+ "oss": {
184
+ "class": "ossfs.OSSFileSystem",
185
+ "err": "Install ossfs to access Alibaba Object Storage System",
186
+ },
187
+ "pyscript": {
188
+ "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
189
+ "err": "Install requests (cpython) or run in pyscript",
190
+ },
191
+ "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
192
+ "root": {
193
+ "class": "fsspec_xrootd.XRootDFileSystem",
194
+ "err": (
195
+ "Install fsspec-xrootd to access xrootd storage system. "
196
+ "Note: 'root' is the protocol name for xrootd storage systems, "
197
+ "not referring to root directories"
198
+ ),
199
+ },
200
+ "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
201
+ "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
202
+ "sftp": {
203
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
204
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
205
+ },
206
+ "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
207
+ "smb": {
208
+ "class": "fsspec.implementations.smb.SMBFileSystem",
209
+ "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
210
+ },
211
+ "ssh": {
212
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
213
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
214
+ },
215
+ "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
216
+ "tos": {
217
+ "class": "tosfs.TosFileSystem",
218
+ "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
219
+ },
220
+ "tosfs": {
221
+ "class": "tosfs.TosFileSystem",
222
+ "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
223
+ },
224
+ "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
225
+ "webdav": {
226
+ "class": "webdav4.fsspec.WebdavFileSystem",
227
+ "err": "Install webdav4 to access WebDAV",
228
+ },
229
+ "webhdfs": {
230
+ "class": "fsspec.implementations.webhdfs.WebHDFS",
231
+ "err": 'webHDFS access requires "requests" to be installed',
232
+ },
233
+ "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
234
+ }
235
+
236
+ assert list(known_implementations) == sorted(known_implementations), (
237
+ "Not in alphabetical order"
238
+ )
239
+
240
+
241
+ def get_filesystem_class(protocol):
242
+ """Fetch named protocol implementation from the registry
243
+
244
+ The dict ``known_implementations`` maps protocol names to the locations
245
+ of classes implementing the corresponding file-system. When used for the
246
+ first time, appropriate imports will happen and the class will be placed in
247
+ the registry. All subsequent calls will fetch directly from the registry.
248
+
249
+ Some protocol implementations require additional dependencies, and so the
250
+ import may fail. In this case, the string in the "err" field of the
251
+ ``known_implementations`` will be given as the error message.
252
+ """
253
+ if not protocol:
254
+ protocol = default
255
+
256
+ if protocol not in registry:
257
+ if protocol not in known_implementations:
258
+ raise ValueError(f"Protocol not known: {protocol}")
259
+ bit = known_implementations[protocol]
260
+ try:
261
+ register_implementation(protocol, _import_class(bit["class"]))
262
+ except ImportError as e:
263
+ raise ImportError(bit.get("err")) from e
264
+ cls = registry[protocol]
265
+ if getattr(cls, "protocol", None) in ("abstract", None):
266
+ cls.protocol = protocol
267
+
268
+ return cls
269
+
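For example, resolving the built-in in-memory implementation, which needs no optional dependencies:

```python
from fsspec.registry import get_filesystem_class

MemFS = get_filesystem_class("memory")
fs = MemFS()
fs.pipe_file("/demo.txt", b"hello")
assert fs.cat_file("/demo.txt") == b"hello"
```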
270
+
271
+ s3_msg = """Your installed version of s3fs is very old and known to cause
272
+ severe performance issues, see also https://github.com/dask/dask/issues/10276
273
+
274
+ To fix, you should specify a lower version bound on s3fs, or
275
+ update the current installation.
276
+ """
277
+
278
+
279
+ def _import_class(fqp: str):
280
+ """Take a fully-qualified path and return the imported class or identifier.
281
+
282
+ ``fqp`` is of the form "package.module.klass" or
283
+ "package.module:subobject.klass".
284
+
285
+ Warnings
286
+ --------
287
+ This can import arbitrary modules. Make sure you haven't installed any modules
288
+ that may execute malicious code at import time.
289
+ """
290
+ if ":" in fqp:
291
+ mod, name = fqp.rsplit(":", 1)
292
+ else:
293
+ mod, name = fqp.rsplit(".", 1)
294
+
295
+ is_s3 = mod == "s3fs"
296
+ mod = importlib.import_module(mod)
297
+ if is_s3 and mod.__version__.split(".") < ["0", "5"]:
298
+ warnings.warn(s3_msg)
299
+ for part in name.split("."):
300
+ mod = getattr(mod, part)
301
+
302
+ if not isinstance(mod, type):
303
+ raise TypeError(f"{fqp} is not a class")
304
+
305
+ return mod
306
+
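Both accepted path forms resolve to the same object, e.g.:

```python
cls_dot = _import_class("fsspec.implementations.memory.MemoryFileSystem")
cls_colon = _import_class("fsspec.implementations.memory:MemoryFileSystem")
assert cls_dot is cls_colon
```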
307
+
308
+ def filesystem(protocol, **storage_options):
309
+ """Instantiate filesystems for given protocol and arguments
310
+
311
+ ``storage_options`` are specific to the protocol being chosen, and are
312
+ passed directly to the class.
313
+ """
314
+ if protocol == "arrow_hdfs":
315
+ warnings.warn(
316
+ "The 'arrow_hdfs' protocol has been deprecated and will be "
317
+ "removed in the future. Specify it as 'hdfs'.",
318
+ DeprecationWarning,
319
+ )
320
+
321
+ cls = get_filesystem_class(protocol)
322
+ return cls(**storage_options)
323
+
324
+
325
+ def available_protocols():
326
+ """Return a list of the implemented protocols.
327
+
328
+ Note that any given protocol may require extra packages to be importable.
329
+ """
330
+ return list(known_implementations)
venv/lib/python3.13/site-packages/fsspec/spec.py ADDED
@@ -0,0 +1,2281 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import json
5
+ import logging
6
+ import os
7
+ import threading
8
+ import warnings
9
+ import weakref
10
+ from errno import ESPIPE
11
+ from glob import has_magic
12
+ from hashlib import sha256
13
+ from typing import Any, ClassVar
14
+
15
+ from .callbacks import DEFAULT_CALLBACK
16
+ from .config import apply_config, conf
17
+ from .dircache import DirCache
18
+ from .transaction import Transaction
19
+ from .utils import (
20
+ _unstrip_protocol,
21
+ glob_translate,
22
+ isfilelike,
23
+ other_paths,
24
+ read_block,
25
+ stringify_path,
26
+ tokenize,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
+ def make_instance(cls, args, kwargs):
33
+ return cls(*args, **kwargs)
34
+
35
+
36
+ class _Cached(type):
37
+ """
38
+ Metaclass for caching file system instances.
39
+
40
+ Notes
41
+ -----
42
+ Instances are cached according to
43
+
44
+ * The values of the class attributes listed in `_extra_tokenize_attributes`
45
+ * The arguments passed to ``__init__``.
46
+
47
+ This creates an additional reference to the filesystem, which prevents the
48
+ filesystem from being garbage collected when all *user* references go away.
49
+ A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
50
+ be made for a filesystem instance to be garbage collected.
51
+ """
52
+
53
+ def __init__(cls, *args, **kwargs):
54
+ super().__init__(*args, **kwargs)
55
+ # Note: we intentionally create a reference here, to avoid garbage
56
+ # collecting instances when all other references are gone. To really
57
+ # delete a FileSystem, the cache must be cleared.
58
+ if conf.get("weakref_instance_cache"): # pragma: no cover
59
+ # debug option for analysing fork/spawn conditions
60
+ cls._cache = weakref.WeakValueDictionary()
61
+ else:
62
+ cls._cache = {}
63
+ cls._pid = os.getpid()
64
+
65
+ def __call__(cls, *args, **kwargs):
66
+ kwargs = apply_config(cls, kwargs)
67
+ extra_tokens = tuple(
68
+ getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
69
+ )
70
+ strip_tokenize_options = {
71
+ k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
72
+ }
73
+ token = tokenize(
74
+ cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
75
+ )
76
+ skip = kwargs.pop("skip_instance_cache", False)
77
+ if os.getpid() != cls._pid:
78
+ cls._cache.clear()
79
+ cls._pid = os.getpid()
80
+ if not skip and cls.cachable and token in cls._cache:
81
+ cls._latest = token
82
+ return cls._cache[token]
83
+ else:
84
+ obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
85
+ # Setting _fs_token here causes some static linters to complain.
86
+ obj._fs_token_ = token
87
+ obj.storage_args = args
88
+ obj.storage_options = kwargs
89
+ if obj.async_impl and obj.mirror_sync_methods:
90
+ from .asyn import mirror_sync_methods
91
+
92
+ mirror_sync_methods(obj)
93
+
94
+ if cls.cachable and not skip:
95
+ cls._latest = token
96
+ cls._cache[token] = obj
97
+ return obj
98
+
99
+
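The effect of the instance cache can be seen with any cachable implementation, e.g. the in-memory one:

```python
import fsspec

a = fsspec.filesystem("memory")
b = fsspec.filesystem("memory")                            # same token -> cached
c = fsspec.filesystem("memory", skip_instance_cache=True)  # bypasses the cache
assert a is b
assert a is not c
```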
100
+ class AbstractFileSystem(metaclass=_Cached):
101
+ """
102
+ An abstract super-class for pythonic file-systems
103
+
104
+ Implementations are expected to be compatible with or, better, subclass
105
+ from here.
106
+ """
107
+
108
+ cachable = True # this class can be cached, instances reused
109
+ _cached = False
110
+ blocksize = 2**22
111
+ sep = "/"
112
+ protocol: ClassVar[str | tuple[str, ...]] = "abstract"
113
+ _latest = None
114
+ async_impl = False
115
+ mirror_sync_methods = False
116
+ root_marker = "" # For some FSs, may require leading '/' or other character
117
+ transaction_type = Transaction
118
+
119
+ #: Extra *class attributes* that should be considered when hashing.
120
+ _extra_tokenize_attributes = ()
121
+ #: *storage options* that should not be considered when hashing.
122
+ _strip_tokenize_options = ()
123
+
124
+ # Set by _Cached metaclass
125
+ storage_args: tuple[Any, ...]
126
+ storage_options: dict[str, Any]
127
+
128
+ def __init__(self, *args, **storage_options):
129
+ """Create and configure file-system instance
130
+
131
+ Instances may be cachable, so if similar enough arguments are seen
132
+ a new instance is not required. The token attribute exists to allow
133
+ implementations to cache instances if they wish.
134
+
135
+ A reasonable default should be provided if there are no arguments.
136
+
137
+ Subclasses should call this method.
138
+
139
+ Parameters
140
+ ----------
141
+ use_listings_cache, listings_expiry_time, max_paths:
142
+ passed to ``DirCache``, if the implementation supports
143
+ directory listing caching. Pass use_listings_cache=False
144
+ to disable such caching.
145
+ skip_instance_cache: bool
146
+ If this is a cachable implementation, pass True here to force
147
+ creating a new instance even if a matching instance exists, and prevent
148
+ storing this instance.
149
+ asynchronous: bool
150
+ loop: asyncio-compatible IOLoop or None
151
+ """
152
+ if self._cached:
153
+ # reusing instance, don't change
154
+ return
155
+ self._cached = True
156
+ self._intrans = False
157
+ self._transaction = None
158
+ self._invalidated_caches_in_transaction = []
159
+ self.dircache = DirCache(**storage_options)
160
+
161
+ if storage_options.pop("add_docs", None):
162
+ warnings.warn("add_docs is no longer supported.", FutureWarning)
163
+
164
+ if storage_options.pop("add_aliases", None):
165
+ warnings.warn("add_aliases has been removed.", FutureWarning)
166
+ # This is set in _Cached
167
+ self._fs_token_ = None
168
+
169
+ @property
170
+ def fsid(self):
171
+ """Persistent filesystem id that can be used to compare filesystems
172
+ across sessions.
173
+ """
174
+ raise NotImplementedError
175
+
176
+ @property
177
+ def _fs_token(self):
178
+ return self._fs_token_
179
+
180
+ def __dask_tokenize__(self):
181
+ return self._fs_token
182
+
183
+ def __hash__(self):
184
+ return int(self._fs_token, 16)
185
+
186
+ def __eq__(self, other):
187
+ return isinstance(other, type(self)) and self._fs_token == other._fs_token
188
+
189
+ def __reduce__(self):
190
+ return make_instance, (type(self), self.storage_args, self.storage_options)
191
+
192
+ @classmethod
193
+ def _strip_protocol(cls, path):
194
+ """Turn path from fully-qualified to file-system-specific
195
+
196
+ May require FS-specific handling, e.g., for relative paths or links.
197
+ """
198
+ if isinstance(path, list):
199
+ return [cls._strip_protocol(p) for p in path]
200
+ path = stringify_path(path)
201
+ protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
202
+ for protocol in protos:
203
+ if path.startswith(protocol + "://"):
204
+ path = path[len(protocol) + 3 :]
205
+ elif path.startswith(protocol + "::"):
206
+ path = path[len(protocol) + 2 :]
207
+ path = path.rstrip("/")
208
+ # use of root_marker to make minimum required path, e.g., "/"
209
+ return path or cls.root_marker
210
+
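On the base class itself (`protocol == "abstract"`, empty `root_marker`) this behaves as:

```python
from fsspec import AbstractFileSystem

assert AbstractFileSystem._strip_protocol("abstract://a/b/") == "a/b"
assert AbstractFileSystem._strip_protocol("a/b") == "a/b"
```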
211
+ def unstrip_protocol(self, name: str) -> str:
212
+ """Format FS-specific path to generic, including protocol"""
213
+ protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
214
+ for protocol in protos:
215
+ if name.startswith(f"{protocol}://"):
216
+ return name
217
+ return f"{protos[0]}://{name}"
218
+
219
+ @staticmethod
220
+ def _get_kwargs_from_urls(path):
221
+ """If kwargs can be encoded in the paths, extract them here
222
+
223
+ This should happen before instantiation of the class; incoming paths
224
+ should then be amended in the methods to strip those options.
225
+
226
+ Examples may look like an sftp path "sftp://user@host:/my/path", where
227
+ the user and host should become kwargs and later get stripped.
228
+ """
229
+ # by default, nothing happens
230
+ return {}
231
+
232
+ @classmethod
233
+ def current(cls):
234
+ """Return the most recently instantiated FileSystem
235
+
236
+ If no instance has been created, then create one with defaults
237
+ """
238
+ if cls._latest in cls._cache:
239
+ return cls._cache[cls._latest]
240
+ return cls()
241
+
242
+ @property
243
+ def transaction(self):
244
+ """A context within which files are committed together upon exit
245
+
246
+ Requires the file class to implement `.commit()` and `.discard()`
247
+ for the normal and exception cases.
248
+ """
249
+ if self._transaction is None:
250
+ self._transaction = self.transaction_type(self)
251
+ return self._transaction
252
+
253
+ def start_transaction(self):
254
+ """Begin write transaction for deferring files, non-context version"""
255
+ self._intrans = True
256
+ self._transaction = self.transaction_type(self)
257
+ return self.transaction
258
+
259
+ def end_transaction(self):
260
+ """Finish write transaction, non-context version"""
261
+ self.transaction.complete()
262
+ self._transaction = None
263
+ # The invalid cache must be cleared after the transaction is completed.
264
+ for path in self._invalidated_caches_in_transaction:
265
+ self.invalidate_cache(path)
266
+ self._invalidated_caches_in_transaction.clear()
267
+
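A sketch of transactional writes using the in-memory filesystem, whose file class implements `commit`/`discard`:

```python
import fsspec

fs = fsspec.filesystem("memory")
with fs.transaction:
    with fs.open("/txn.txt", "wb") as f:
        f.write(b"data")  # committed when the transaction block exits
assert fs.cat_file("/txn.txt") == b"data"
```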
268
+ def invalidate_cache(self, path=None):
269
+ """
270
+ Discard any cached directory information
271
+
272
+ Parameters
273
+ ----------
274
+ path: string or None
275
+ If None, clear all listings cached else listings at or under given
276
+ path.
277
+ """
278
+ # Not necessary to implement invalidation mechanism, may have no cache.
279
+ # But if you do, you should call this method of the parent class from your
280
+ # subclass to ensure caches expire correctly after transactions.
281
+ # See the implementation of FTPFileSystem in ftp.py
282
+ if self._intrans:
283
+ self._invalidated_caches_in_transaction.append(path)
284
+
285
+ def mkdir(self, path, create_parents=True, **kwargs):
286
+ """
287
+ Create directory entry at path
288
+
289
+ For systems that don't have true directories, may create an entry for
290
+ this instance only and not touch the real filesystem
291
+
292
+ Parameters
293
+ ----------
294
+ path: str
295
+ location
296
+ create_parents: bool
297
+ if True, this is equivalent to ``makedirs``
298
+ kwargs:
299
+ may be permissions, etc.
300
+ """
301
+ pass # not necessary to implement, may not have directories
302
+
303
+ def makedirs(self, path, exist_ok=False):
304
+ """Recursively make directories
305
+
306
+ Creates directory at path and any intervening required directories.
307
+ Raises exception if, for instance, the path already exists but is a
308
+ file.
309
+
310
+ Parameters
311
+ ----------
312
+ path: str
313
+ leaf directory name
314
+ exist_ok: bool (False)
315
+ If False, will error if the target already exists
316
+ """
317
+ pass # not necessary to implement, may not have directories
318
+
319
+ def rmdir(self, path):
320
+ """Remove a directory, if empty"""
321
+ pass # not necessary to implement, may not have directories
322
+
323
+ def ls(self, path, detail=True, **kwargs):
324
+ """List objects at path.
325
+
326
+ This should include subdirectories and files at that location. The
327
+ difference between a file and a directory must be clear when details
328
+ are requested.
329
+
330
+ The specific keys, or perhaps a FileInfo class, or similar, is TBD,
331
+ but must be consistent across implementations.
332
+ Must include:
333
+
334
+ - full path to the entry (without protocol)
335
+ - size of the entry, in bytes. If the value cannot be determined, will
336
+ be ``None``.
337
+ - type of entry, "file", "directory" or other
338
+
339
+ Additional information
340
+ may be present, appropriate to the file-system, e.g., generation,
341
+ checksum, etc.
342
+
343
+ May use refresh=True|False to allow use of self._ls_from_cache to
344
+ check for a saved listing and avoid calling the backend. This would be
345
+ common where listing may be expensive.
346
+
347
+ Parameters
348
+ ----------
349
+ path: str
350
+ detail: bool
351
+ if True, gives a list of dictionaries, where each is the same as
352
+ the result of ``info(path)``. If False, gives a list of paths
353
+ (str).
354
+ kwargs: may have additional backend-specific options, such as version
355
+ information
356
+
357
+ Returns
358
+ -------
359
+ List of strings if detail is False, or list of directory information
360
+ dicts if detail is True.
361
+ """
362
+ raise NotImplementedError
363
+
364
+ def _ls_from_cache(self, path):
365
+ """Check cache for listing
366
+
367
+ Returns listing, if found (may be an empty list for a directory that exists
368
+ but contains nothing), None if not in cache.
369
+ """
370
+ parent = self._parent(path)
371
+ try:
372
+ return self.dircache[path.rstrip("/")]
373
+ except KeyError:
374
+ pass
375
+ try:
376
+ files = [
377
+ f
378
+ for f in self.dircache[parent]
379
+ if f["name"] == path
380
+ or (f["name"] == path.rstrip("/") and f["type"] == "directory")
381
+ ]
382
+ if len(files) == 0:
383
+ # parent dir was listed but did not contain this file
384
+ raise FileNotFoundError(path)
385
+ return files
386
+ except KeyError:
387
+ pass
388
+
389
+ def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
390
+ """Return all files under the given path.
391
+
392
+ List all files, recursing into subdirectories; output is iterator-style,
393
+ like ``os.walk()``. For a simple list of files, ``find()`` is available.
394
+
395
+ When topdown is True, the caller can modify the dirnames list in-place (perhaps
396
+ using del or slice assignment), and walk() will
397
+ only recurse into the subdirectories whose names remain in dirnames;
398
+ this can be used to prune the search, impose a specific order of visiting,
399
+ or even to inform walk() about directories the caller creates or renames before
400
+ it resumes walk() again.
401
+ Modifying dirnames when topdown is False has no effect. (see os.walk)
402
+
403
+ Note that the "files" output will include anything that is not
404
+ a directory, such as links.
405
+
406
+ Parameters
407
+ ----------
408
+ path: str
409
+ Root to recurse into
410
+ maxdepth: int
411
+ Maximum recursion depth. None means limitless, but not recommended
412
+ on link-based file-systems.
413
+ topdown: bool (True)
414
+ Whether to walk the directory tree from the top downwards or from
415
+ the bottom upwards.
416
+ on_error: "omit", "raise", a callable
417
+ if omit (default), path with exception will simply be empty;
418
+ If raise, an underlying exception will be raised;
419
+ if callable, it will be called with a single OSError instance as argument
420
+ kwargs: passed to ``ls``
421
+ """
422
+ if maxdepth is not None and maxdepth < 1:
423
+ raise ValueError("maxdepth must be at least 1")
424
+
425
+ path = self._strip_protocol(path)
426
+ full_dirs = {}
427
+ dirs = {}
428
+ files = {}
429
+
430
+ detail = kwargs.pop("detail", False)
431
+ try:
432
+ listing = self.ls(path, detail=True, **kwargs)
433
+ except (FileNotFoundError, OSError) as e:
434
+ if on_error == "raise":
435
+ raise
436
+ if callable(on_error):
437
+ on_error(e)
438
+ return
439
+
440
+ for info in listing:
441
+ # each info name must be at least [path]/part , but here
442
+ # we check also for names like [path]/part/
443
+ pathname = info["name"].rstrip("/")
444
+ name = pathname.rsplit("/", 1)[-1]
445
+ if info["type"] == "directory" and pathname != path:
446
+ # do not include "self" path
447
+ full_dirs[name] = pathname
448
+ dirs[name] = info
449
+ elif pathname == path:
450
+ # file-like with same name as the given path
451
+ files[""] = info
452
+ else:
453
+ files[name] = info
454
+
455
+ if not detail:
456
+ dirs = list(dirs)
457
+ files = list(files)
458
+
459
+ if topdown:
460
+ # Yield before recursion if walking top down
461
+ yield path, dirs, files
462
+
463
+ if maxdepth is not None:
464
+ maxdepth -= 1
465
+ if maxdepth < 1:
466
+ if not topdown:
467
+ yield path, dirs, files
468
+ return
469
+
470
+ for d in dirs:
471
+ yield from self.walk(
472
+ full_dirs[d],
473
+ maxdepth=maxdepth,
474
+ detail=detail,
475
+ topdown=topdown,
476
+ **kwargs,
477
+ )
478
+
479
+ if not topdown:
480
+ # Yield after recursion if walking bottom up
481
+ yield path, dirs, files
482
+
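Example output shape, sketched with the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/walkdemo/a.txt": b"1", "/walkdemo/sub/b.txt": b"2"})
for root, dirs, files in fs.walk("/walkdemo"):
    print(root, dirs, files)
# roughly: /walkdemo ['sub'] ['a.txt']
#          /walkdemo/sub [] ['b.txt']
```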
483
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
484
+ """List all files below path.
485
+
486
+ Like posix ``find`` command without conditions
487
+
488
+ Parameters
489
+ ----------
490
+ path : str
491
+ maxdepth: int or None
492
+ If not None, the maximum number of levels to descend
493
+ withdirs: bool
494
+ Whether to include directory paths in the output. This is True
495
+ when used by glob, but users usually only want files.
496
+ kwargs are passed to ``ls``.
497
+ """
498
+ # TODO: allow equivalent of -name parameter
499
+ path = self._strip_protocol(path)
500
+ out = {}
501
+
502
+ # Add the root directory if withdirs is requested
503
+ # This is needed for posix glob compliance
504
+ if withdirs and path != "" and self.isdir(path):
505
+ out[path] = self.info(path)
506
+
507
+ for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
508
+ if withdirs:
509
+ files.update(dirs)
510
+ out.update({info["name"]: info for name, info in files.items()})
511
+ if not out and self.isfile(path):
512
+ # walk works on directories, but find should also return [path]
513
+ # when path happens to be a file
514
+ out[path] = {}
515
+ names = sorted(out)
516
+ if not detail:
517
+ return names
518
+ else:
519
+ return {name: out[name] for name in names}
520
+
521
+ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
522
+ """Space used by files and optionally directories within a path
523
+
524
+ Directory size does not include the size of its contents.
525
+
526
+ Parameters
527
+ ----------
528
+ path: str
529
+ total: bool
530
+ Whether to sum all the file sizes
531
+ maxdepth: int or None
532
+ Maximum number of directory levels to descend, None for unlimited.
533
+ withdirs: bool
534
+ Whether to include directory paths in the output.
535
+ kwargs: passed to ``find``
536
+
537
+ Returns
538
+ -------
539
+ Dict of {path: size} if total=False, or int otherwise, where numbers
540
+ refer to bytes used.
541
+ """
542
+ sizes = {}
543
+ if withdirs and self.isdir(path):
544
+ # Include top-level directory in output
545
+ info = self.info(path)
546
+ sizes[info["name"]] = info["size"]
547
+ for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
548
+ info = self.info(f)
549
+ sizes[info["name"]] = info["size"]
550
+ if total:
551
+ return sum(sizes.values())
552
+ else:
553
+ return sizes
554
+
555
+ def glob(self, path, maxdepth=None, **kwargs):
556
+ """Find files by glob-matching.
557
+
558
+ Pattern matching capabilities for finding files that match the given pattern.
559
+
560
+ Parameters
561
+ ----------
562
+ path: str
563
+ The glob pattern to match against
564
+ maxdepth: int or None
565
+ Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
566
+ Must be at least 1 if provided.
567
+ kwargs:
568
+ Additional arguments passed to ``find`` (e.g., detail=True)
569
+
570
+ Returns
571
+ -------
572
+ List of matched paths, or dict of paths and their info if detail=True
573
+
574
+ Notes
575
+ -----
576
+ Supported patterns:
577
+ - '*': Matches any sequence of characters within a single directory level
578
+ - ``'**'``: Matches any number of directory levels (must be an entire path component)
579
+ - '?': Matches exactly one character
580
+ - '[abc]': Matches any character in the set
581
+ - '[a-z]': Matches any character in the range
582
+ - '[!abc]': Matches any character NOT in the set
583
+
584
+ Special behaviors:
585
+ - If the path ends with '/', only folders are returned
586
+ - Consecutive '*' characters are compressed into a single '*'
587
+ - Empty brackets '[]' never match anything
588
+ - Negated empty brackets '[!]' match any single character
589
+ - Special characters in character classes are escaped properly
590
+
591
+ Limitations:
592
+ - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
593
+ - No brace expansion ('{a,b}.txt')
594
+ - No extended glob patterns ('+(pattern)', '!(pattern)')
595
+ """
596
+ if maxdepth is not None and maxdepth < 1:
597
+ raise ValueError("maxdepth must be at least 1")
598
+
599
+ import re
600
+
601
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
602
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
603
+ path = self._strip_protocol(path)
604
+ append_slash_to_dirname = ends_with_sep or path.endswith(
605
+ tuple(sep + "**" for sep in seps)
606
+ )
607
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
608
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
609
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
610
+
611
+ min_idx = min(idx_star, idx_qmark, idx_brace)
612
+
613
+ detail = kwargs.pop("detail", False)
614
+
615
+ if not has_magic(path):
616
+ if self.exists(path, **kwargs):
617
+ if not detail:
618
+ return [path]
619
+ else:
620
+ return {path: self.info(path, **kwargs)}
621
+ else:
622
+ if not detail:
623
+ return [] # glob of non-existent returns empty
624
+ else:
625
+ return {}
626
+ elif "/" in path[:min_idx]:
627
+ min_idx = path[:min_idx].rindex("/")
628
+ root = path[: min_idx + 1]
629
+ depth = path[min_idx + 1 :].count("/") + 1
630
+ else:
631
+ root = ""
632
+ depth = path[min_idx + 1 :].count("/") + 1
633
+
634
+ if "**" in path:
635
+ if maxdepth is not None:
636
+ idx_double_stars = path.find("**")
637
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
638
+ depth = depth - depth_double_stars + maxdepth
639
+ else:
640
+ depth = None
641
+
642
+ allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
643
+
644
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
645
+ pattern = re.compile(pattern)
646
+
647
+ out = {
648
+ p: info
649
+ for p, info in sorted(allpaths.items())
650
+ if pattern.match(
651
+ p + "/"
652
+ if append_slash_to_dirname and info["type"] == "directory"
653
+ else p
654
+ )
655
+ }
656
+
657
+ if detail:
658
+ return out
659
+ else:
660
+ return list(out)
661
+
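A few of the supported patterns, sketched with the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/g/a.csv": b"", "/g/b.txt": b"", "/g/sub/c.csv": b""})
assert fs.glob("/g/*.csv") == ["/g/a.csv"]  # '*' stays within one level
assert fs.glob("/g/?.csv") == ["/g/a.csv"]  # '?' matches one character
assert fs.glob("/g/[ab].*") == ["/g/a.csv", "/g/b.txt"]
```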
662
+ def exists(self, path, **kwargs):
663
+ """Is there a file at the given path"""
664
+ try:
665
+ self.info(path, **kwargs)
666
+ return True
667
+ except: # noqa: E722
668
+ # any exception allowed bar FileNotFoundError?
669
+ return False
670
+
671
+ def lexists(self, path, **kwargs):
672
+ """Whether there is a file at the given path (including
673
+ broken links)"""
674
+ return self.exists(path)
675
+
676
+ def info(self, path, **kwargs):
677
+ """Give details of entry at path
678
+
679
+ Returns a single dictionary, with exactly the same information as ``ls``
680
+ would with ``detail=True``.
681
+
682
+ The default implementation calls ls and could be overridden by a
683
+ shortcut. kwargs are passed on to ``ls()``.
684
+
685
+ Some file systems might not be able to measure the file's size, in
686
+ which case, the returned dict will include ``'size': None``.
687
+
688
+ Returns
689
+ -------
690
+ dict with keys: name (full path in the FS), size (in bytes), type (file,
691
+ directory, or something else) and other FS-specific keys.
692
+ """
693
+ path = self._strip_protocol(path)
694
+ out = self.ls(self._parent(path), detail=True, **kwargs)
695
+ out = [o for o in out if o["name"].rstrip("/") == path]
696
+ if out:
697
+ return out[0]
698
+ out = self.ls(path, detail=True, **kwargs)
699
+ path = path.rstrip("/")
700
+ out1 = [o for o in out if o["name"].rstrip("/") == path]
701
+ if len(out1) == 1:
702
+ if "size" not in out1[0]:
703
+ out1[0]["size"] = None
704
+ return out1[0]
705
+ elif len(out1) > 1 or out:
706
+ return {"name": path, "size": 0, "type": "directory"}
707
+ else:
708
+ raise FileNotFoundError(path)
709
+
710
+ def checksum(self, path):
711
+ """Unique value for current version of file
712
+
713
+ If the checksum is the same from one moment to another, the contents
714
+ are guaranteed to be the same. If the checksum changes, the contents
715
+ *might* have changed.
716
+
717
+ This should normally be overridden; default will probably capture
718
+ creation/modification timestamp (which would be good) or maybe
719
+ access timestamp (which would be bad)
720
+ """
721
+ return int(tokenize(self.info(path)), 16)
722
+
723
+ def size(self, path):
724
+ """Size in bytes of file"""
725
+ return self.info(path).get("size", None)
726
+
727
+ def sizes(self, paths):
728
+ """Size in bytes of each file in a list of paths"""
729
+ return [self.size(p) for p in paths]
730
+
731
+ def isdir(self, path):
732
+ """Is this entry directory-like?"""
733
+ try:
734
+ return self.info(path)["type"] == "directory"
735
+ except OSError:
736
+ return False
737
+
738
+ def isfile(self, path):
739
+ """Is this entry file-like?"""
740
+ try:
741
+ return self.info(path)["type"] == "file"
742
+ except: # noqa: E722
743
+ return False
744
+
745
+ def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
746
+ """Get the contents of the file as a string.
747
+
748
+ Parameters
749
+ ----------
750
+ path: str
751
+ URL of file on this filesystem
752
+ encoding, errors, newline: same as `open`.
753
+ """
754
+ with self.open(
755
+ path,
756
+ mode="r",
757
+ encoding=encoding,
758
+ errors=errors,
759
+ newline=newline,
760
+ **kwargs,
761
+ ) as f:
762
+ return f.read()
763
+
764
+ def write_text(
765
+ self, path, value, encoding=None, errors=None, newline=None, **kwargs
766
+ ):
767
+ """Write the text to the given file.
768
+
769
+ An existing file will be overwritten.
770
+
771
+ Parameters
772
+ ----------
773
+ path: str
774
+ URL of file on this filesystem
775
+ value: str
776
+ Text to write.
777
+ encoding, errors, newline: same as `open`.
778
+ """
779
+ with self.open(
780
+ path,
781
+ mode="w",
782
+ encoding=encoding,
783
+ errors=errors,
784
+ newline=newline,
785
+ **kwargs,
786
+ ) as f:
787
+ return f.write(value)
788
+
789
+ def cat_file(self, path, start=None, end=None, **kwargs):
790
+ """Get the content of a file
791
+
792
+ Parameters
793
+ ----------
794
+ path: URL of file on this filesystem
795
+ start, end: int
796
+ Bytes limits of the read. If negative, backwards from end,
797
+ like usual python slices. Either can be None for start or
798
+ end of file, respectively
799
+ kwargs: passed to ``open()``.
800
+ """
801
+ # explicitly set buffering off?
802
+ with self.open(path, "rb", **kwargs) as f:
803
+ if start is not None:
804
+ if start >= 0:
805
+ f.seek(start)
806
+ else:
807
+ f.seek(max(0, f.size + start))
808
+ if end is not None:
809
+ if end < 0:
810
+ end = f.size + end
811
+ return f.read(end - f.tell())
812
+ return f.read()
813
+
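The slice-like semantics in action (in-memory sketch):

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/rng.bin", b"0123456789")
assert fs.cat_file("/rng.bin", start=2, end=5) == b"234"
assert fs.cat_file("/rng.bin", start=-3) == b"789"  # negative start: from the end
assert fs.cat_file("/rng.bin", end=-6) == b"0123"   # negative end, too
```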
814
+ def pipe_file(self, path, value, mode="overwrite", **kwargs):
815
+ """Set the bytes of given file"""
816
+ if mode == "create" and self.exists(path):
817
+ # non-atomic but simple way; or could use "xb" in open(), which is likely
818
+ # not as well supported
819
+ raise FileExistsError
820
+ with self.open(path, "wb", **kwargs) as f:
821
+ f.write(value)
822
+
823
+ def pipe(self, path, value=None, **kwargs):
824
+ """Put value into path
825
+
826
+ (counterpart to ``cat``)
827
+
828
+ Parameters
829
+ ----------
830
+ path: string or dict(str, bytes)
831
+ If a string, a single remote location to put ``value`` bytes; if a dict,
832
+ a mapping of {path: bytesvalue}.
833
+ value: bytes, optional
834
+ If using a single path, these are the bytes to put there. Ignored if
835
+ ``path`` is a dict
836
+ """
837
+ if isinstance(path, str):
838
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
839
+ elif isinstance(path, dict):
840
+ for k, v in path.items():
841
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
842
+ else:
843
+ raise ValueError("path must be str or dict")
844
+
845
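
Both calling forms in one short sketch:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe("/single.bin", b"one")                   # str path plus value
    fs.pipe({"/a.bin": b"two", "/b.bin": b"three"})  # dict of {path: bytes}
    assert fs.cat("/b.bin") == b"three"
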
+ def cat_ranges(
846
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
847
+ ):
848
+ """Get the contents of byte ranges from one or more files
849
+
850
+ Parameters
851
+ ----------
852
+ paths: list
853
+ A list of filepaths on this filesystem
854
+ starts, ends: int or list
855
+ Bytes limits of the read. If using a single int, the same value will be
856
+ used to read all the specified files.
857
+ """
858
+ if max_gap is not None:
859
+ raise NotImplementedError
860
+ if not isinstance(paths, list):
861
+ raise TypeError
862
+ if not isinstance(starts, list):
863
+ starts = [starts] * len(paths)
864
+ if not isinstance(ends, list):
865
+ ends = [ends] * len(paths)
866
+ if len(starts) != len(paths) or len(ends) != len(paths):
867
+ raise ValueError
868
+ out = []
869
+ for p, s, e in zip(paths, starts, ends):
870
+ try:
871
+ out.append(self.cat_file(p, s, e))
872
+ except Exception as e:
873
+ if on_error == "return":
874
+ out.append(e)
875
+ else:
876
+ raise
877
+ return out
878
+
879
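
A sketch showing the scalar broadcast and the default ``on_error="return"``
behaviour (failures come back in-place as exception instances):

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/x": b"HDR-xxxx", "/y": b"HDR-yyyy"})
    assert fs.cat_ranges(["/x", "/y"], 0, 3) == [b"HDR", b"HDR"]  # ints broadcast
    out = fs.cat_ranges(["/x", "/missing"], 0, 3)
    assert out[0] == b"HDR" and isinstance(out[1], FileNotFoundError)
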
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
880
+ """Fetch (potentially multiple) paths' contents
881
+
882
+ Parameters
883
+ ----------
884
+ recursive: bool
885
+ If True, assume the path(s) are directories, and get all the
886
+ contained files
887
+ on_error : "raise", "omit", "return"
888
+ If raise, an underlying exception will be raised (converted to KeyError
889
+ if the type is in self.missing_exceptions); if omit, keys with exception
890
+ will simply not be included in the output; if "return", all keys are
891
+ included in the output, but the value will be bytes or an exception
892
+ instance.
893
+ kwargs: passed to cat_file
894
+
895
+ Returns
896
+ -------
897
+ dict of {path: contents} if there are multiple paths
898
+ or the path has been otherwise expanded
899
+ """
900
+ paths = self.expand_path(path, recursive=recursive, **kwargs)
901
+ if (
902
+ len(paths) > 1
903
+ or isinstance(path, list)
904
+ or paths[0] != self._strip_protocol(path)
905
+ ):
906
+ out = {}
907
+ for path in paths:
908
+ try:
909
+ out[path] = self.cat_file(path, **kwargs)
910
+ except Exception as e:
911
+ if on_error == "raise":
912
+ raise
913
+ if on_error == "return":
914
+ out[path] = e
915
+ return out
916
+ else:
917
+ return self.cat_file(paths[0], **kwargs)
918
+
919
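
A sketch of the return-type rule: one concrete path yields bytes, while a
list, glob, or directory yields a dict keyed by expanded path:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/data/a": b"A", "/data/b": b"B"})
    assert fs.cat("/data/a") == b"A"  # single concrete path -> bytes
    assert fs.cat("/data/*") == {"/data/a": b"A", "/data/b": b"B"}  # glob -> dict
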
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
920
+ """Copy single remote file to local"""
921
+ from .implementations.local import LocalFileSystem
922
+
923
+ if isfilelike(lpath):
924
+ outfile = lpath
925
+ elif self.isdir(rpath):
926
+ os.makedirs(lpath, exist_ok=True)
927
+ return None
928
+
929
+ fs = LocalFileSystem(auto_mkdir=True)
930
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
931
+
932
+ with self.open(rpath, "rb", **kwargs) as f1:
933
+ if outfile is None:
934
+ outfile = open(lpath, "wb")
935
+
936
+ try:
937
+ callback.set_size(getattr(f1, "size", None))
938
+ data = True
939
+ while data:
940
+ data = f1.read(self.blocksize)
941
+ segment_len = outfile.write(data)
942
+ if segment_len is None:
943
+ segment_len = len(data)
944
+ callback.relative_update(segment_len)
945
+ finally:
946
+ if not isfilelike(lpath):
947
+ outfile.close()
948
+
949
+ def get(
950
+ self,
951
+ rpath,
952
+ lpath,
953
+ recursive=False,
954
+ callback=DEFAULT_CALLBACK,
955
+ maxdepth=None,
956
+ **kwargs,
957
+ ):
958
+ """Copy file(s) to local.
959
+
960
+ Copies a specific file or tree of files (if recursive=True). If lpath
961
+ ends with a "/", it will be assumed to be a directory, and target files
962
+ will go within. Can submit a list of paths, which may be glob-patterns
963
+ and will be expanded.
964
+
965
+ Calls get_file for each source.
966
+ """
967
+ if isinstance(lpath, list) and isinstance(rpath, list):
968
+ # No need to expand paths when both source and destination
969
+ # are provided as lists
970
+ rpaths = rpath
971
+ lpaths = lpath
972
+ else:
973
+ from .implementations.local import (
974
+ LocalFileSystem,
975
+ make_path_posix,
976
+ trailing_sep,
977
+ )
978
+
979
+ source_is_str = isinstance(rpath, str)
980
+ rpaths = self.expand_path(
981
+ rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
982
+ )
983
+ if source_is_str and (not recursive or maxdepth is not None):
984
+ # Non-recursive glob does not copy directories
985
+ rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
986
+ if not rpaths:
987
+ return
988
+
989
+ if isinstance(lpath, str):
990
+ lpath = make_path_posix(lpath)
991
+
992
+ source_is_file = len(rpaths) == 1
993
+ dest_is_dir = isinstance(lpath, str) and (
994
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
995
+ )
996
+
997
+ exists = source_is_str and (
998
+ (has_magic(rpath) and source_is_file)
999
+ or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
1000
+ )
1001
+ lpaths = other_paths(
1002
+ rpaths,
1003
+ lpath,
1004
+ exists=exists,
1005
+ flatten=not source_is_str,
1006
+ )
1007
+
1008
+ callback.set_size(len(lpaths))
1009
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1010
+ with callback.branched(rpath, lpath) as child:
1011
+ self.get_file(rpath, lpath, callback=child, **kwargs)
1012
+
1013
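
A sketch of the trailing-"/" rule for the local target (the source file lands
inside the directory rather than replacing it):

    import os
    import tempfile

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe_file("/remote/data.bin", b"payload")
    local = tempfile.mkdtemp()
    fs.get("/remote/data.bin", local + "/")  # trailing sep: copy *into* the dir
    assert os.path.exists(os.path.join(local, "data.bin"))
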
+ def put_file(
1014
+ self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
1015
+ ):
1016
+ """Copy single file to remote"""
1017
+ if mode == "create" and self.exists(rpath):
1018
+ raise FileExistsError
1019
+ if os.path.isdir(lpath):
1020
+ self.makedirs(rpath, exist_ok=True)
1021
+ return None
1022
+
1023
+ with open(lpath, "rb") as f1:
1024
+ size = f1.seek(0, 2)
1025
+ callback.set_size(size)
1026
+ f1.seek(0)
1027
+
1028
+ self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
1029
+ with self.open(rpath, "wb", **kwargs) as f2:
1030
+ while f1.tell() < size:
1031
+ data = f1.read(self.blocksize)
1032
+ segment_len = f2.write(data)
1033
+ if segment_len is None:
1034
+ segment_len = len(data)
1035
+ callback.relative_update(segment_len)
1036
+
1037
+ def put(
1038
+ self,
1039
+ lpath,
1040
+ rpath,
1041
+ recursive=False,
1042
+ callback=DEFAULT_CALLBACK,
1043
+ maxdepth=None,
1044
+ **kwargs,
1045
+ ):
1046
+ """Copy file(s) from local.
1047
+
1048
+ Copies a specific file or tree of files (if recursive=True). If rpath
1049
+ ends with a "/", it will be assumed to be a directory, and target files
1050
+ will go within.
1051
+
1052
+ Calls put_file for each source.
1053
+ """
1054
+ if isinstance(lpath, list) and isinstance(rpath, list):
1055
+ # No need to expand paths when both source and destination
1056
+ # are provided as lists
1057
+ rpaths = rpath
1058
+ lpaths = lpath
1059
+ else:
1060
+ from .implementations.local import (
1061
+ LocalFileSystem,
1062
+ make_path_posix,
1063
+ trailing_sep,
1064
+ )
1065
+
1066
+ source_is_str = isinstance(lpath, str)
1067
+ if source_is_str:
1068
+ lpath = make_path_posix(lpath)
1069
+ fs = LocalFileSystem()
1070
+ lpaths = fs.expand_path(
1071
+ lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
1072
+ )
1073
+ if source_is_str and (not recursive or maxdepth is not None):
1074
+ # Non-recursive glob does not copy directories
1075
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
1076
+ if not lpaths:
1077
+ return
1078
+
1079
+ source_is_file = len(lpaths) == 1
1080
+ dest_is_dir = isinstance(rpath, str) and (
1081
+ trailing_sep(rpath) or self.isdir(rpath)
1082
+ )
1083
+
1084
+ rpath = (
1085
+ self._strip_protocol(rpath)
1086
+ if isinstance(rpath, str)
1087
+ else [self._strip_protocol(p) for p in rpath]
1088
+ )
1089
+ exists = source_is_str and (
1090
+ (has_magic(lpath) and source_is_file)
1091
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
1092
+ )
1093
+ rpaths = other_paths(
1094
+ lpaths,
1095
+ rpath,
1096
+ exists=exists,
1097
+ flatten=not source_is_str,
1098
+ )
1099
+
1100
+ callback.set_size(len(rpaths))
1101
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1102
+ with callback.branched(lpath, rpath) as child:
1103
+ self.put_file(lpath, rpath, callback=child, **kwargs)
1104
+
1105
+ def head(self, path, size=1024):
1106
+ """Get the first ``size`` bytes from file"""
1107
+ with self.open(path, "rb") as f:
1108
+ return f.read(size)
1109
+
1110
+ def tail(self, path, size=1024):
1111
+ """Get the last ``size`` bytes from file"""
1112
+ with self.open(path, "rb") as f:
1113
+ f.seek(max(-size, -f.size), 2)
1114
+ return f.read()
1115
+
1116
+ def cp_file(self, path1, path2, **kwargs):
1117
+ raise NotImplementedError
1118
+
1119
+ def copy(
1120
+ self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
1121
+ ):
1122
+ """Copy within two locations in the filesystem
1123
+
1124
+ on_error : "raise", "ignore"
1125
+ If raise, any not-found exceptions will be raised; if ignore any
1126
+ not-found exceptions will cause the path to be skipped; defaults to
1127
+ raise unless recursive is true, where the default is ignore
1128
+ """
1129
+ if on_error is None and recursive:
1130
+ on_error = "ignore"
1131
+ elif on_error is None:
1132
+ on_error = "raise"
1133
+
1134
+ if isinstance(path1, list) and isinstance(path2, list):
1135
+ # No need to expand paths when both source and destination
1136
+ # are provided as lists
1137
+ paths1 = path1
1138
+ paths2 = path2
1139
+ else:
1140
+ from .implementations.local import trailing_sep
1141
+
1142
+ source_is_str = isinstance(path1, str)
1143
+ paths1 = self.expand_path(
1144
+ path1, recursive=recursive, maxdepth=maxdepth, **kwargs
1145
+ )
1146
+ if source_is_str and (not recursive or maxdepth is not None):
1147
+ # Non-recursive glob does not copy directories
1148
+ paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
1149
+ if not paths1:
1150
+ return
1151
+
1152
+ source_is_file = len(paths1) == 1
1153
+ dest_is_dir = isinstance(path2, str) and (
1154
+ trailing_sep(path2) or self.isdir(path2)
1155
+ )
1156
+
1157
+ exists = source_is_str and (
1158
+ (has_magic(path1) and source_is_file)
1159
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
1160
+ )
1161
+ paths2 = other_paths(
1162
+ paths1,
1163
+ path2,
1164
+ exists=exists,
1165
+ flatten=not source_is_str,
1166
+ )
1167
+
1168
+ for p1, p2 in zip(paths1, paths2):
1169
+ try:
1170
+ self.cp_file(p1, p2, **kwargs)
1171
+ except FileNotFoundError:
1172
+ if on_error == "raise":
1173
+ raise
1174
+
1175
+ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
1176
+ """Turn one or more globs or directories into a list of all matching paths
1177
+ to files or directories.
1178
+
1179
+ kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
1180
+ """
1181
+
1182
+ if maxdepth is not None and maxdepth < 1:
1183
+ raise ValueError("maxdepth must be at least 1")
1184
+
1185
+ if isinstance(path, (str, os.PathLike)):
1186
+ out = self.expand_path([path], recursive, maxdepth, **kwargs)
1187
+ else:
1188
+ out = set()
1189
+ path = [self._strip_protocol(p) for p in path]
1190
+ for p in path:
1191
+ if has_magic(p):
1192
+ bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
1193
+ out |= bit
1194
+ if recursive:
1195
+ # glob call above expanded one depth so if maxdepth is defined
1196
+ # then decrement it in expand_path call below. If it is zero
1197
+ # after decrementing then avoid expand_path call.
1198
+ if maxdepth is not None and maxdepth <= 1:
1199
+ continue
1200
+ out |= set(
1201
+ self.expand_path(
1202
+ list(bit),
1203
+ recursive=recursive,
1204
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
1205
+ **kwargs,
1206
+ )
1207
+ )
1208
+ continue
1209
+ elif recursive:
1210
+ rec = set(
1211
+ self.find(
1212
+ p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
1213
+ )
1214
+ )
1215
+ out |= rec
1216
+ if p not in out and (recursive is False or self.exists(p)):
1217
+ # should only check once, for the root
1218
+ out.add(p)
1219
+ if not out:
1220
+ raise FileNotFoundError(path)
1221
+ return sorted(out)
1222
+
1223
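
A sketch of how globs and ``recursive`` combine, assuming the in-memory
filesystem's implicit directories list as shown:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/tree/a.txt": b"", "/tree/sub/b.txt": b""})
    assert fs.expand_path("/tree/*.txt") == ["/tree/a.txt"]  # glob, no recursion
    assert fs.expand_path("/tree", recursive=True) == [
        "/tree", "/tree/a.txt", "/tree/sub", "/tree/sub/b.txt"
    ]  # root plus everything below it, sorted
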
+ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
1224
+ """Move file(s) from one location to another"""
1225
+ if path1 == path2:
1226
+ logger.debug("%s mv: The paths are the same, so no files were moved.", self)
1227
+ else:
1228
+ # explicitly raise exception to prevent data corruption
1229
+ self.copy(
1230
+ path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"
1231
+ )
1232
+ self.rm(path1, recursive=recursive)
1233
+
1234
+ def rm_file(self, path):
1235
+ """Delete a file"""
1236
+ self._rm(path)
1237
+
1238
+ def _rm(self, path):
1239
+ """Delete one file"""
1240
+ # this is the old name for the method, prefer rm_file
1241
+ raise NotImplementedError
1242
+
1243
+ def rm(self, path, recursive=False, maxdepth=None):
1244
+ """Delete files.
1245
+
1246
+ Parameters
1247
+ ----------
1248
+ path: str or list of str
1249
+ File(s) to delete.
1250
+ recursive: bool
1251
+ If file(s) are directories, recursively delete contents and then
1252
+ also remove the directory
1253
+ maxdepth: int or None
1254
+ Depth to pass to walk for finding files to delete, if recursive.
1255
+ If None, there will be no limit and infinite recursion may be
1256
+ possible.
1257
+ """
1258
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
1259
+ for p in reversed(path):
1260
+ self.rm_file(p)
1261
+
1262
+ @classmethod
1263
+ def _parent(cls, path):
1264
+ path = cls._strip_protocol(path)
1265
+ if "/" in path:
1266
+ parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
1267
+ return cls.root_marker + parent
1268
+ else:
1269
+ return cls.root_marker
1270
+
1271
+ def _open(
1272
+ self,
1273
+ path,
1274
+ mode="rb",
1275
+ block_size=None,
1276
+ autocommit=True,
1277
+ cache_options=None,
1278
+ **kwargs,
1279
+ ):
1280
+ """Return raw bytes-mode file-like from the file-system"""
1281
+ return AbstractBufferedFile(
1282
+ self,
1283
+ path,
1284
+ mode,
1285
+ block_size,
1286
+ autocommit,
1287
+ cache_options=cache_options,
1288
+ **kwargs,
1289
+ )
1290
+
1291
+ def open(
1292
+ self,
1293
+ path,
1294
+ mode="rb",
1295
+ block_size=None,
1296
+ cache_options=None,
1297
+ compression=None,
1298
+ **kwargs,
1299
+ ):
1300
+ """
1301
+ Return a file-like object from the filesystem
1302
+
1303
+ The resultant instance must function correctly in a context ``with``
1304
+ block.
1305
+
1306
+ Parameters
1307
+ ----------
1308
+ path: str
1309
+ Target file
1310
+ mode: str like 'rb', 'w'
1311
+ See builtin ``open()``
1312
+ Mode "x" (exclusive write) may be implemented by the backend. Even if
1313
+ it is, whether it is checked up front or on commit, and whether it is
1314
+ atomic is implementation-dependent.
1315
+ block_size: int
1316
+ Some indication of buffering - this is a value in bytes
1317
+ cache_options : dict, optional
1318
+ Extra arguments to pass through to the cache.
1319
+ compression: string or None
1320
+ If given, open file using compression codec. Can either be a compression
1321
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
1322
+ compression from the filename suffix.
1323
+ encoding, errors, newline: passed on to TextIOWrapper for text mode
1324
+ """
1325
+ import io
1326
+
1327
+ path = self._strip_protocol(path)
1328
+ if "b" not in mode:
1329
+ mode = mode.replace("t", "") + "b"
1330
+
1331
+ text_kwargs = {
1332
+ k: kwargs.pop(k)
1333
+ for k in ["encoding", "errors", "newline"]
1334
+ if k in kwargs
1335
+ }
1336
+ return io.TextIOWrapper(
1337
+ self.open(
1338
+ path,
1339
+ mode,
1340
+ block_size=block_size,
1341
+ cache_options=cache_options,
1342
+ compression=compression,
1343
+ **kwargs,
1344
+ ),
1345
+ **text_kwargs,
1346
+ )
1347
+ else:
1348
+ ac = kwargs.pop("autocommit", not self._intrans)
1349
+ f = self._open(
1350
+ path,
1351
+ mode=mode,
1352
+ block_size=block_size,
1353
+ autocommit=ac,
1354
+ cache_options=cache_options,
1355
+ **kwargs,
1356
+ )
1357
+ if compression is not None:
1358
+ from fsspec.compression import compr
1359
+ from fsspec.core import get_compression
1360
+
1361
+ compression = get_compression(path, compression)
1362
+ compress = compr[compression]
1363
+ f = compress(f, mode=mode[0])
1364
+
1365
+ if not ac and "r" not in mode:
1366
+ self.transaction.files.append(f)
1367
+ return f
1368
+
1369
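
A sketch of the two conveniences this method layers on top of ``_open``:
text mode via ``TextIOWrapper``, and transparent compression with
``compression="infer"`` (here gzip, guessed from the ".gz" suffix):

    import fsspec

    fs = fsspec.filesystem("memory")
    with fs.open("/log.txt.gz", "wt", compression="infer") as f:
        f.write("hello")
    with fs.open("/log.txt.gz", "rt", compression="infer") as f:
        assert f.read() == "hello"
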
+ def touch(self, path, truncate=True, **kwargs):
1370
+ """Create empty file, or update timestamp
1371
+
1372
+ Parameters
1373
+ ----------
1374
+ path: str
1375
+ file location
1376
+ truncate: bool
1377
+ If True, always set file size to 0; if False, update timestamp and
1378
+ leave file unchanged, if backend allows this
1379
+ """
1380
+ if truncate or not self.exists(path):
1381
+ with self.open(path, "wb", **kwargs):
1382
+ pass
1383
+ else:
1384
+ raise NotImplementedError # update timestamp, if possible
1385
+
1386
+ def ukey(self, path):
1387
+ """Hash of file properties, to tell if it has changed"""
1388
+ return sha256(str(self.info(path)).encode()).hexdigest()
1389
+
1390
+ def read_block(self, fn, offset, length, delimiter=None):
1391
+ """Read a block of bytes from
1392
+
1393
+ Starting at ``offset`` of the file, read ``length`` bytes. If
1394
+ ``delimiter`` is set then we ensure that the read starts and stops at
1395
+ delimiter boundaries that follow the locations ``offset`` and ``offset
1396
+ + length``. If ``offset`` is zero then we start at zero. The
1397
+ bytestring returned WILL include the end delimiter string.
1398
+
1399
+ If offset+length is beyond the eof, reads to eof.
1400
+
1401
+ Parameters
1402
+ ----------
1403
+ fn: string
1404
+ Path to filename
1405
+ offset: int
1406
+ Byte offset to start read
1407
+ length: int
1408
+ Number of bytes to read. If None, read to end.
1409
+ delimiter: bytes (optional)
1410
+ Ensure reading starts and stops at delimiter bytestring
1411
+
1412
+ Examples
1413
+ --------
1414
+ >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP
1415
+ b'Alice, 100\\nBo'
1416
+ >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP
1417
+ b'Alice, 100\\nBob, 200\\n'
1418
+
1419
+ Use ``length=None`` to read to the end of the file.
1420
+ >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP
1421
+ b'Alice, 100\\nBob, 200\\nCharlie, 300'
1422
+
1423
+ See Also
1424
+ --------
1425
+ :func:`fsspec.utils.read_block`
1426
+ """
1427
+ with self.open(fn, "rb") as f:
1428
+ size = f.size
1429
+ if length is None:
1430
+ length = size
1431
+ if size is not None and offset + length > size:
1432
+ length = size - offset
1433
+ return read_block(f, offset, length, delimiter)
1434
+
1435
+ def to_json(self, *, include_password: bool = True) -> str:
1436
+ """
1437
+ JSON representation of this filesystem instance.
1438
+
1439
+ Parameters
1440
+ ----------
1441
+ include_password: bool, default True
1442
+ Whether to include the password (if any) in the output.
1443
+
1444
+ Returns
1445
+ -------
1446
+ JSON string with keys ``cls`` (the python location of this class),
1447
+ ``protocol`` (text name of this class's protocol, first one in case of
1448
+ multiple), ``args`` (positional args, usually empty), and all other
1449
+ keyword arguments as their own keys.
1450
+
1451
+ Warnings
1452
+ --------
1453
+ Serialized filesystems may contain sensitive information which has been
1454
+ passed to the constructor, such as passwords and tokens. Make sure you
1455
+ store and send them in a secure environment!
1456
+ """
1457
+ from .json import FilesystemJSONEncoder
1458
+
1459
+ return json.dumps(
1460
+ self,
1461
+ cls=type(
1462
+ "_FilesystemJSONEncoder",
1463
+ (FilesystemJSONEncoder,),
1464
+ {"include_password": include_password},
1465
+ ),
1466
+ )
1467
+
1468
+ @staticmethod
1469
+ def from_json(blob: str) -> AbstractFileSystem:
1470
+ """
1471
+ Recreate a filesystem instance from JSON representation.
1472
+
1473
+ See ``.to_json()`` for the expected structure of the input.
1474
+
1475
+ Parameters
1476
+ ----------
1477
+ blob: str
1478
+
1479
+ Returns
1480
+ -------
1481
+ file system instance, not necessarily of this particular class.
1482
+
1483
+ Warnings
1484
+ --------
1485
+ This can import arbitrary modules (as determined by the ``cls`` key).
1486
+ Make sure you haven't installed any modules that may execute malicious code
1487
+ at import time.
1488
+ """
1489
+ from .json import FilesystemJSONDecoder
1490
+
1491
+ return json.loads(blob, cls=FilesystemJSONDecoder)
1492
+
1493
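
A round-trip sketch; because instances are cached on their constructor
arguments, deserializing equal options hands back the same object:

    import fsspec

    fs = fsspec.filesystem("memory")
    blob = fs.to_json()
    assert fsspec.AbstractFileSystem.from_json(blob) is fs
    # to_json(include_password=False) would drop a "password" storage option
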
+ def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
1494
+ """
1495
+ JSON-serializable dictionary representation of this filesystem instance.
1496
+
1497
+ Parameters
1498
+ ----------
1499
+ include_password: bool, default True
1500
+ Whether to include the password (if any) in the output.
1501
+
1502
+ Returns
1503
+ -------
1504
+ Dictionary with keys ``cls`` (the python location of this class),
1505
+ ``protocol`` (text name of this class's protocol, first one in case of
1506
+ multiple), ``args`` (positional args, usually empty), and all other
1507
+ keyword arguments as their own keys.
1508
+
1509
+ Warnings
1510
+ --------
1511
+ Serialized filesystems may contain sensitive information which has been
1512
+ passed to the constructor, such as passwords and tokens. Make sure you
1513
+ store and send them in a secure environment!
1514
+ """
1515
+ from .json import FilesystemJSONEncoder
1516
+
1517
+ json_encoder = FilesystemJSONEncoder()
1518
+
1519
+ cls = type(self)
1520
+ proto = self.protocol
1521
+
1522
+ storage_options = dict(self.storage_options)
1523
+ if not include_password:
1524
+ storage_options.pop("password", None)
1525
+
1526
+ return dict(
1527
+ cls=f"{cls.__module__}:{cls.__name__}",
1528
+ protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
1529
+ args=json_encoder.make_serializable(self.storage_args),
1530
+ **json_encoder.make_serializable(storage_options),
1531
+ )
1532
+
1533
+ @staticmethod
1534
+ def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
1535
+ """
1536
+ Recreate a filesystem instance from dictionary representation.
1537
+
1538
+ See ``.to_dict()`` for the expected structure of the input.
1539
+
1540
+ Parameters
1541
+ ----------
1542
+ dct: Dict[str, Any]
1543
+
1544
+ Returns
1545
+ -------
1546
+ file system instance, not necessarily of this particular class.
1547
+
1548
+ Warnings
1549
+ --------
1550
+ This can import arbitrary modules (as determined by the ``cls`` key).
1551
+ Make sure you haven't installed any modules that may execute malicious code
1552
+ at import time.
1553
+ """
1554
+ from .json import FilesystemJSONDecoder
1555
+
1556
+ json_decoder = FilesystemJSONDecoder()
1557
+
1558
+ dct = dict(dct) # Defensive copy
1559
+
1560
+ cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
1561
+ if cls is None:
1562
+ raise ValueError("Not a serialized AbstractFileSystem")
1563
+
1564
+ dct.pop("cls", None)
1565
+ dct.pop("protocol", None)
1566
+
1567
+ return cls(
1568
+ *json_decoder.unmake_serializable(dct.pop("args", ())),
1569
+ **json_decoder.unmake_serializable(dct),
1570
+ )
1571
+
1572
+ def _get_pyarrow_filesystem(self):
1573
+ """
1574
+ Make a version of the FS instance which will be acceptable to pyarrow
1575
+ """
1576
+ # all instances already also derive from pyarrow
1577
+ return self
1578
+
1579
+ def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
1580
+ """Create key/value store based on this file-system
1581
+
1582
+ Makes a MutableMapping interface to the FS at the given root path.
1583
+ See ``fsspec.mapping.FSMap`` for further details.
1584
+ """
1585
+ from .mapping import FSMap
1586
+
1587
+ return FSMap(
1588
+ root,
1589
+ self,
1590
+ check=check,
1591
+ create=create,
1592
+ missing_exceptions=missing_exceptions,
1593
+ )
1594
+
1595
+ @classmethod
1596
+ def clear_instance_cache(cls):
1597
+ """
1598
+ Clear the cache of filesystem instances.
1599
+
1600
+ Notes
1601
+ -----
1602
+ Unless overridden by setting the ``cachable`` class attribute to False,
1603
+ the filesystem class stores a reference to newly created instances. This
1604
+ prevents Python's normal rules around garbage collection from working,
1605
+ since the instances refcount will not drop to zero until
1606
+ ``clear_instance_cache`` is called.
1607
+ """
1608
+ cls._cache.clear()
1609
+
1610
+ def created(self, path):
1611
+ """Return the created timestamp of a file as a datetime.datetime"""
1612
+ raise NotImplementedError
1613
+
1614
+ def modified(self, path):
1615
+ """Return the modified timestamp of a file as a datetime.datetime"""
1616
+ raise NotImplementedError
1617
+
1618
+ def tree(
1619
+ self,
1620
+ path: str = "/",
1621
+ recursion_limit: int = 2,
1622
+ max_display: int = 25,
1623
+ display_size: bool = False,
1624
+ prefix: str = "",
1625
+ is_last: bool = True,
1626
+ first: bool = True,
1627
+ indent_size: int = 4,
1628
+ ) -> str:
1629
+ """
1630
+ Return a tree-like structure of the filesystem starting from the given path as a string.
1631
+
1632
+ Parameters
1633
+ ----------
1634
+ path: Root path to start traversal from
1635
+ recursion_limit: Maximum depth of directory traversal
1636
+ max_display: Maximum number of items to display per directory
1637
+ display_size: Whether to display file sizes
1638
+ prefix: Current line prefix for visual tree structure
1639
+ is_last: Whether current item is last in its level
1640
+ first: Whether this is the first call (displays root path)
1641
+ indent_size: Number of spaces per indentation level
1642
+
1643
+ Returns
1644
+ -------
1645
+ str: A string representing the tree structure.
1646
+
1647
+ Example
1648
+ -------
1649
+ >>> from fsspec import filesystem
1650
+
1651
+ >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
1652
+ >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
1653
+ >>> print(tree)
1654
+ """
1655
+
1656
+ def format_bytes(n: int) -> str:
1657
+ """Format bytes as text."""
1658
+ for prefix, k in (
1659
+ ("P", 2**50),
1660
+ ("T", 2**40),
1661
+ ("G", 2**30),
1662
+ ("M", 2**20),
1663
+ ("k", 2**10),
1664
+ ):
1665
+ if n >= 0.9 * k:
1666
+ return f"{n / k:.2f} {prefix}b"
1667
+ return f"{n}B"
1668
+
1669
+ result = []
1670
+
1671
+ if first:
1672
+ result.append(path)
1673
+
1674
+ if recursion_limit:
1675
+ indent = " " * indent_size
1676
+ contents = self.ls(path, detail=True)
1677
+ contents.sort(
1678
+ key=lambda x: (x.get("type") != "directory", x.get("name", ""))
1679
+ )
1680
+
1681
+ if max_display is not None and len(contents) > max_display:
1682
+ displayed_contents = contents[:max_display]
1683
+ remaining_count = len(contents) - max_display
1684
+ else:
1685
+ displayed_contents = contents
1686
+ remaining_count = 0
1687
+
1688
+ for i, item in enumerate(displayed_contents):
1689
+ is_last_item = (i == len(displayed_contents) - 1) and (
1690
+ remaining_count == 0
1691
+ )
1692
+
1693
+ branch = (
1694
+ "└" + ("─" * (indent_size - 2))
1695
+ if is_last_item
1696
+ else "├" + ("─" * (indent_size - 2))
1697
+ )
1698
+ branch += " "
1699
+ new_prefix = prefix + (
1700
+ indent if is_last_item else "│" + " " * (indent_size - 1)
1701
+ )
1702
+
1703
+ name = os.path.basename(item.get("name", ""))
1704
+
1705
+ if display_size and item.get("type") == "directory":
1706
+ sub_contents = self.ls(item.get("name", ""), detail=True)
1707
+ num_files = sum(
1708
+ 1 for sub_item in sub_contents if sub_item.get("type") == "file"
1709
+ )
1710
+ num_folders = sum(
1711
+ 1
1712
+ for sub_item in sub_contents
1713
+ if sub_item.get("type") == "directory"
1714
+ )
1715
+
1716
+ if num_files == 0 and num_folders == 0:
1717
+ size = " (empty folder)"
1718
+ elif num_files == 0:
1719
+ size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
1720
+ elif num_folders == 0:
1721
+ size = f" ({num_files} file{'s' if num_files > 1 else ''})"
1722
+ else:
1723
+ size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
1724
+ elif display_size and item.get("type") == "file":
1725
+ size = f" ({format_bytes(item.get('size', 0))})"
1726
+ else:
1727
+ size = ""
1728
+
1729
+ result.append(f"{prefix}{branch}{name}{size}")
1730
+
1731
+ if item.get("type") == "directory" and recursion_limit > 0:
1732
+ result.append(
1733
+ self.tree(
1734
+ path=item.get("name", ""),
1735
+ recursion_limit=recursion_limit - 1,
1736
+ max_display=max_display,
1737
+ display_size=display_size,
1738
+ prefix=new_prefix,
1739
+ is_last=is_last_item,
1740
+ first=False,
1741
+ indent_size=indent_size,
1742
+ )
1743
+ )
1744
+
1745
+ if remaining_count > 0:
1746
+ more_message = f"{remaining_count} more item(s) not displayed."
1747
+ result.append(
1748
+ f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
1749
+ )
1750
+
1751
+ return "\n".join(_ for _ in result if _)
1752
+
1753
+ # ------------------------------------------------------------------------
1754
+ # Aliases
1755
+
1756
+ def read_bytes(self, path, start=None, end=None, **kwargs):
1757
+ """Alias of `AbstractFileSystem.cat_file`."""
1758
+ return self.cat_file(path, start=start, end=end, **kwargs)
1759
+
1760
+ def write_bytes(self, path, value, **kwargs):
1761
+ """Alias of `AbstractFileSystem.pipe_file`."""
1762
+ self.pipe_file(path, value, **kwargs)
1763
+
1764
+ def makedir(self, path, create_parents=True, **kwargs):
1765
+ """Alias of `AbstractFileSystem.mkdir`."""
1766
+ return self.mkdir(path, create_parents=create_parents, **kwargs)
1767
+
1768
+ def mkdirs(self, path, exist_ok=False):
1769
+ """Alias of `AbstractFileSystem.makedirs`."""
1770
+ return self.makedirs(path, exist_ok=exist_ok)
1771
+
1772
+ def listdir(self, path, detail=True, **kwargs):
1773
+ """Alias of `AbstractFileSystem.ls`."""
1774
+ return self.ls(path, detail=detail, **kwargs)
1775
+
1776
+ def cp(self, path1, path2, **kwargs):
1777
+ """Alias of `AbstractFileSystem.copy`."""
1778
+ return self.copy(path1, path2, **kwargs)
1779
+
1780
+ def move(self, path1, path2, **kwargs):
1781
+ """Alias of `AbstractFileSystem.mv`."""
1782
+ return self.mv(path1, path2, **kwargs)
1783
+
1784
+ def stat(self, path, **kwargs):
1785
+ """Alias of `AbstractFileSystem.info`."""
1786
+ return self.info(path, **kwargs)
1787
+
1788
+ def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
1789
+ """Alias of `AbstractFileSystem.du`."""
1790
+ return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
1791
+
1792
+ def rename(self, path1, path2, **kwargs):
1793
+ """Alias of `AbstractFileSystem.mv`."""
1794
+ return self.mv(path1, path2, **kwargs)
1795
+
1796
+ def delete(self, path, recursive=False, maxdepth=None):
1797
+ """Alias of `AbstractFileSystem.rm`."""
1798
+ return self.rm(path, recursive=recursive, maxdepth=maxdepth)
1799
+
1800
+ def upload(self, lpath, rpath, recursive=False, **kwargs):
1801
+ """Alias of `AbstractFileSystem.put`."""
1802
+ return self.put(lpath, rpath, recursive=recursive, **kwargs)
1803
+
1804
+ def download(self, rpath, lpath, recursive=False, **kwargs):
1805
+ """Alias of `AbstractFileSystem.get`."""
1806
+ return self.get(rpath, lpath, recursive=recursive, **kwargs)
1807
+
1808
+ def sign(self, path, expiration=100, **kwargs):
1809
+ """Create a signed URL representing the given path
1810
+
1811
+ Some implementations allow temporary URLs to be generated, as a
1812
+ way of delegating credentials.
1813
+
1814
+ Parameters
1815
+ ----------
1816
+ path : str
1817
+ The path on the filesystem
1818
+ expiration : int
1819
+ Number of seconds to enable the URL for (if supported)
1820
+
1821
+ Returns
1822
+ -------
1823
+ URL : str
1824
+ The signed URL
1825
+
1826
+ Raises
1827
+ ------
1828
+ NotImplementedError : if method is not implemented for a filesystem
1829
+ """
1830
+ raise NotImplementedError("Sign is not implemented for this filesystem")
1831
+
1832
+ def _isfilestore(self):
1833
+ # Originally inherited from pyarrow DaskFileSystem. Keeping this
1834
+ # here for backwards compatibility as long as pyarrow uses its
1835
+ # legacy fsspec-compatible filesystems and thus accepts fsspec
1836
+ # filesystems as well
1837
+ return False
1838
+
1839
+
1840
+ class AbstractBufferedFile(io.IOBase):
1841
+ """Convenient class to derive from to provide buffering
1842
+
1843
+ In the case that the backend does not provide a pythonic file-like object
1844
+ already, this class contains much of the logic to build one. The only
1845
+ methods that need to be overridden are ``_upload_chunk``,
1846
+ ``_initiate_upload`` and ``_fetch_range``.
1847
+ """
1848
+
1849
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
1850
+ _details = None
1851
+
1852
+ def __init__(
1853
+ self,
1854
+ fs,
1855
+ path,
1856
+ mode="rb",
1857
+ block_size="default",
1858
+ autocommit=True,
1859
+ cache_type="readahead",
1860
+ cache_options=None,
1861
+ size=None,
1862
+ **kwargs,
1863
+ ):
1864
+ """
1865
+ Template for files with buffered reading and writing
1866
+
1867
+ Parameters
1868
+ ----------
1869
+ fs: instance of FileSystem
1870
+ path: str
1871
+ location in file-system
1872
+ mode: str
1873
+ Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
1874
+ systems may be read-only, and some may not support append.
1875
+ block_size: int
1876
+ Buffer size for reading or writing, 'default' for class default
1877
+ autocommit: bool
1878
+ Whether to write to final destination; may only impact what
1879
+ happens when file is being closed.
1880
+ cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
1881
+ Caching policy in read mode. See the definitions in ``core``.
1882
+ cache_options : dict
1883
+ Additional options passed to the constructor for the cache specified
1884
+ by `cache_type`.
1885
+ size: int
1886
+ If given and in read mode, suppresses having to look up the file size
1887
+ kwargs:
1888
+ Gets stored as self.kwargs
1889
+ """
1890
+ from .core import caches
1891
+
1892
+ self.path = path
1893
+ self.fs = fs
1894
+ self.mode = mode
1895
+ self.blocksize = (
1896
+ self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
1897
+ )
1898
+ self.loc = 0
1899
+ self.autocommit = autocommit
1900
+ self.end = None
1901
+ self.start = None
1902
+ self.closed = False
1903
+
1904
+ if cache_options is None:
1905
+ cache_options = {}
1906
+
1907
+ if "trim" in kwargs:
1908
+ warnings.warn(
1909
+ "Passing 'trim' to control the cache behavior has been deprecated. "
1910
+ "Specify it within the 'cache_options' argument instead.",
1911
+ FutureWarning,
1912
+ )
1913
+ cache_options["trim"] = kwargs.pop("trim")
1914
+
1915
+ self.kwargs = kwargs
1916
+
1917
+ if mode not in {"ab", "rb", "wb", "xb"}:
1918
+ raise NotImplementedError("File mode not supported")
1919
+ if mode == "rb":
1920
+ if size is not None:
1921
+ self.size = size
1922
+ else:
1923
+ self.size = self.details["size"]
1924
+ self.cache = caches[cache_type](
1925
+ self.blocksize, self._fetch_range, self.size, **cache_options
1926
+ )
1927
+ else:
1928
+ self.buffer = io.BytesIO()
1929
+ self.offset = None
1930
+ self.forced = False
1931
+ self.location = None
1932
+
1933
+ @property
1934
+ def details(self):
1935
+ if self._details is None:
1936
+ self._details = self.fs.info(self.path)
1937
+ return self._details
1938
+
1939
+ @details.setter
1940
+ def details(self, value):
1941
+ self._details = value
1942
+ self.size = value["size"]
1943
+
1944
+ @property
1945
+ def full_name(self):
1946
+ return _unstrip_protocol(self.path, self.fs)
1947
+
1948
+ @property
1949
+ def closed(self):
1950
+ # get around this attr being read-only in IOBase
1951
+ # use getattr here, since this can be called during del
1952
+ return getattr(self, "_closed", True)
1953
+
1954
+ @closed.setter
1955
+ def closed(self, c):
1956
+ self._closed = c
1957
+
1958
+ def __hash__(self):
1959
+ if "w" in self.mode:
1960
+ return id(self)
1961
+ else:
1962
+ return int(tokenize(self.details), 16)
1963
+
1964
+ def __eq__(self, other):
1965
+ """Files are equal if they have the same checksum, only in read mode"""
1966
+ if self is other:
1967
+ return True
1968
+ return (
1969
+ isinstance(other, type(self))
1970
+ and self.mode == "rb"
1971
+ and other.mode == "rb"
1972
+ and hash(self) == hash(other)
1973
+ )
1974
+
1975
+ def commit(self):
1976
+ """Move from temp to final destination"""
1977
+
1978
+ def discard(self):
1979
+ """Throw away temporary file"""
1980
+
1981
+ def info(self):
1982
+ """File information about this path"""
1983
+ if self.readable():
1984
+ return self.details
1985
+ else:
1986
+ raise ValueError("Info not available while writing")
1987
+
1988
+ def tell(self):
1989
+ """Current file location"""
1990
+ return self.loc
1991
+
1992
+ def seek(self, loc, whence=0):
1993
+ """Set current file location
1994
+
1995
+ Parameters
1996
+ ----------
1997
+ loc: int
1998
+ byte location
1999
+ whence: {0, 1, 2}
2000
+ from start of file, current location or end of file, resp.
2001
+ """
2002
+ loc = int(loc)
2003
+ if not self.mode == "rb":
2004
+ raise OSError(ESPIPE, "Seek only available in read mode")
2005
+ if whence == 0:
2006
+ nloc = loc
2007
+ elif whence == 1:
2008
+ nloc = self.loc + loc
2009
+ elif whence == 2:
2010
+ nloc = self.size + loc
2011
+ else:
2012
+ raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
2013
+ if nloc < 0:
2014
+ raise ValueError("Seek before start of file")
2015
+ self.loc = nloc
2016
+ return self.loc
2017
+
2018
+ def write(self, data):
2019
+ """
2020
+ Write data to buffer.
2021
+
2022
+ Buffer only sent on flush() or if buffer is greater than
2023
+ or equal to blocksize.
2024
+
2025
+ Parameters
2026
+ ----------
2027
+ data: bytes
2028
+ Set of bytes to be written.
2029
+ """
2030
+ if not self.writable():
2031
+ raise ValueError("File not in write mode")
2032
+ if self.closed:
2033
+ raise ValueError("I/O operation on closed file.")
2034
+ if self.forced:
2035
+ raise ValueError("This file has been force-flushed, can only close")
2036
+ out = self.buffer.write(data)
2037
+ self.loc += out
2038
+ if self.buffer.tell() >= self.blocksize:
2039
+ self.flush()
2040
+ return out
2041
+
2042
+ def flush(self, force=False):
2043
+ """
2044
+ Write buffered data to backend store.
2045
+
2046
+ Writes the current buffer, if it is larger than the block-size, or if
2047
+ the file is being closed.
2048
+
2049
+ Parameters
2050
+ ----------
2051
+ force: bool
2052
+ When closing, write the last block even if it is smaller than
2053
+ blocks are allowed to be. Disallows further writing to this file.
2054
+ """
2055
+
2056
+ if self.closed:
2057
+ raise ValueError("Flush on closed file")
2058
+ if force and self.forced:
2059
+ raise ValueError("Force flush cannot be called more than once")
2060
+ if force:
2061
+ self.forced = True
2062
+
2063
+ if self.readable():
2064
+ # no-op to flush on read-mode
2065
+ return
2066
+
2067
+ if not force and self.buffer.tell() < self.blocksize:
2068
+ # Defer write on small block
2069
+ return
2070
+
2071
+ if self.offset is None:
2072
+ # Initialize a multipart upload
2073
+ self.offset = 0
2074
+ try:
2075
+ self._initiate_upload()
2076
+ except:
2077
+ self.closed = True
2078
+ raise
2079
+
2080
+ if self._upload_chunk(final=force) is not False:
2081
+ self.offset += self.buffer.seek(0, 2)
2082
+ self.buffer = io.BytesIO()
2083
+
2084
+ def _upload_chunk(self, final=False):
2085
+ """Write one part of a multi-block file upload
2086
+
2087
+ Parameters
2088
+ ==========
2089
+ final: bool
2090
+ This is the last block, so should complete file, if
2091
+ self.autocommit is True.
2092
+ """
2093
+ # may not yet have been initialized; may need to call _initiate_upload
2094
+
2095
+ def _initiate_upload(self):
2096
+ """Create remote file/upload"""
2097
+ pass
2098
+
2099
+ def _fetch_range(self, start, end):
2100
+ """Get the specified set of bytes from remote"""
2101
+ return self.fs.cat_file(self.path, start=start, end=end)
2102
+
2103
+ def read(self, length=-1):
2104
+ """
2105
+ Return data from cache, or fetch pieces as necessary
2106
+
2107
+ Parameters
2108
+ ----------
2109
+ length: int (-1)
2110
+ Number of bytes to read; if <0, all remaining bytes.
2111
+ """
2112
+ length = -1 if length is None else int(length)
2113
+ if self.mode != "rb":
2114
+ raise ValueError("File not in read mode")
2115
+ if length < 0:
2116
+ length = self.size - self.loc
2117
+ if self.closed:
2118
+ raise ValueError("I/O operation on closed file.")
2119
+ if length == 0:
2120
+ # don't even bother calling fetch
2121
+ return b""
2122
+ out = self.cache._fetch(self.loc, self.loc + length)
2123
+
2124
+ logger.debug(
2125
+ "%s read: %i - %i %s",
2126
+ self,
2127
+ self.loc,
2128
+ self.loc + length,
2129
+ self.cache._log_stats(),
2130
+ )
2131
+ self.loc += len(out)
2132
+ return out
2133
+
2134
+ def readinto(self, b):
2135
+ """mirrors builtin file's readinto method
2136
+
2137
+ https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
2138
+ """
2139
+ out = memoryview(b).cast("B")
2140
+ data = self.read(out.nbytes)
2141
+ out[: len(data)] = data
2142
+ return len(data)
2143
+
2144
+ def readuntil(self, char=b"\n", blocks=None):
2145
+ """Return data between current position and first occurrence of char
2146
+
2147
+ char is included in the output, except if the end of the file is
2148
+ encountered first.
2149
+
2150
+ Parameters
2151
+ ----------
2152
+ char: bytes
2153
+ Thing to find
2154
+ blocks: None or int
2155
+ How much to read in each go. Defaults to file blocksize - which may
2156
+ mean a new read on every call.
2157
+ """
2158
+ out = []
2159
+ while True:
2160
+ start = self.tell()
2161
+ part = self.read(blocks or self.blocksize)
2162
+ if len(part) == 0:
2163
+ break
2164
+ found = part.find(char)
2165
+ if found > -1:
2166
+ out.append(part[: found + len(char)])
2167
+ self.seek(start + found + len(char))
2168
+ break
2169
+ out.append(part)
2170
+ return b"".join(out)
2171
+
2172
+ def readline(self):
2173
+ """Read until and including the first occurrence of newline character
2174
+
2175
+ Note that, because of character encoding, this is not necessarily a
2176
+ true line ending.
2177
+ """
2178
+ return self.readuntil(b"\n")
2179
+
2180
+ def __next__(self):
2181
+ out = self.readline()
2182
+ if out:
2183
+ return out
2184
+ raise StopIteration
2185
+
2186
+ def __iter__(self):
2187
+ return self
2188
+
2189
+ def readlines(self):
2190
+ """Return all data, split by the newline character, including the newline character"""
2191
+ data = self.read()
2192
+ lines = data.split(b"\n")
2193
+ out = [l + b"\n" for l in lines[:-1]]
2194
+ if data.endswith(b"\n"):
2195
+ return out
2196
+ else:
2197
+ return out + [lines[-1]]
2198
+ # return list(self) ???
2199
+
2200
+ def readinto1(self, b):
2201
+ return self.readinto(b)
2202
+
2203
+ def close(self):
2204
+ """Close file
2205
+
2206
+ Finalizes writes, discards cache
2207
+ """
2208
+ if getattr(self, "_unclosable", False):
2209
+ return
2210
+ if self.closed:
2211
+ return
2212
+ try:
2213
+ if self.mode == "rb":
2214
+ self.cache = None
2215
+ else:
2216
+ if not self.forced:
2217
+ self.flush(force=True)
2218
+
2219
+ if self.fs is not None:
2220
+ self.fs.invalidate_cache(self.path)
2221
+ self.fs.invalidate_cache(self.fs._parent(self.path))
2222
+ finally:
2223
+ self.closed = True
2224
+
2225
+ def readable(self):
2226
+ """Whether opened for reading"""
2227
+ return "r" in self.mode and not self.closed
2228
+
2229
+ def seekable(self):
2230
+ """Whether is seekable (only in read mode)"""
2231
+ return self.readable()
2232
+
2233
+ def writable(self):
2234
+ """Whether opened for writing"""
2235
+ return self.mode in {"wb", "ab", "xb"} and not self.closed
2236
+
2237
+ def __reduce__(self):
2238
+ if self.mode != "rb":
2239
+ raise RuntimeError("Pickling a writeable file is not supported")
2240
+
2241
+ return reopen, (
2242
+ self.fs,
2243
+ self.path,
2244
+ self.mode,
2245
+ self.blocksize,
2246
+ self.loc,
2247
+ self.size,
2248
+ self.autocommit,
2249
+ self.cache.name if self.cache else "none",
2250
+ self.kwargs,
2251
+ )
2252
+
2253
+ def __del__(self):
2254
+ if not self.closed:
2255
+ self.close()
2256
+
2257
+ def __str__(self):
2258
+ return f"<File-like object {type(self.fs).__name__}, {self.path}>"
2259
+
2260
+ __repr__ = __str__
2261
+
2262
+ def __enter__(self):
2263
+ return self
2264
+
2265
+ def __exit__(self, *args):
2266
+ self.close()
2267
+
2268
+
2269
+ def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
2270
+ file = fs.open(
2271
+ path,
2272
+ mode=mode,
2273
+ block_size=blocksize,
2274
+ autocommit=autocommit,
2275
+ cache_type=cache_type,
2276
+ size=size,
2277
+ **kwargs,
2278
+ )
2279
+ if loc > 0:
2280
+ file.seek(loc)
2281
+ return file
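
To make the division of labour in ``AbstractBufferedFile`` concrete, here is a
minimal, illustrative subclass backed by a plain dict; ``STORE`` and
``DictFile`` are inventions for this sketch, and only the three documented
hooks are overridden:

    from fsspec.spec import AbstractBufferedFile

    STORE = {}  # stand-in backend: path -> bytes

    class DictFile(AbstractBufferedFile):
        def _fetch_range(self, start, end):
            # read side: hand the cache the requested byte range
            return STORE[self.path][start:end]

        def _initiate_upload(self):
            # write side: called once, just before the first chunk is flushed
            self._chunks = []

        def _upload_chunk(self, final=False):
            # write side: stash the current buffer; assemble on the final chunk
            self._chunks.append(self.buffer.getvalue())
            if final:
                STORE[self.path] = b"".join(self._chunks)
            return True

    STORE["/x"] = b"hello world"
    r = DictFile(fs=None, path="/x", mode="rb", size=len(STORE["/x"]))
    assert r.read(5) == b"hello"

    w = DictFile(fs=None, path="/y", mode="wb")
    w.write(b"hi there")
    w.close()  # flush(force=True) drives the two upload hooks
    assert STORE["/y"] == b"hi there"
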
venv/lib/python3.13/site-packages/fsspec/transaction.py ADDED
@@ -0,0 +1,90 @@
1
+ from collections import deque
2
+
3
+
4
+ class Transaction:
5
+ """Filesystem transaction write context
6
+
7
+ Gathers files for deferred commit or discard, so that several write
8
+ operations can be finalized semi-atomically. This works by having this
9
+ instance as the ``.transaction`` attribute of the given filesystem
10
+ """
11
+
12
+ def __init__(self, fs, **kwargs):
13
+ """
14
+ Parameters
15
+ ----------
16
+ fs: FileSystem instance
17
+ """
18
+ self.fs = fs
19
+ self.files = deque()
20
+
21
+ def __enter__(self):
22
+ self.start()
23
+ return self
24
+
25
+ def __exit__(self, exc_type, exc_val, exc_tb):
26
+ """End transaction and commit, if exit is not due to exception"""
27
+ # only commit if there was no exception
28
+ self.complete(commit=exc_type is None)
29
+ if self.fs:
30
+ self.fs._intrans = False
31
+ self.fs._transaction = None
32
+ self.fs = None
33
+
34
+ def start(self):
35
+ """Start a transaction on this FileSystem"""
36
+ self.files = deque() # clean up after previous failed completions
37
+ self.fs._intrans = True
38
+
39
+ def complete(self, commit=True):
40
+ """Finish transaction: commit or discard all deferred files"""
41
+ while self.files:
42
+ f = self.files.popleft()
43
+ if commit:
44
+ f.commit()
45
+ else:
46
+ f.discard()
47
+ self.fs._intrans = False
48
+ self.fs._transaction = None
49
+ self.fs = None
50
+
51
+
52
+ class FileActor:
53
+ def __init__(self):
54
+ self.files = []
55
+
56
+ def commit(self):
57
+ for f in self.files:
58
+ f.commit()
59
+ self.files.clear()
60
+
61
+ def discard(self):
62
+ for f in self.files:
63
+ f.discard()
64
+ self.files.clear()
65
+
66
+ def append(self, f):
67
+ self.files.append(f)
68
+
69
+
70
+ class DaskTransaction(Transaction):
71
+ def __init__(self, fs):
72
+ """
73
+ Parameters
74
+ ----------
75
+ fs: FileSystem instance
76
+ """
77
+ import distributed
78
+
79
+ super().__init__(fs)
80
+ client = distributed.default_client()
81
+ self.files = client.submit(FileActor, actor=True).result()
82
+
83
+ def complete(self, commit=True):
84
+ """Finish transaction: commit or discard all deferred files"""
85
+ if commit:
86
+ self.files.commit().result()
87
+ else:
88
+ self.files.discard().result()
89
+ self.fs._intrans = False
90
+ self.fs = None
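
The deferred-commit pattern in use, via the filesystem's ``transaction``
attribute (writes inside the block are committed together on clean exit and
discarded on exception); a sketch with the in-memory filesystem:

    import fsspec

    fs = fsspec.filesystem("memory")
    with fs.transaction:
        with fs.open("/staged.txt", "wb") as f:
            f.write(b"all or nothing")
        # the write is deferred until the transaction exits cleanly
    assert fs.cat("/staged.txt") == b"all or nothing"
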
venv/lib/python3.13/site-packages/hf_xet/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .hf_xet import *
2
+
3
+ __doc__ = hf_xet.__doc__
4
+ if hasattr(hf_xet, "__all__"):
5
+ __all__ = hf_xet.__all__
venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
1
+ pip
venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA ADDED
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: idna
3
+ Version: 3.11
4
+ Summary: Internationalized Domain Names in Applications (IDNA)
5
+ Author-email: Kim Davies <kim+pypi@gumleaf.org>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/x-rst
8
+ License-Expression: BSD-3-Clause
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: System Administrators
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Programming Language :: Python :: Implementation :: CPython
24
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
25
+ Classifier: Topic :: Internet :: Name Service (DNS)
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Classifier: Topic :: Utilities
28
+ License-File: LICENSE.md
29
+ Requires-Dist: ruff >= 0.6.2 ; extra == "all"
30
+ Requires-Dist: mypy >= 1.11.2 ; extra == "all"
31
+ Requires-Dist: pytest >= 8.3.2 ; extra == "all"
32
+ Requires-Dist: flake8 >= 7.1.1 ; extra == "all"
33
+ Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
34
+ Project-URL: Issue tracker, https://github.com/kjd/idna/issues
35
+ Project-URL: Source, https://github.com/kjd/idna
36
+ Provides-Extra: all
37
+
38
+ Internationalized Domain Names in Applications (IDNA)
39
+ =====================================================
40
+
41
+ Support for `Internationalized Domain Names in
42
+ Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_
43
+ and `Unicode IDNA Compatibility Processing
44
+ <https://unicode.org/reports/tr46/>`_.
45
+
46
+ The latest versions of these standards supplied here provide
47
+ more comprehensive language coverage and reduce the potential of
48
+ allowing domains with known security vulnerabilities. This library
49
+ is a suitable replacement for the “encodings.idna”
50
+ module that comes with the Python standard library, but which
51
+ only supports an older superseded IDNA specification from 2003.
52
+
53
+ Basic functions are simply executed:
54
+
55
+ .. code-block:: pycon
56
+
57
+ >>> import idna
58
+ >>> idna.encode('ドメイン.テスト')
59
+ b'xn--eckwd4c7c.xn--zckzah'
60
+ >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
61
+ ドメイン.テスト
62
+
63
+
64
+ Installation
65
+ ------------
66
+
67
+ This package is available for installation from PyPI via the
68
+ typical mechanisms, such as:
69
+
70
+ .. code-block:: bash
71
+
72
+ $ python3 -m pip install idna
73
+
74
+
75
+ Usage
76
+ -----
77
+
78
+ For typical usage, the ``encode`` and ``decode`` functions will take a
79
+ domain name argument and perform a conversion to ASCII compatible encoding
80
+ (known as A-labels), or to Unicode strings (known as U-labels)
81
+ respectively.
82
+
83
+ .. code-block:: pycon
84
+
85
+ >>> import idna
86
+ >>> idna.encode('ドメイン.テスト')
87
+ b'xn--eckwd4c7c.xn--zckzah'
88
+ >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
89
+ ドメイン.テスト
90
+
91
+ Conversions can be applied at a per-label basis using the ``ulabel`` or
92
+ ``alabel`` functions if necessary:
93
+
94
+ .. code-block:: pycon
95
+
96
+ >>> idna.alabel('测试')
97
+ b'xn--0zwm56d'
98
+
99
+
100
+ Compatibility Mapping (UTS #46)
101
+ +++++++++++++++++++++++++++++++
102
+
103
+ This library provides support for `Unicode IDNA Compatibility
104
+ Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from
105
+ different potential ways a user may input a domain prior to performing the IDNA
106
+ conversion operations. This functionality, known as a
107
+ `mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the
108
+ specification to be a local user-interface issue distinct from IDNA
109
+ conversion functionality.
110
+
111
+ For example, “Königsgäßchen” is not a permissible label as *LATIN
112
+ CAPITAL LETTER K* is not allowed (nor are capital letters in general).
113
+ UTS 46 will convert this into lower case prior to applying the IDNA
114
+ conversion.
115
+
116
+ .. code-block:: pycon
117
+
118
+ >>> import idna
119
+ >>> idna.encode('Königsgäßchen')
120
+ ...
121
+ idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
122
+ >>> idna.encode('Königsgäßchen', uts46=True)
123
+ b'xn--knigsgchen-b4a3dun'
124
+ >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
125
+ königsgäßchen
126
+
127
+
128
+ Exceptions
129
+ ----------
130
+
131
+ All errors raised during the conversion following the specification
132
+ should raise an exception derived from the ``idna.IDNAError`` base
133
+ class.
134
+
135
+ More specific exceptions that may be generated as ``idna.IDNABidiError``
136
+ when the error reflects an illegal combination of left-to-right and
137
+ right-to-left characters in a label; ``idna.InvalidCodepoint`` when
138
+ a specific codepoint is an illegal character in an IDN label (i.e.
139
+ INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
140
+ illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ
141
+ but the contextual requirements are not satisfied.)
142
+
143
+ Building and Diagnostics
144
+ ------------------------
145
+
146
+ The IDNA and UTS 46 functionality relies upon pre-calculated lookup
147
+ tables for performance. These tables are derived from computing against
148
+ eligibility criteria in the respective standards using the command-line
149
+ script ``tools/idna-data``.
150
+
151
+ This tool will fetch relevant codepoint data from the Unicode repository
152
+ and perform the required calculations to identify eligibility. There are
153
+ three main modes:
154
+
155
+ * ``idna-data make-libdata``. Generates ``idnadata.py`` and
156
+ ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
157
+ UTS 46 conversions. Implementers who wish to track this library against
158
+ a different Unicode version may use this tool to manually generate a
159
+ different version of the ``idnadata.py`` and ``uts46data.py`` files.
160
+
161
+ * ``idna-data make-table``. Generate a table of the IDNA disposition
162
+ (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
163
+ B.1 of RFC 5892 and the pre-computed tables published by `IANA
164
+ <https://www.iana.org/>`_.
165
+
166
+ * ``idna-data U+0061``. Prints debugging output on the various
167
+ properties associated with an individual Unicode codepoint (in this
168
+ case, U+0061), that are used to assess the IDNA and UTS 46 status of a
169
+ codepoint. This is helpful in debugging or analysis.
170
+
171
+ The tool accepts a number of arguments, described using ``idna-data
172
+ -h``. Most notably, the ``--version`` argument allows the specification
173
+ of the version of Unicode to be used in computing the table data. For
174
+ example, ``idna-data --version 9.0.0 make-libdata`` will generate
175
+ library data against Unicode 9.0.0.
176
+
177
+
178
+ Additional Notes
179
+ ----------------
180
+
181
+ * **Packages**. The latest tagged release version is published in the
182
+ `Python Package Index <https://pypi.org/project/idna/>`_.
183
+
184
+ * **Version support**. This library supports Python 3.8 and higher.
185
+ As this library serves as a low-level toolkit for a variety of
186
+ applications, many of which strive for broad compatibility with older
187
+ Python versions, there is no rush to remove older interpreter support.
188
+ Support for older versions is likely to be removed from new releases
189
+ as automated tests can no longer easily be run, i.e. once the Python
190
+ version is officially end-of-life.
191
+
192
+ * **Testing**. The library has a test suite based on each rule of the
193
+ IDNA specification, as well as tests that are provided as part of the
194
+ Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
195
+ <https://unicode.org/reports/tr46/>`_.
196
+
197
+ * **Emoji**. Supporting emoji domains in this library is an occasional
198
+ request. Encoding of symbols like emoji is expressly prohibited by
199
+ the technical standard IDNA 2008, and emoji domains are being phased
200
+ out across the domain industry due to associated security risks. For
201
+ now, applications that need to support these non-compliant labels
202
+ may wish to consider trying the encode/decode operation in this library
203
+ first, and then falling back to ``encodings.idna``, as sketched after
205
+ this list. See `the GitHub project
+ <https://github.com/kjd/idna/issues/18>`_ for more discussion.
205
+
206
+ * **Transitional processing**. Unicode 16.0.0 removed transitional
207
+ processing, so the ``transitional`` argument to the ``encode()`` method
208
+ no longer has any effect and will be removed at a later date.
209
+
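+ For the fallback approach mentioned under **Emoji** above, a minimal
+ sketch; the helper name is hypothetical and not part of this library's
+ API, and it operates on a single label:
+
+ .. code-block:: python
+
+     import encodings.idna
+
+     import idna
+
+     def label_to_ascii(label: str) -> bytes:
+         try:
+             # Strict IDNA 2008 (with UTS 46 mapping) first.
+             return idna.encode(label, uts46=True)
+         except idna.IDNAError:
+             # Legacy IDNA 2003 rules, which permit some labels
+             # (such as emoji) that IDNA 2008 rejects.
+             return encodings.idna.ToASCII(label)
+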
venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
1
+ idna-3.11.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ idna-3.11.dist-info/METADATA,sha256=fCwSww9SuiN8TIHllFSASUQCW55hAs8dzKnr9RaEEbA,8378
3
+ idna-3.11.dist-info/RECORD,,
4
+ idna-3.11.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
5
+ idna-3.11.dist-info/licenses/LICENSE.md,sha256=t6M2q_OwThgOwGXN0W5wXQeeHMehT5EKpukYfza5zYc,1541
6
+ idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868
7
+ idna/__pycache__/__init__.cpython-313.pyc,,
8
+ idna/__pycache__/codec.cpython-313.pyc,,
9
+ idna/__pycache__/compat.cpython-313.pyc,,
10
+ idna/__pycache__/core.cpython-313.pyc,,
11
+ idna/__pycache__/idnadata.cpython-313.pyc,,
12
+ idna/__pycache__/intranges.cpython-313.pyc,,
13
+ idna/__pycache__/package_data.cpython-313.pyc,,
14
+ idna/__pycache__/uts46data.cpython-313.pyc,,
15
+ idna/codec.py,sha256=M2SGWN7cs_6B32QmKTyTN6xQGZeYQgQ2wiX3_DR6loE,3438
16
+ idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316
17
+ idna/core.py,sha256=P26_XVycuMTZ1R2mNK1ZREVzM5mvTzdabBXfyZVU1Lc,13246
18
+ idna/idnadata.py,sha256=SG8jhaGE53iiD6B49pt2pwTv_UvClciWE-N54oR2p4U,79623
19
+ idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898
20
+ idna/package_data.py,sha256=_CUavOxobnbyNG2FLyHoN8QHP3QM9W1tKuw7eq9QwBk,21
21
+ idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ idna/uts46data.py,sha256=H9J35VkD0F9L9mKOqjeNGd2A-Va6FlPoz6Jz4K7h-ps,243725
venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.12.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
venv/lib/python3.13/site-packages/packaging/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ __title__ = "packaging"
6
+ __summary__ = "Core utilities for Python packages"
7
+ __uri__ = "https://github.com/pypa/packaging"
8
+
9
+ __version__ = "25.0"
10
+
11
+ __author__ = "Donald Stufft and individual contributors"
12
+ __email__ = "donald@stufft.io"
13
+
14
+ __license__ = "BSD-2-Clause or Apache-2.0"
15
+ __copyright__ = f"2014 {__author__}"
venv/lib/python3.13/site-packages/packaging/_elffile.py ADDED
@@ -0,0 +1,109 @@
1
+ """
2
+ ELF file parser.
3
+
4
+ This provides a class ``ELFFile`` that parses an ELF executable in a similar
5
+ interface to ``ZipFile``. Only the read interface is implemented.
6
+
7
+ Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
8
+ ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import enum
14
+ import os
15
+ import struct
16
+ from typing import IO
17
+
18
+
19
+ class ELFInvalid(ValueError):
20
+ pass
21
+
22
+
23
+ class EIClass(enum.IntEnum):
24
+ C32 = 1
25
+ C64 = 2
26
+
27
+
28
+ class EIData(enum.IntEnum):
29
+ Lsb = 1
30
+ Msb = 2
31
+
32
+
33
+ class EMachine(enum.IntEnum):
34
+ I386 = 3
35
+ S390 = 22
36
+ Arm = 40
37
+ X8664 = 62
38
+ AArc64 = 183
39
+
40
+
41
+ class ELFFile:
42
+ """
43
+ Representation of an ELF executable.
44
+ """
45
+
46
+ def __init__(self, f: IO[bytes]) -> None:
47
+ self._f = f
48
+
49
+ try:
50
+ ident = self._read("16B")
51
+ except struct.error as e:
52
+ raise ELFInvalid("unable to parse identification") from e
53
+ magic = bytes(ident[:4])
54
+ if magic != b"\x7fELF":
55
+ raise ELFInvalid(f"invalid magic: {magic!r}")
56
+
57
+ self.capacity = ident[4] # Format for program header (bitness).
58
+ self.encoding = ident[5] # Data structure encoding (endianness).
59
+
60
+ try:
61
+ # e_fmt: Format for program header.
62
+ # p_fmt: Format for section header.
63
+ # p_idx: Indexes to find p_type, p_offset, and p_filesz.
64
+ e_fmt, self._p_fmt, self._p_idx = {
65
+ (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
66
+ (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
67
+ (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
68
+ (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
69
+ }[(self.capacity, self.encoding)]
70
+ except KeyError as e:
71
+ raise ELFInvalid(
72
+ f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})"
73
+ ) from e
74
+
75
+ try:
76
+ (
77
+ _,
78
+ self.machine, # Architecture type.
79
+ _,
80
+ _,
81
+ self._e_phoff, # Offset of program header.
82
+ _,
83
+ self.flags, # Processor-specific flags.
84
+ _,
85
+ self._e_phentsize, # Size of section.
86
+ self._e_phnum, # Number of sections.
87
+ ) = self._read(e_fmt)
88
+ except struct.error as e:
89
+ raise ELFInvalid("unable to parse machine and section information") from e
90
+
91
+ def _read(self, fmt: str) -> tuple[int, ...]:
92
+ return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
93
+
94
+ @property
95
+ def interpreter(self) -> str | None:
96
+ """
97
+ The path recorded in the ``PT_INTERP`` section header.
98
+ """
99
+ for index in range(self._e_phnum):
100
+ self._f.seek(self._e_phoff + self._e_phentsize * index)
101
+ try:
102
+ data = self._read(self._p_fmt)
103
+ except struct.error:
104
+ continue
105
+ if data[self._p_idx[0]] != 3: # Not PT_INTERP.
106
+ continue
107
+ self._f.seek(data[self._p_idx[1]])
108
+ return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
109
+ return None
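+
+
+ # Illustrative sketch (not part of the upstream module): exercise the
+ # read-only interface on the running interpreter, mirroring how
+ # packaging._musllinux drives this class.
+ if __name__ == "__main__":  # pragma: no cover
+     import sys
+
+     try:
+         with open(sys.executable, "rb") as f:
+             elf = ELFFile(f)
+             print("capacity:", elf.capacity)  # 1 = 32-bit, 2 = 64-bit
+             print("machine:", elf.machine)  # raw e_machine value, e.g. 62 for x86-64
+             print("interpreter:", elf.interpreter)  # e.g. /lib64/ld-linux-x86-64.so.2
+     except ELFInvalid as exc:
+         print("not a valid ELF file:", exc)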
venv/lib/python3.13/site-packages/packaging/_manylinux.py ADDED
@@ -0,0 +1,262 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import contextlib
5
+ import functools
6
+ import os
7
+ import re
8
+ import sys
9
+ import warnings
10
+ from typing import Generator, Iterator, NamedTuple, Sequence
11
+
12
+ from ._elffile import EIClass, EIData, ELFFile, EMachine
13
+
14
+ EF_ARM_ABIMASK = 0xFF000000
15
+ EF_ARM_ABI_VER5 = 0x05000000
16
+ EF_ARM_ABI_FLOAT_HARD = 0x00000400
17
+
18
+
19
+ # `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
20
+ # as the type for `path` until then.
21
+ @contextlib.contextmanager
22
+ def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
23
+ try:
24
+ with open(path, "rb") as f:
25
+ yield ELFFile(f)
26
+ except (OSError, TypeError, ValueError):
27
+ yield None
28
+
29
+
30
+ def _is_linux_armhf(executable: str) -> bool:
31
+ # hard-float ABI can be detected from the ELF header of the running
32
+ # process
33
+ # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
34
+ with _parse_elf(executable) as f:
35
+ return (
36
+ f is not None
37
+ and f.capacity == EIClass.C32
38
+ and f.encoding == EIData.Lsb
39
+ and f.machine == EMachine.Arm
40
+ and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
41
+ and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
42
+ )
43
+
44
+
45
+ def _is_linux_i686(executable: str) -> bool:
46
+ with _parse_elf(executable) as f:
47
+ return (
48
+ f is not None
49
+ and f.capacity == EIClass.C32
50
+ and f.encoding == EIData.Lsb
51
+ and f.machine == EMachine.I386
52
+ )
53
+
54
+
55
+ def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
56
+ if "armv7l" in archs:
57
+ return _is_linux_armhf(executable)
58
+ if "i686" in archs:
59
+ return _is_linux_i686(executable)
60
+ allowed_archs = {
61
+ "x86_64",
62
+ "aarch64",
63
+ "ppc64",
64
+ "ppc64le",
65
+ "s390x",
66
+ "loongarch64",
67
+ "riscv64",
68
+ }
69
+ return any(arch in allowed_archs for arch in archs)
70
+
71
+
72
+ # If glibc ever changes its major version, we need to know what the last
73
+ # minor version was, so we can build the complete list of all versions.
74
+ # For now, guess what the highest minor version might be, assume it will
75
+ # be 50 for testing. Once this actually happens, update the dictionary
76
+ # with the actual value.
77
+ _LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
78
+
79
+
80
+ class _GLibCVersion(NamedTuple):
81
+ major: int
82
+ minor: int
83
+
84
+
85
+ def _glibc_version_string_confstr() -> str | None:
86
+ """
87
+ Primary implementation of glibc_version_string using os.confstr.
88
+ """
89
+ # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
90
+ # to be broken or missing. This strategy is used in the standard library
91
+ # platform module.
92
+ # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
93
+ try:
94
+ # Should be a string like "glibc 2.17".
95
+ version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
96
+ assert version_string is not None
97
+ _, version = version_string.rsplit()
98
+ except (AssertionError, AttributeError, OSError, ValueError):
99
+ # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
100
+ return None
101
+ return version
102
+
103
+
104
+ def _glibc_version_string_ctypes() -> str | None:
105
+ """
106
+ Fallback implementation of glibc_version_string using ctypes.
107
+ """
108
+ try:
109
+ import ctypes
110
+ except ImportError:
111
+ return None
112
+
113
+ # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
114
+ # manpage says, "If filename is NULL, then the returned handle is for the
115
+ # main program". This way we can let the linker do the work to figure out
116
+ # which libc our process is actually using.
117
+ #
118
+ # We must also handle the special case where the executable is not a
119
+ # dynamically linked executable. This can occur when using musl libc,
120
+ # for example. In this situation, dlopen() will error, leading to an
121
+ # OSError. Interestingly, at least in the case of musl, there is no
122
+ # errno set on the OSError. The single string argument used to construct
123
+ # OSError comes from libc itself and is therefore not portable to
124
+ # hard code here. In any case, failure to call dlopen() means we
125
+ # can proceed, so we bail on our attempt.
126
+ try:
127
+ process_namespace = ctypes.CDLL(None)
128
+ except OSError:
129
+ return None
130
+
131
+ try:
132
+ gnu_get_libc_version = process_namespace.gnu_get_libc_version
133
+ except AttributeError:
134
+ # Symbol doesn't exist -> therefore, we are not linked to
135
+ # glibc.
136
+ return None
137
+
138
+ # Call gnu_get_libc_version, which returns a string like "2.5"
139
+ gnu_get_libc_version.restype = ctypes.c_char_p
140
+ version_str: str = gnu_get_libc_version()
141
+ # py2 / py3 compatibility:
142
+ if not isinstance(version_str, str):
143
+ version_str = version_str.decode("ascii")
144
+
145
+ return version_str
146
+
147
+
148
+ def _glibc_version_string() -> str | None:
149
+ """Returns glibc version string, or None if not using glibc."""
150
+ return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
151
+
152
+
153
+ def _parse_glibc_version(version_str: str) -> tuple[int, int]:
154
+ """Parse glibc version.
155
+
156
+ We use a regexp instead of str.split because we want to discard any
157
+ random junk that might come after the minor version -- this might happen
158
+ in patched/forked versions of glibc (e.g. Linaro's version of glibc
159
+ uses version strings like "2.20-2014.11"). See gh-3588.
160
+ """
161
+ m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
162
+ if not m:
163
+ warnings.warn(
164
+ f"Expected glibc version with 2 components major.minor, got: {version_str}",
165
+ RuntimeWarning,
166
+ stacklevel=2,
167
+ )
168
+ return -1, -1
169
+ return int(m.group("major")), int(m.group("minor"))
170
+
171
+
172
+ @functools.lru_cache
173
+ def _get_glibc_version() -> tuple[int, int]:
174
+ version_str = _glibc_version_string()
175
+ if version_str is None:
176
+ return (-1, -1)
177
+ return _parse_glibc_version(version_str)
178
+
179
+
180
+ # From PEP 513, PEP 600
181
+ def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
182
+ sys_glibc = _get_glibc_version()
183
+ if sys_glibc < version:
184
+ return False
185
+ # Check for presence of _manylinux module.
186
+ try:
187
+ import _manylinux
188
+ except ImportError:
189
+ return True
190
+ if hasattr(_manylinux, "manylinux_compatible"):
191
+ result = _manylinux.manylinux_compatible(version[0], version[1], arch)
192
+ if result is not None:
193
+ return bool(result)
194
+ return True
195
+ if version == _GLibCVersion(2, 5):
196
+ if hasattr(_manylinux, "manylinux1_compatible"):
197
+ return bool(_manylinux.manylinux1_compatible)
198
+ if version == _GLibCVersion(2, 12):
199
+ if hasattr(_manylinux, "manylinux2010_compatible"):
200
+ return bool(_manylinux.manylinux2010_compatible)
201
+ if version == _GLibCVersion(2, 17):
202
+ if hasattr(_manylinux, "manylinux2014_compatible"):
203
+ return bool(_manylinux.manylinux2014_compatible)
204
+ return True
205
+
206
+
207
+ _LEGACY_MANYLINUX_MAP = {
208
+ # CentOS 7 w/ glibc 2.17 (PEP 599)
209
+ (2, 17): "manylinux2014",
210
+ # CentOS 6 w/ glibc 2.12 (PEP 571)
211
+ (2, 12): "manylinux2010",
212
+ # CentOS 5 w/ glibc 2.5 (PEP 513)
213
+ (2, 5): "manylinux1",
214
+ }
215
+
216
+
217
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
218
+ """Generate manylinux tags compatible to the current platform.
219
+
220
+ :param archs: Sequence of compatible architectures.
221
+ The first one shall be the closest to the actual architecture and be the part of
222
+ platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
223
+ The ``linux_`` prefix is assumed as a prerequisite for the current platform to
224
+ be manylinux-compatible.
225
+
226
+ :returns: An iterator of compatible manylinux tags.
227
+ """
228
+ if not _have_compatible_abi(sys.executable, archs):
229
+ return
230
+ # Oldest glibc to be supported regardless of architecture is (2, 17).
231
+ too_old_glibc2 = _GLibCVersion(2, 16)
232
+ if set(archs) & {"x86_64", "i686"}:
233
+ # On x86/i686 also oldest glibc to be supported is (2, 5).
234
+ too_old_glibc2 = _GLibCVersion(2, 4)
235
+ current_glibc = _GLibCVersion(*_get_glibc_version())
236
+ glibc_max_list = [current_glibc]
237
+ # We can assume compatibility across glibc major versions.
238
+ # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
239
+ #
240
+ # Build a list of maximum glibc versions so that we can
241
+ # output the canonical list of all glibc from current_glibc
242
+ # down to too_old_glibc2, including all intermediary versions.
243
+ for glibc_major in range(current_glibc.major - 1, 1, -1):
244
+ glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
245
+ glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
246
+ for arch in archs:
247
+ for glibc_max in glibc_max_list:
248
+ if glibc_max.major == too_old_glibc2.major:
249
+ min_minor = too_old_glibc2.minor
250
+ else:
251
+ # For other glibc major versions oldest supported is (x, 0).
252
+ min_minor = -1
253
+ for glibc_minor in range(glibc_max.minor, min_minor, -1):
254
+ glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
255
+ tag = "manylinux_{}_{}".format(*glibc_version)
256
+ if _is_compatible(arch, glibc_version):
257
+ yield f"{tag}_{arch}"
258
+ # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
259
+ if glibc_version in _LEGACY_MANYLINUX_MAP:
260
+ legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
261
+ if _is_compatible(arch, glibc_version):
262
+ yield f"{legacy_tag}_{arch}"
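+
+
+ # Illustrative sketch (not part of the upstream module): print the
+ # manylinux tags the current interpreter is compatible with; prints
+ # nothing when no compatible glibc is detected.
+ if __name__ == "__main__":  # pragma: no cover
+     import sysconfig
+
+     plat = sysconfig.get_platform()  # e.g. "linux-x86_64"
+     arch = re.sub(r"[.-]", "_", plat.split("-", 1)[-1])
+     for tag in platform_tags([arch]):
+         print(tag)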
venv/lib/python3.13/site-packages/packaging/_musllinux.py ADDED
@@ -0,0 +1,85 @@
1
+ """PEP 656 support.
2
+
3
+ This module implements logic to detect if the currently running Python is
4
+ linked against musl, and what musl version is used.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import functools
10
+ import re
11
+ import subprocess
12
+ import sys
13
+ from typing import Iterator, NamedTuple, Sequence
14
+
15
+ from ._elffile import ELFFile
16
+
17
+
18
+ class _MuslVersion(NamedTuple):
19
+ major: int
20
+ minor: int
21
+
22
+
23
+ def _parse_musl_version(output: str) -> _MuslVersion | None:
24
+ lines = [n for n in (n.strip() for n in output.splitlines()) if n]
25
+ if len(lines) < 2 or lines[0][:4] != "musl":
26
+ return None
27
+ m = re.match(r"Version (\d+)\.(\d+)", lines[1])
28
+ if not m:
29
+ return None
30
+ return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
31
+
32
+
33
+ @functools.lru_cache
34
+ def _get_musl_version(executable: str) -> _MuslVersion | None:
35
+ """Detect currently-running musl runtime version.
36
+
37
+ This is done by checking the specified executable's dynamic linking
38
+ information, and invoking the loader to parse its output for a version
39
+ string. If the loader is musl, the output would be something like::
40
+
41
+ musl libc (x86_64)
42
+ Version 1.2.2
43
+ Dynamic Program Loader
44
+ """
45
+ try:
46
+ with open(executable, "rb") as f:
47
+ ld = ELFFile(f).interpreter
48
+ except (OSError, TypeError, ValueError):
49
+ return None
50
+ if ld is None or "musl" not in ld:
51
+ return None
52
+ proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
53
+ return _parse_musl_version(proc.stderr)
54
+
55
+
56
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
57
+ """Generate musllinux tags compatible to the current platform.
58
+
59
+ :param archs: Sequence of compatible architectures.
60
+ The first one shall be the closest to the actual architecture and be the part of
61
+ platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
62
+ The ``linux_`` prefix is assumed as a prerequisite for the current platform to
63
+ be musllinux-compatible.
64
+
65
+ :returns: An iterator of compatible musllinux tags.
66
+ """
67
+ sys_musl = _get_musl_version(sys.executable)
68
+ if sys_musl is None: # Python not dynamically linked against musl.
69
+ return
70
+ for arch in archs:
71
+ for minor in range(sys_musl.minor, -1, -1):
72
+ yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
73
+
74
+
75
+ if __name__ == "__main__": # pragma: no cover
76
+ import sysconfig
77
+
78
+ plat = sysconfig.get_platform()
79
+ assert plat.startswith("linux-"), "not linux"
80
+
81
+ print("plat:", plat)
82
+ print("musl:", _get_musl_version(sys.executable))
83
+ print("tags:", end=" ")
84
+ for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
85
+ print(t, end="\n ")
venv/lib/python3.13/site-packages/packaging/_parser.py ADDED
@@ -0,0 +1,353 @@
1
+ """Handwritten parser of dependency specifiers.
2
+
3
+ The docstring for each __parse_* function contains EBNF-inspired grammar representing
4
+ the implementation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ from typing import NamedTuple, Sequence, Tuple, Union
11
+
12
+ from ._tokenizer import DEFAULT_RULES, Tokenizer
13
+
14
+
15
+ class Node:
16
+ def __init__(self, value: str) -> None:
17
+ self.value = value
18
+
19
+ def __str__(self) -> str:
20
+ return self.value
21
+
22
+ def __repr__(self) -> str:
23
+ return f"<{self.__class__.__name__}('{self}')>"
24
+
25
+ def serialize(self) -> str:
26
+ raise NotImplementedError
27
+
28
+
29
+ class Variable(Node):
30
+ def serialize(self) -> str:
31
+ return str(self)
32
+
33
+
34
+ class Value(Node):
35
+ def serialize(self) -> str:
36
+ return f'"{self}"'
37
+
38
+
39
+ class Op(Node):
40
+ def serialize(self) -> str:
41
+ return str(self)
42
+
43
+
44
+ MarkerVar = Union[Variable, Value]
45
+ MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
46
+ MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
47
+ MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
48
+
49
+
50
+ class ParsedRequirement(NamedTuple):
51
+ name: str
52
+ url: str
53
+ extras: list[str]
54
+ specifier: str
55
+ marker: MarkerList | None
56
+
57
+
58
+ # --------------------------------------------------------------------------------------
59
+ # Recursive descent parser for dependency specifier
60
+ # --------------------------------------------------------------------------------------
61
+ def parse_requirement(source: str) -> ParsedRequirement:
62
+ return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
63
+
64
+
65
+ def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
66
+ """
67
+ requirement = WS? IDENTIFIER WS? extras WS? requirement_details
68
+ """
69
+ tokenizer.consume("WS")
70
+
71
+ name_token = tokenizer.expect(
72
+ "IDENTIFIER", expected="package name at the start of dependency specifier"
73
+ )
74
+ name = name_token.text
75
+ tokenizer.consume("WS")
76
+
77
+ extras = _parse_extras(tokenizer)
78
+ tokenizer.consume("WS")
79
+
80
+ url, specifier, marker = _parse_requirement_details(tokenizer)
81
+ tokenizer.expect("END", expected="end of dependency specifier")
82
+
83
+ return ParsedRequirement(name, url, extras, specifier, marker)
84
+
85
+
86
+ def _parse_requirement_details(
87
+ tokenizer: Tokenizer,
88
+ ) -> tuple[str, str, MarkerList | None]:
89
+ """
90
+ requirement_details = AT URL (WS requirement_marker?)?
91
+ | specifier WS? (requirement_marker)?
92
+ """
93
+
94
+ specifier = ""
95
+ url = ""
96
+ marker = None
97
+
98
+ if tokenizer.check("AT"):
99
+ tokenizer.read()
100
+ tokenizer.consume("WS")
101
+
102
+ url_start = tokenizer.position
103
+ url = tokenizer.expect("URL", expected="URL after @").text
104
+ if tokenizer.check("END", peek=True):
105
+ return (url, specifier, marker)
106
+
107
+ tokenizer.expect("WS", expected="whitespace after URL")
108
+
109
+ # The input might end after whitespace.
110
+ if tokenizer.check("END", peek=True):
111
+ return (url, specifier, marker)
112
+
113
+ marker = _parse_requirement_marker(
114
+ tokenizer, span_start=url_start, after="URL and whitespace"
115
+ )
116
+ else:
117
+ specifier_start = tokenizer.position
118
+ specifier = _parse_specifier(tokenizer)
119
+ tokenizer.consume("WS")
120
+
121
+ if tokenizer.check("END", peek=True):
122
+ return (url, specifier, marker)
123
+
124
+ marker = _parse_requirement_marker(
125
+ tokenizer,
126
+ span_start=specifier_start,
127
+ after=(
128
+ "version specifier"
129
+ if specifier
130
+ else "name and no valid version specifier"
131
+ ),
132
+ )
133
+
134
+ return (url, specifier, marker)
135
+
136
+
137
+ def _parse_requirement_marker(
138
+ tokenizer: Tokenizer, *, span_start: int, after: str
139
+ ) -> MarkerList:
140
+ """
141
+ requirement_marker = SEMICOLON marker WS?
142
+ """
143
+
144
+ if not tokenizer.check("SEMICOLON"):
145
+ tokenizer.raise_syntax_error(
146
+ f"Expected end or semicolon (after {after})",
147
+ span_start=span_start,
148
+ )
149
+ tokenizer.read()
150
+
151
+ marker = _parse_marker(tokenizer)
152
+ tokenizer.consume("WS")
153
+
154
+ return marker
155
+
156
+
157
+ def _parse_extras(tokenizer: Tokenizer) -> list[str]:
158
+ """
159
+ extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
160
+ """
161
+ if not tokenizer.check("LEFT_BRACKET", peek=True):
162
+ return []
163
+
164
+ with tokenizer.enclosing_tokens(
165
+ "LEFT_BRACKET",
166
+ "RIGHT_BRACKET",
167
+ around="extras",
168
+ ):
169
+ tokenizer.consume("WS")
170
+ extras = _parse_extras_list(tokenizer)
171
+ tokenizer.consume("WS")
172
+
173
+ return extras
174
+
175
+
176
+ def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
177
+ """
178
+ extras_list = identifier (wsp* ',' wsp* identifier)*
179
+ """
180
+ extras: list[str] = []
181
+
182
+ if not tokenizer.check("IDENTIFIER"):
183
+ return extras
184
+
185
+ extras.append(tokenizer.read().text)
186
+
187
+ while True:
188
+ tokenizer.consume("WS")
189
+ if tokenizer.check("IDENTIFIER", peek=True):
190
+ tokenizer.raise_syntax_error("Expected comma between extra names")
191
+ elif not tokenizer.check("COMMA"):
192
+ break
193
+
194
+ tokenizer.read()
195
+ tokenizer.consume("WS")
196
+
197
+ extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
198
+ extras.append(extra_token.text)
199
+
200
+ return extras
201
+
202
+
203
+ def _parse_specifier(tokenizer: Tokenizer) -> str:
204
+ """
205
+ specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
206
+ | WS? version_many WS?
207
+ """
208
+ with tokenizer.enclosing_tokens(
209
+ "LEFT_PARENTHESIS",
210
+ "RIGHT_PARENTHESIS",
211
+ around="version specifier",
212
+ ):
213
+ tokenizer.consume("WS")
214
+ parsed_specifiers = _parse_version_many(tokenizer)
215
+ tokenizer.consume("WS")
216
+
217
+ return parsed_specifiers
218
+
219
+
220
+ def _parse_version_many(tokenizer: Tokenizer) -> str:
221
+ """
222
+ version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
223
+ """
224
+ parsed_specifiers = ""
225
+ while tokenizer.check("SPECIFIER"):
226
+ span_start = tokenizer.position
227
+ parsed_specifiers += tokenizer.read().text
228
+ if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
229
+ tokenizer.raise_syntax_error(
230
+ ".* suffix can only be used with `==` or `!=` operators",
231
+ span_start=span_start,
232
+ span_end=tokenizer.position + 1,
233
+ )
234
+ if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
235
+ tokenizer.raise_syntax_error(
236
+ "Local version label can only be used with `==` or `!=` operators",
237
+ span_start=span_start,
238
+ span_end=tokenizer.position,
239
+ )
240
+ tokenizer.consume("WS")
241
+ if not tokenizer.check("COMMA"):
242
+ break
243
+ parsed_specifiers += tokenizer.read().text
244
+ tokenizer.consume("WS")
245
+
246
+ return parsed_specifiers
247
+
248
+
249
+ # --------------------------------------------------------------------------------------
250
+ # Recursive descent parser for marker expression
251
+ # --------------------------------------------------------------------------------------
252
+ def parse_marker(source: str) -> MarkerList:
253
+ return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
254
+
255
+
256
+ def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
257
+ retval = _parse_marker(tokenizer)
258
+ tokenizer.expect("END", expected="end of marker expression")
259
+ return retval
260
+
261
+
262
+ def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
263
+ """
264
+ marker = marker_atom (BOOLOP marker_atom)+
265
+ """
266
+ expression = [_parse_marker_atom(tokenizer)]
267
+ while tokenizer.check("BOOLOP"):
268
+ token = tokenizer.read()
269
+ expr_right = _parse_marker_atom(tokenizer)
270
+ expression.extend((token.text, expr_right))
271
+ return expression
272
+
273
+
274
+ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
275
+ """
276
+ marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
277
+ | WS? marker_item WS?
278
+ """
279
+
280
+ tokenizer.consume("WS")
281
+ if tokenizer.check("LEFT_PARENTHESIS", peek=True):
282
+ with tokenizer.enclosing_tokens(
283
+ "LEFT_PARENTHESIS",
284
+ "RIGHT_PARENTHESIS",
285
+ around="marker expression",
286
+ ):
287
+ tokenizer.consume("WS")
288
+ marker: MarkerAtom = _parse_marker(tokenizer)
289
+ tokenizer.consume("WS")
290
+ else:
291
+ marker = _parse_marker_item(tokenizer)
292
+ tokenizer.consume("WS")
293
+ return marker
294
+
295
+
296
+ def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
297
+ """
298
+ marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
299
+ """
300
+ tokenizer.consume("WS")
301
+ marker_var_left = _parse_marker_var(tokenizer)
302
+ tokenizer.consume("WS")
303
+ marker_op = _parse_marker_op(tokenizer)
304
+ tokenizer.consume("WS")
305
+ marker_var_right = _parse_marker_var(tokenizer)
306
+ tokenizer.consume("WS")
307
+ return (marker_var_left, marker_op, marker_var_right)
308
+
309
+
310
+ def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
311
+ """
312
+ marker_var = VARIABLE | QUOTED_STRING
313
+ """
314
+ if tokenizer.check("VARIABLE"):
315
+ return process_env_var(tokenizer.read().text.replace(".", "_"))
316
+ elif tokenizer.check("QUOTED_STRING"):
317
+ return process_python_str(tokenizer.read().text)
318
+ else:
319
+ tokenizer.raise_syntax_error(
320
+ message="Expected a marker variable or quoted string"
321
+ )
322
+
323
+
324
+ def process_env_var(env_var: str) -> Variable:
325
+ if env_var in ("platform_python_implementation", "python_implementation"):
326
+ return Variable("platform_python_implementation")
327
+ else:
328
+ return Variable(env_var)
329
+
330
+
331
+ def process_python_str(python_str: str) -> Value:
332
+ value = ast.literal_eval(python_str)
333
+ return Value(str(value))
334
+
335
+
336
+ def _parse_marker_op(tokenizer: Tokenizer) -> Op:
337
+ """
338
+ marker_op = IN | NOT IN | OP
339
+ """
340
+ if tokenizer.check("IN"):
341
+ tokenizer.read()
342
+ return Op("in")
343
+ elif tokenizer.check("NOT"):
344
+ tokenizer.read()
345
+ tokenizer.expect("WS", expected="whitespace after 'not'")
346
+ tokenizer.expect("IN", expected="'in' after 'not'")
347
+ return Op("not in")
348
+ elif tokenizer.check("OP"):
349
+ return Op(tokenizer.read().text)
350
+ else:
351
+ return tokenizer.raise_syntax_error(
352
+ "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
353
+ )
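+
+
+ # Illustrative sketch (not part of the upstream module): parse a dependency
+ # specifier and inspect the resulting ParsedRequirement.
+ if __name__ == "__main__":  # pragma: no cover
+     req = parse_requirement('idna[codec] >=3.0, <4 ; python_version >= "3.8"')
+     print(req.name)  # idna
+     print(req.extras)  # ['codec']
+     print(req.specifier)  # >=3.0,<4
+     print(req.marker)  # parsed marker structure, or None if absent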
venv/lib/python3.13/site-packages/packaging/_structures.py ADDED
@@ -0,0 +1,61 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+
6
+ class InfinityType:
7
+ def __repr__(self) -> str:
8
+ return "Infinity"
9
+
10
+ def __hash__(self) -> int:
11
+ return hash(repr(self))
12
+
13
+ def __lt__(self, other: object) -> bool:
14
+ return False
15
+
16
+ def __le__(self, other: object) -> bool:
17
+ return False
18
+
19
+ def __eq__(self, other: object) -> bool:
20
+ return isinstance(other, self.__class__)
21
+
22
+ def __gt__(self, other: object) -> bool:
23
+ return True
24
+
25
+ def __ge__(self, other: object) -> bool:
26
+ return True
27
+
28
+ def __neg__(self: object) -> "NegativeInfinityType":
29
+ return NegativeInfinity
30
+
31
+
32
+ Infinity = InfinityType()
33
+
34
+
35
+ class NegativeInfinityType:
36
+ def __repr__(self) -> str:
37
+ return "-Infinity"
38
+
39
+ def __hash__(self) -> int:
40
+ return hash(repr(self))
41
+
42
+ def __lt__(self, other: object) -> bool:
43
+ return True
44
+
45
+ def __le__(self, other: object) -> bool:
46
+ return True
47
+
48
+ def __eq__(self, other: object) -> bool:
49
+ return isinstance(other, self.__class__)
50
+
51
+ def __gt__(self, other: object) -> bool:
52
+ return False
53
+
54
+ def __ge__(self, other: object) -> bool:
55
+ return False
56
+
57
+ def __neg__(self: object) -> InfinityType:
58
+ return Infinity
59
+
60
+
61
+ NegativeInfinity = NegativeInfinityType()
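+
+
+ # Illustrative sketch (not part of the upstream module): the sentinels
+ # sort below and above every other value, which is how packaging.version
+ # uses them when building comparison keys.
+ if __name__ == "__main__":  # pragma: no cover
+     assert NegativeInfinity < 0 < Infinity
+     assert -Infinity == NegativeInfinity
+     print(sorted([Infinity, 3, NegativeInfinity, 7]))  # [-Infinity, 3, 7, Infinity]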
venv/lib/python3.13/site-packages/packaging/_tokenizer.py ADDED
@@ -0,0 +1,195 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Iterator, NoReturn
7
+
8
+ from .specifiers import Specifier
9
+
10
+
11
+ @dataclass
12
+ class Token:
13
+ name: str
14
+ text: str
15
+ position: int
16
+
17
+
18
+ class ParserSyntaxError(Exception):
19
+ """The provided source text could not be parsed correctly."""
20
+
21
+ def __init__(
22
+ self,
23
+ message: str,
24
+ *,
25
+ source: str,
26
+ span: tuple[int, int],
27
+ ) -> None:
28
+ self.span = span
29
+ self.message = message
30
+ self.source = source
31
+
32
+ super().__init__()
33
+
34
+ def __str__(self) -> str:
35
+ marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
36
+ return "\n ".join([self.message, self.source, marker])
37
+
38
+
39
+ DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
40
+ "LEFT_PARENTHESIS": r"\(",
41
+ "RIGHT_PARENTHESIS": r"\)",
42
+ "LEFT_BRACKET": r"\[",
43
+ "RIGHT_BRACKET": r"\]",
44
+ "SEMICOLON": r";",
45
+ "COMMA": r",",
46
+ "QUOTED_STRING": re.compile(
47
+ r"""
48
+ (
49
+ ('[^']*')
50
+ |
51
+ ("[^"]*")
52
+ )
53
+ """,
54
+ re.VERBOSE,
55
+ ),
56
+ "OP": r"(===|==|~=|!=|<=|>=|<|>)",
57
+ "BOOLOP": r"\b(or|and)\b",
58
+ "IN": r"\bin\b",
59
+ "NOT": r"\bnot\b",
60
+ "VARIABLE": re.compile(
61
+ r"""
62
+ \b(
63
+ python_version
64
+ |python_full_version
65
+ |os[._]name
66
+ |sys[._]platform
67
+ |platform_(release|system)
68
+ |platform[._](version|machine|python_implementation)
69
+ |python_implementation
70
+ |implementation_(name|version)
71
+ |extras?
72
+ |dependency_groups
73
+ )\b
74
+ """,
75
+ re.VERBOSE,
76
+ ),
77
+ "SPECIFIER": re.compile(
78
+ Specifier._operator_regex_str + Specifier._version_regex_str,
79
+ re.VERBOSE | re.IGNORECASE,
80
+ ),
81
+ "AT": r"\@",
82
+ "URL": r"[^ \t]+",
83
+ "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
84
+ "VERSION_PREFIX_TRAIL": r"\.\*",
85
+ "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
86
+ "WS": r"[ \t]+",
87
+ "END": r"$",
88
+ }
89
+
90
+
91
+ class Tokenizer:
92
+ """Context-sensitive token parsing.
93
+
94
+ Provides methods to examine the input stream to check whether the next token
95
+ matches.
96
+ """
97
+
98
+ def __init__(
99
+ self,
100
+ source: str,
101
+ *,
102
+ rules: dict[str, str | re.Pattern[str]],
103
+ ) -> None:
104
+ self.source = source
105
+ self.rules: dict[str, re.Pattern[str]] = {
106
+ name: re.compile(pattern) for name, pattern in rules.items()
107
+ }
108
+ self.next_token: Token | None = None
109
+ self.position = 0
110
+
111
+ def consume(self, name: str) -> None:
112
+ """Move beyond provided token name, if at current position."""
113
+ if self.check(name):
114
+ self.read()
115
+
116
+ def check(self, name: str, *, peek: bool = False) -> bool:
117
+ """Check whether the next token has the provided name.
118
+
119
+ By default, if the check succeeds, the token *must* be read before
120
+ another check. If `peek` is set to `True`, the token is not loaded and
121
+ would need to be checked again.
122
+ """
123
+ assert self.next_token is None, (
124
+ f"Cannot check for {name!r}, already have {self.next_token!r}"
125
+ )
126
+ assert name in self.rules, f"Unknown token name: {name!r}"
127
+
128
+ expression = self.rules[name]
129
+
130
+ match = expression.match(self.source, self.position)
131
+ if match is None:
132
+ return False
133
+ if not peek:
134
+ self.next_token = Token(name, match[0], self.position)
135
+ return True
136
+
137
+ def expect(self, name: str, *, expected: str) -> Token:
138
+ """Expect a certain token name next, failing with a syntax error otherwise.
139
+
140
+ The token is *not* read.
141
+ """
142
+ if not self.check(name):
143
+ raise self.raise_syntax_error(f"Expected {expected}")
144
+ return self.read()
145
+
146
+ def read(self) -> Token:
147
+ """Consume the next token and return it."""
148
+ token = self.next_token
149
+ assert token is not None
150
+
151
+ self.position += len(token.text)
152
+ self.next_token = None
153
+
154
+ return token
155
+
156
+ def raise_syntax_error(
157
+ self,
158
+ message: str,
159
+ *,
160
+ span_start: int | None = None,
161
+ span_end: int | None = None,
162
+ ) -> NoReturn:
163
+ """Raise ParserSyntaxError at the given position."""
164
+ span = (
165
+ self.position if span_start is None else span_start,
166
+ self.position if span_end is None else span_end,
167
+ )
168
+ raise ParserSyntaxError(
169
+ message,
170
+ source=self.source,
171
+ span=span,
172
+ )
173
+
174
+ @contextlib.contextmanager
175
+ def enclosing_tokens(
176
+ self, open_token: str, close_token: str, *, around: str
177
+ ) -> Iterator[None]:
178
+ if self.check(open_token):
179
+ open_position = self.position
180
+ self.read()
181
+ else:
182
+ open_position = None
183
+
184
+ yield
185
+
186
+ if open_position is None:
187
+ return
188
+
189
+ if not self.check(close_token):
190
+ self.raise_syntax_error(
191
+ f"Expected matching {close_token} for {open_token}, after {around}",
192
+ span_start=open_position,
193
+ )
194
+
195
+ self.read()
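+
+
+ # Illustrative sketch (not part of the upstream module): walk the tokens
+ # of a simple marker expression using the default rules.
+ if __name__ == "__main__":  # pragma: no cover
+     tok = Tokenizer('os_name == "posix"', rules=DEFAULT_RULES)
+     for name in ("VARIABLE", "WS", "OP", "WS", "QUOTED_STRING"):
+         print(tok.expect(name, expected=name))
+     tok.expect("END", expected="end of input")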
venv/lib/python3.13/site-packages/packaging/markers.py ADDED
@@ -0,0 +1,362 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ from __future__ import annotations
6
+
7
+ import operator
8
+ import os
9
+ import platform
10
+ import sys
11
+ from typing import AbstractSet, Any, Callable, Literal, TypedDict, Union, cast
12
+
13
+ from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
14
+ from ._parser import parse_marker as _parse_marker
15
+ from ._tokenizer import ParserSyntaxError
16
+ from .specifiers import InvalidSpecifier, Specifier
17
+ from .utils import canonicalize_name
18
+
19
+ __all__ = [
20
+ "EvaluateContext",
21
+ "InvalidMarker",
22
+ "Marker",
23
+ "UndefinedComparison",
24
+ "UndefinedEnvironmentName",
25
+ "default_environment",
26
+ ]
27
+
28
+ Operator = Callable[[str, Union[str, AbstractSet[str]]], bool]
29
+ EvaluateContext = Literal["metadata", "lock_file", "requirement"]
30
+ MARKERS_ALLOWING_SET = {"extras", "dependency_groups"}
31
+
32
+
33
+ class InvalidMarker(ValueError):
34
+ """
35
+ An invalid marker was found, users should refer to PEP 508.
36
+ """
37
+
38
+
39
+ class UndefinedComparison(ValueError):
40
+ """
41
+ An invalid operation was attempted on a value that doesn't support it.
42
+ """
43
+
44
+
45
+ class UndefinedEnvironmentName(ValueError):
46
+ """
47
+ A name was attempted to be used that does not exist inside of the
48
+ environment.
49
+ """
50
+
51
+
52
+ class Environment(TypedDict):
53
+ implementation_name: str
54
+ """The implementation's identifier, e.g. ``'cpython'``."""
55
+
56
+ implementation_version: str
57
+ """
58
+ The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
59
+ ``'7.3.13'`` for PyPy3.10 v7.3.13.
60
+ """
61
+
62
+ os_name: str
63
+ """
64
+ The value of :py:data:`os.name`. The name of the operating system dependent module
65
+ imported, e.g. ``'posix'``.
66
+ """
67
+
68
+ platform_machine: str
69
+ """
70
+ Returns the machine type, e.g. ``'i386'``.
71
+
72
+ An empty string if the value cannot be determined.
73
+ """
74
+
75
+ platform_release: str
76
+ """
77
+ The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
78
+
79
+ An empty string if the value cannot be determined.
80
+ """
81
+
82
+ platform_system: str
83
+ """
84
+ The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
85
+
86
+ An empty string if the value cannot be determined.
87
+ """
88
+
89
+ platform_version: str
90
+ """
91
+ The system's release version, e.g. ``'#3 on degas'``.
92
+
93
+ An empty string if the value cannot be determined.
94
+ """
95
+
96
+ python_full_version: str
97
+ """
98
+ The Python version as string ``'major.minor.patchlevel'``.
99
+
100
+ Note that unlike the Python :py:data:`sys.version`, this value will always include
101
+ the patchlevel (it defaults to 0).
102
+ """
103
+
104
+ platform_python_implementation: str
105
+ """
106
+ A string identifying the Python implementation, e.g. ``'CPython'``.
107
+ """
108
+
109
+ python_version: str
110
+ """The Python version as string ``'major.minor'``."""
111
+
112
+ sys_platform: str
113
+ """
114
+ This string contains a platform identifier that can be used to append
115
+ platform-specific components to :py:data:`sys.path`, for instance.
116
+
117
+ For Unix systems, except on Linux and AIX, this is the lowercased OS name as
118
+ returned by ``uname -s`` with the first part of the version as returned by
119
+ ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
120
+ was built.
121
+ """
122
+
123
+
124
+ def _normalize_extra_values(results: Any) -> Any:
125
+ """
126
+ Normalize extra values.
127
+ """
128
+ if isinstance(results[0], tuple):
129
+ lhs, op, rhs = results[0]
130
+ if isinstance(lhs, Variable) and lhs.value == "extra":
131
+ normalized_extra = canonicalize_name(rhs.value)
132
+ rhs = Value(normalized_extra)
133
+ elif isinstance(rhs, Variable) and rhs.value == "extra":
134
+ normalized_extra = canonicalize_name(lhs.value)
135
+ lhs = Value(normalized_extra)
136
+ results[0] = lhs, op, rhs
137
+ return results
138
+
139
+
140
+ def _format_marker(
141
+ marker: list[str] | MarkerAtom | str, first: bool | None = True
142
+ ) -> str:
143
+ assert isinstance(marker, (list, tuple, str))
144
+
145
+ # Sometimes we have a structure like [[...]] which is a single item list
146
+ # where the single item is itself its own list. In that case we want to skip
147
+ # the rest of this function so that we don't get extraneous () on the
148
+ # outside.
149
+ if (
150
+ isinstance(marker, list)
151
+ and len(marker) == 1
152
+ and isinstance(marker[0], (list, tuple))
153
+ ):
154
+ return _format_marker(marker[0])
155
+
156
+ if isinstance(marker, list):
157
+ inner = (_format_marker(m, first=False) for m in marker)
158
+ if first:
159
+ return " ".join(inner)
160
+ else:
161
+ return "(" + " ".join(inner) + ")"
162
+ elif isinstance(marker, tuple):
163
+ return " ".join([m.serialize() for m in marker])
164
+ else:
165
+ return marker
166
+
167
+
168
+ _operators: dict[str, Operator] = {
169
+ "in": lambda lhs, rhs: lhs in rhs,
170
+ "not in": lambda lhs, rhs: lhs not in rhs,
171
+ "<": operator.lt,
172
+ "<=": operator.le,
173
+ "==": operator.eq,
174
+ "!=": operator.ne,
175
+ ">=": operator.ge,
176
+ ">": operator.gt,
177
+ }
178
+
179
+
180
+ def _eval_op(lhs: str, op: Op, rhs: str | AbstractSet[str]) -> bool:
181
+ if isinstance(rhs, str):
182
+ try:
183
+ spec = Specifier("".join([op.serialize(), rhs]))
184
+ except InvalidSpecifier:
185
+ pass
186
+ else:
187
+ return spec.contains(lhs, prereleases=True)
188
+
189
+ oper: Operator | None = _operators.get(op.serialize())
190
+ if oper is None:
191
+ raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
192
+
193
+ return oper(lhs, rhs)
194
+
195
+
196
+ def _normalize(
197
+ lhs: str, rhs: str | AbstractSet[str], key: str
198
+ ) -> tuple[str, str | AbstractSet[str]]:
199
+ # PEP 685 – Comparison of extra names for optional distribution dependencies
200
+ # https://peps.python.org/pep-0685/
201
+ # > When comparing extra names, tools MUST normalize the names being
202
+ # > compared using the semantics outlined in PEP 503 for names
203
+ if key == "extra":
204
+ assert isinstance(rhs, str), "extra value must be a string"
205
+ return (canonicalize_name(lhs), canonicalize_name(rhs))
206
+ if key in MARKERS_ALLOWING_SET:
207
+ if isinstance(rhs, str): # pragma: no cover
208
+ return (canonicalize_name(lhs), canonicalize_name(rhs))
209
+ else:
210
+ return (canonicalize_name(lhs), {canonicalize_name(v) for v in rhs})
211
+
212
+ # other environment markers don't have such standards
213
+ return lhs, rhs
214
+
215
+
216
+ def _evaluate_markers(
217
+ markers: MarkerList, environment: dict[str, str | AbstractSet[str]]
218
+ ) -> bool:
219
+ groups: list[list[bool]] = [[]]
220
+
221
+ for marker in markers:
222
+ assert isinstance(marker, (list, tuple, str))
223
+
224
+ if isinstance(marker, list):
225
+ groups[-1].append(_evaluate_markers(marker, environment))
226
+ elif isinstance(marker, tuple):
227
+ lhs, op, rhs = marker
228
+
229
+ if isinstance(lhs, Variable):
230
+ environment_key = lhs.value
231
+ lhs_value = environment[environment_key]
232
+ rhs_value = rhs.value
233
+ else:
234
+ lhs_value = lhs.value
235
+ environment_key = rhs.value
236
+ rhs_value = environment[environment_key]
237
+ assert isinstance(lhs_value, str), "lhs must be a string"
238
+ lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
239
+ groups[-1].append(_eval_op(lhs_value, op, rhs_value))
240
+ else:
241
+ assert marker in ["and", "or"]
242
+ if marker == "or":
243
+ groups.append([])
244
+
245
+ return any(all(item) for item in groups)
246
+
247
+
248
+ def format_full_version(info: sys._version_info) -> str:
249
+ version = f"{info.major}.{info.minor}.{info.micro}"
250
+ kind = info.releaselevel
251
+ if kind != "final":
252
+ version += kind[0] + str(info.serial)
253
+ return version
254
+
255
+
256
+ def default_environment() -> Environment:
257
+ iver = format_full_version(sys.implementation.version)
258
+ implementation_name = sys.implementation.name
259
+ return {
260
+ "implementation_name": implementation_name,
261
+ "implementation_version": iver,
262
+ "os_name": os.name,
263
+ "platform_machine": platform.machine(),
264
+ "platform_release": platform.release(),
265
+ "platform_system": platform.system(),
266
+ "platform_version": platform.version(),
267
+ "python_full_version": platform.python_version(),
268
+ "platform_python_implementation": platform.python_implementation(),
269
+ "python_version": ".".join(platform.python_version_tuple()[:2]),
270
+ "sys_platform": sys.platform,
271
+ }
272
+
273
+
274
+ class Marker:
275
+ def __init__(self, marker: str) -> None:
276
+ # Note: We create a Marker object without calling this constructor in
277
+ # packaging.requirements.Requirement. If any additional logic is
278
+ # added here, make sure to mirror/adapt Requirement.
279
+ try:
280
+ self._markers = _normalize_extra_values(_parse_marker(marker))
281
+ # The attribute `_markers` can be described in terms of a recursive type:
282
+ # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
283
+ #
284
+ # For example, the following expression:
285
+ # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
286
+ #
287
+ # is parsed into:
288
+ # [
289
+ # (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
290
+ # 'and',
291
+ # [
292
+ # (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
293
+ # 'or',
294
+ # (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
295
+ # ]
296
+ # ]
297
+ except ParserSyntaxError as e:
298
+ raise InvalidMarker(str(e)) from e
299
+
300
+ def __str__(self) -> str:
301
+ return _format_marker(self._markers)
302
+
303
+ def __repr__(self) -> str:
304
+ return f"<Marker('{self}')>"
305
+
306
+ def __hash__(self) -> int:
307
+ return hash((self.__class__.__name__, str(self)))
308
+
309
+ def __eq__(self, other: Any) -> bool:
310
+ if not isinstance(other, Marker):
311
+ return NotImplemented
312
+
313
+ return str(self) == str(other)
314
+
315
+ def evaluate(
316
+ self,
317
+ environment: dict[str, str] | None = None,
318
+ context: EvaluateContext = "metadata",
319
+ ) -> bool:
320
+ """Evaluate a marker.
321
+
322
+ Return the boolean from evaluating the given marker against the
323
+ environment. environment is an optional argument to override all or
324
+ part of the determined environment. The *context* parameter specifies what
325
+ context the markers are being evaluated for, which influences what markers
326
+ are considered valid. Acceptable values are "metadata" (for core metadata;
327
+ default), "lock_file", and "requirement" (i.e. all other situations).
328
+
329
+ The environment is determined from the current Python process.
330
+ """
331
+ current_environment = cast(
332
+ "dict[str, str | AbstractSet[str]]", default_environment()
333
+ )
334
+ if context == "lock_file":
335
+ current_environment.update(
336
+ extras=frozenset(), dependency_groups=frozenset()
337
+ )
338
+ elif context == "metadata":
339
+ current_environment["extra"] = ""
340
+ if environment is not None:
341
+ current_environment.update(environment)
342
+ # The API used to allow setting extra to None. We need to handle this
343
+ # case for backwards compatibility.
344
+ if "extra" in current_environment and current_environment["extra"] is None:
345
+ current_environment["extra"] = ""
346
+
347
+ return _evaluate_markers(
348
+ self._markers, _repair_python_full_version(current_environment)
349
+ )
350
+
351
+
352
+ def _repair_python_full_version(
353
+ env: dict[str, str | AbstractSet[str]],
354
+ ) -> dict[str, str | AbstractSet[str]]:
355
+ """
356
+ Work around platform.python_version() returning something that is not PEP 440
357
+ compliant for non-tagged Python builds.
358
+ """
359
+ python_full_version = cast(str, env["python_full_version"])
360
+ if python_full_version.endswith("+"):
361
+ env["python_full_version"] = f"{python_full_version}local"
362
+ return env
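+
+
+ # Illustrative sketch (not part of the upstream module): evaluate a marker
+ # against the running interpreter and against a partially overridden
+ # environment.
+ if __name__ == "__main__":  # pragma: no cover
+     marker = Marker('python_version >= "3.9" and os_name == "posix"')
+     print(marker.evaluate())  # uses default_environment()
+     print(marker.evaluate({"os_name": "nt"}))  # override a single variable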
venv/lib/python3.13/site-packages/packaging/metadata.py ADDED
@@ -0,0 +1,862 @@
1
+ from __future__ import annotations
2
+
3
+ import email.feedparser
4
+ import email.header
5
+ import email.message
6
+ import email.parser
7
+ import email.policy
8
+ import pathlib
9
+ import sys
10
+ import typing
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Generic,
15
+ Literal,
16
+ TypedDict,
17
+ cast,
18
+ )
19
+
20
+ from . import licenses, requirements, specifiers, utils
21
+ from . import version as version_module
22
+ from .licenses import NormalizedLicenseExpression
23
+
24
+ T = typing.TypeVar("T")
25
+
26
+
27
+ if sys.version_info >= (3, 11): # pragma: no cover
28
+ ExceptionGroup = ExceptionGroup
29
+ else: # pragma: no cover
30
+
31
+ class ExceptionGroup(Exception):
32
+ """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
33
+
34
+ If :external:exc:`ExceptionGroup` is already defined by Python itself,
35
+ that version is used instead.
36
+ """
37
+
38
+ message: str
39
+ exceptions: list[Exception]
40
+
41
+ def __init__(self, message: str, exceptions: list[Exception]) -> None:
42
+ self.message = message
43
+ self.exceptions = exceptions
44
+
45
+ def __repr__(self) -> str:
46
+ return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
47
+
48
+
49
+ class InvalidMetadata(ValueError):
50
+ """A metadata field contains invalid data."""
51
+
52
+ field: str
53
+ """The name of the field that contains invalid data."""
54
+
55
+ def __init__(self, field: str, message: str) -> None:
56
+ self.field = field
57
+ super().__init__(message)
58
+
59
+
60
+ # The RawMetadata class attempts to make as few assumptions about the underlying
61
+ # serialization formats as possible. The idea is that as long as a serialization
62
+ # formats offer some very basic primitives in *some* way then we can support
63
+ # serializing to and from that format.
64
+ class RawMetadata(TypedDict, total=False):
65
+ """A dictionary of raw core metadata.
66
+
67
+ Each field in core metadata maps to a key of this dictionary (when data is
68
+ provided). The key is lower-case and underscores are used instead of dashes
69
+ compared to the equivalent core metadata field. Any core metadata field that
70
+ can be specified multiple times or can hold multiple values in a single
71
+ field have a key with a plural name. See :class:`Metadata` whose attributes
72
+ match the keys of this dictionary.
73
+
74
+ Core metadata fields that can be specified multiple times are stored as a
75
+ list or dict depending on which is appropriate for the field. Any fields
76
+ which hold multiple values in a single field are stored as a list.
77
+
78
+ """
79
+
80
+ # Metadata 1.0 - PEP 241
81
+ metadata_version: str
82
+ name: str
83
+ version: str
84
+ platforms: list[str]
85
+ summary: str
86
+ description: str
87
+ keywords: list[str]
88
+ home_page: str
89
+ author: str
90
+ author_email: str
91
+ license: str
92
+
93
+ # Metadata 1.1 - PEP 314
94
+ supported_platforms: list[str]
95
+ download_url: str
96
+ classifiers: list[str]
97
+ requires: list[str]
98
+ provides: list[str]
99
+ obsoletes: list[str]
100
+
101
+ # Metadata 1.2 - PEP 345
102
+ maintainer: str
103
+ maintainer_email: str
104
+ requires_dist: list[str]
105
+ provides_dist: list[str]
106
+ obsoletes_dist: list[str]
107
+ requires_python: str
108
+ requires_external: list[str]
109
+ project_urls: dict[str, str]
110
+
111
+ # Metadata 2.0
112
+ # PEP 426 attempted to completely revamp the metadata format
113
+ # but got stuck without ever being able to build consensus on
114
+ # it and ultimately ended up withdrawn.
115
+ #
116
+ # However, a number of tools had started emitting METADATA with
117
+ # `2.0` Metadata-Version, so for historical reasons, this version
118
+ # was skipped.
119
+
120
+ # Metadata 2.1 - PEP 566
121
+ description_content_type: str
122
+ provides_extra: list[str]
123
+
124
+ # Metadata 2.2 - PEP 643
125
+ dynamic: list[str]
126
+
127
+ # Metadata 2.3 - PEP 685
128
+ # No new fields were added in PEP 685, just some edge case were
129
+ # tightened up to provide better interoptability.
130
+
131
+ # Metadata 2.4 - PEP 639
132
+ license_expression: str
133
+ license_files: list[str]
134
+
135
+
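+ # A small, hypothetical RawMetadata value, as ``parse_email`` below would
+ # produce it (a sketch only; the field values are made up):
+ #
+ #     raw: RawMetadata = {
+ #         "metadata_version": "2.1",
+ #         "name": "example-package",
+ #         "version": "1.0.0",
+ #         "keywords": ["example", "demo"],
+ #         "project_urls": {"Homepage": "https://example.com"},
+ #     }
+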
+ _STRING_FIELDS = {
+     "author",
+     "author_email",
+     "description",
+     "description_content_type",
+     "download_url",
+     "home_page",
+     "license",
+     "license_expression",
+     "maintainer",
+     "maintainer_email",
+     "metadata_version",
+     "name",
+     "requires_python",
+     "summary",
+     "version",
+ }
+
+ _LIST_FIELDS = {
+     "classifiers",
+     "dynamic",
+     "license_files",
+     "obsoletes",
+     "obsoletes_dist",
+     "platforms",
+     "provides",
+     "provides_dist",
+     "provides_extra",
+     "requires",
+     "requires_dist",
+     "requires_external",
+     "supported_platforms",
+ }
+
+ _DICT_FIELDS = {
+     "project_urls",
+ }
+
+
+ def _parse_keywords(data: str) -> list[str]:
+     """Split a string of comma-separated keywords into a list of keywords."""
+     return [k.strip() for k in data.split(",")]
+
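+ # For example (a hypothetical value; whitespace around each keyword is
+ # stripped):
+ #
+ #     >>> _parse_keywords("packaging, metadata ,pep-566")
+ #     ['packaging', 'metadata', 'pep-566']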
+
+ def _parse_project_urls(data: list[str]) -> dict[str, str]:
+     """Parse a list of label/URL string pairings separated by a comma."""
+     urls = {}
+     for pair in data:
+         # Our logic is slightly tricky here as we want to try and do
+         # *something* reasonable with malformed data.
+         #
+         # The main thing that we have to worry about is data that does
+         # not have a ',' at all to split the label from the value. There
+         # isn't a singular right answer here, and we will fail validation
+         # later on (if the caller is validating) so it doesn't *really*
+         # matter, but since the missing value has to be an empty str
+         # and our return value is dict[str, str], if we let the key
+         # be the missing value, then they'd have multiple '' values that
+         # overwrite each other in an accumulating dict.
+         #
+         # The other potential issue is that it's possible to have the
+         # same label multiple times in the metadata, with no solid "right"
+         # answer with what to do in that case. As such, we'll do the only
+         # thing we can, which is treat the field as unparseable and add it
+         # to our list of unparsed fields.
+         parts = [p.strip() for p in pair.split(",", 1)]
+         parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
+
+         # TODO: The spec doesn't say anything about if the keys should be
+         #       considered case sensitive or not... logically they should
+         #       be case-preserving and case-insensitive, but doing that
+         #       would open up more cases where we might have duplicate
+         #       entries.
+         label, url = parts
+         if label in urls:
+             # The label already exists in our set of urls, so this field
+             # is unparseable, and we can just add the whole thing to our
+             # unparseable data and stop processing it.
+             raise KeyError("duplicate labels in project urls")
+         urls[label] = url
+
+     return urls
+
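+ # For example (hypothetical labels and URLs): each "label, URL" string becomes
+ # one mapping entry, while a duplicate label raises KeyError so the caller can
+ # file the whole field under its unparsed data.
+ #
+ #     >>> _parse_project_urls(["Homepage, https://example.com",
+ #     ...                      "Tracker, https://example.com/issues"])
+ #     {'Homepage': 'https://example.com', 'Tracker': 'https://example.com/issues'}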
+
+ def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
+     """Get the body of the message."""
+     # If our source is a str, then our caller has managed encodings for us,
+     # and we don't need to deal with it.
+     if isinstance(source, str):
+         payload = msg.get_payload()
+         assert isinstance(payload, str)
+         return payload
+     # If our source is a bytes, then we're managing the encoding and we need
+     # to deal with it.
+     else:
+         bpayload = msg.get_payload(decode=True)
+         assert isinstance(bpayload, bytes)
+         try:
+             return bpayload.decode("utf8", "strict")
+         except UnicodeDecodeError as exc:
+             raise ValueError("payload in an invalid encoding") from exc
+
+
+ # The various parse_FORMAT functions here are intended to be as lenient as
+ # possible in their parsing, while still returning a correctly typed
+ # RawMetadata.
+ #
+ # To aid in this, we also generally want to do as little touching of the
+ # data as possible, except where there are possibly some historic holdovers
+ # that make valid data awkward to work with.
+ #
+ # While this is a lower-level, intermediate format than our ``Metadata``
+ # class, some light touch ups can make a massive difference in usability.
+
+ # Map METADATA fields to RawMetadata.
+ _EMAIL_TO_RAW_MAPPING = {
+     "author": "author",
+     "author-email": "author_email",
+     "classifier": "classifiers",
+     "description": "description",
+     "description-content-type": "description_content_type",
+     "download-url": "download_url",
+     "dynamic": "dynamic",
+     "home-page": "home_page",
+     "keywords": "keywords",
+     "license": "license",
+     "license-expression": "license_expression",
+     "license-file": "license_files",
+     "maintainer": "maintainer",
+     "maintainer-email": "maintainer_email",
+     "metadata-version": "metadata_version",
+     "name": "name",
+     "obsoletes": "obsoletes",
+     "obsoletes-dist": "obsoletes_dist",
+     "platform": "platforms",
+     "project-url": "project_urls",
+     "provides": "provides",
+     "provides-dist": "provides_dist",
+     "provides-extra": "provides_extra",
+     "requires": "requires",
+     "requires-dist": "requires_dist",
+     "requires-external": "requires_external",
+     "requires-python": "requires_python",
+     "summary": "summary",
+     "supported-platform": "supported_platforms",
+     "version": "version",
+ }
+ _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
+
+
+ def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
+     """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
+
+     This function returns a two-item tuple of dicts. The first dict is of
+     recognized fields from the core metadata specification. Fields that can be
+     parsed and translated into Python's built-in types are converted
+     appropriately. All other fields are left as-is. Fields that are allowed to
+     appear multiple times are stored as lists.
+
+     The second dict contains all other fields from the metadata. This includes
+     any unrecognized fields. It also includes any fields which are expected to
+     be parsed into a built-in type but were not formatted appropriately. Finally,
+     any fields that are expected to appear only once but are repeated are
+     included in this dict.
+
+     """
+     raw: dict[str, str | list[str] | dict[str, str]] = {}
+     unparsed: dict[str, list[str]] = {}
+
+     if isinstance(data, str):
+         parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
+     else:
+         parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
+
+     # We have to wrap parsed.keys() in a set, because in the case of multiple
+     # values for a key (a list), the key will appear multiple times in the
+     # list of keys, but we're avoiding that by using get_all().
+     for name in frozenset(parsed.keys()):
+         # Header names in RFC are case insensitive, so we'll normalize to all
+         # lower case to make comparisons easier.
+         name = name.lower()
+
+         # We use get_all() here, even for fields that aren't multiple use,
+         # because otherwise someone could have e.g. two Name fields, and we
+         # would just silently ignore it rather than doing something about it.
+         headers = parsed.get_all(name) or []
+
+         # The way the email module works when parsing bytes is that it
+         # unconditionally decodes the bytes as ascii using the surrogateescape
+         # handler. When you pull that data back out (such as with get_all() ),
+         # it looks to see if the str has any surrogate escapes, and if it does
+         # it wraps it in a Header object instead of returning the string.
+         #
+         # As such, we'll look for those Header objects, and fix up the encoding.
+         value = []
+         # Flag if we have run into any issues processing the headers, thus
+         # signalling that the data belongs in 'unparsed'.
+         valid_encoding = True
+         for h in headers:
+             # It's unclear if this can return more types than just a Header or
+             # a str, so we'll just assert here to make sure.
+             assert isinstance(h, (email.header.Header, str))
+
+             # If it's a header object, we need to do our little dance to get
+             # the real data out of it. In cases where there is invalid data
+             # we're going to end up with mojibake, but there's no obvious, good
+             # way around that without reimplementing parts of the Header object
+             # ourselves.
+             #
+             # That should be fine since, if mojibake happens, this key is
+             # going into the unparsed dict anyways.
+             if isinstance(h, email.header.Header):
+                 # The Header object stores its data as chunks, and each chunk
+                 # can be independently encoded, so we'll need to check each
+                 # of them.
+                 chunks: list[tuple[bytes, str | None]] = []
+                 for bin, encoding in email.header.decode_header(h):
+                     try:
+                         bin.decode("utf8", "strict")
+                     except UnicodeDecodeError:
+                         # Enable mojibake.
+                         encoding = "latin1"
+                         valid_encoding = False
+                     else:
+                         encoding = "utf8"
+                     chunks.append((bin, encoding))
+
+                 # Turn our chunks back into a Header object, then let that
+                 # Header object do the right thing to turn them into a
+                 # string for us.
+                 value.append(str(email.header.make_header(chunks)))
+             # This is already a string, so just add it.
+             else:
+                 value.append(h)
+
+         # We've processed all of our values to get them into a list of str,
+         # but we may have mojibake data, in which case this is an unparsed
+         # field.
+         if not valid_encoding:
+             unparsed[name] = value
+             continue
+
+         raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
+         if raw_name is None:
+             # This is a bit of a weird situation, we've encountered a key that
+             # we don't know what it means, so we don't know whether it's meant
+             # to be a list or not.
+             #
+             # Since we can't really tell one way or another, we'll just leave it
+             # as a list, even though it may be a single item list, because that's
+             # what makes the most sense for email headers.
+             unparsed[name] = value
+             continue
+
+         # If this is one of our string fields, then we'll check to see if our
+         # value is a list of a single item. If it is then we'll assume that
+         # it was emitted as a single string, and unwrap the str from inside
+         # the list.
+         #
+         # If it's any other kind of data, then we haven't the faintest clue
+         # what we should parse it as, and we have to just add it to our list
+         # of unparsed stuff.
+         if raw_name in _STRING_FIELDS and len(value) == 1:
+             raw[raw_name] = value[0]
+         # If this is one of our list of string fields, then we can just assign
+         # the value, since email *only* has strings, and our get_all() call
+         # above ensures that this is a list.
+         elif raw_name in _LIST_FIELDS:
+             raw[raw_name] = value
+         # Special Case: Keywords
+         # The keywords field is implemented in the metadata spec as a str,
+         # but it conceptually is a list of strings, and is serialized using
+         # ", ".join(keywords), so we'll do some light data massaging to turn
+         # this into what it logically is.
+         elif raw_name == "keywords" and len(value) == 1:
+             raw[raw_name] = _parse_keywords(value[0])
+         # Special Case: Project-URL
+         # Project URLs are implemented in the metadata spec as a list of
+         # specially-formatted strings that represent a key and a value, which
+         # is fundamentally a mapping, however the email format doesn't support
+         # mappings in a sane way, so it was crammed into a list of strings
+         # instead.
+         #
+         # We will do a little light data massaging to turn this into a map as
+         # it logically should be.
+         elif raw_name == "project_urls":
+             try:
+                 raw[raw_name] = _parse_project_urls(value)
+             except KeyError:
+                 unparsed[name] = value
+         # Nothing that we've done has managed to parse this, so it'll just
+         # throw it in our unparseable data and move on.
+         else:
+             unparsed[name] = value
+
+     # We need to support getting the Description from the message payload in
+     # addition to getting it from the headers. This does mean, though, there
+     # is the possibility of it being set both ways, in which case we put both
+     # in 'unparsed' since we don't know which is right.
+     try:
+         payload = _get_payload(parsed, data)
+     except ValueError:
+         unparsed.setdefault("description", []).append(
+             parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
+         )
+     else:
+         if payload:
+             # Check to see if we've already got a description, if so then both
+             # it, and this body move to unparseable.
+             if "description" in raw:
+                 description_header = cast(str, raw.pop("description"))
+                 unparsed.setdefault("description", []).extend(
+                     [description_header, payload]
+                 )
+             elif "description" in unparsed:
+                 unparsed["description"].append(payload)
+             else:
+                 raw["description"] = payload
+
+     # We need to cast our `raw` to a RawMetadata because a TypedDict only
+     # supports literal key names while we compute ours dynamically; the way
+     # this function is implemented guarantees that only valid key names end
+     # up in the dict.
+     return cast(RawMetadata, raw), unparsed
+
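+ # For example (a minimal, hypothetical METADATA document): well-formed fields
+ # land in ``raw`` while unrecognized or malformed ones land in ``unparsed``.
+ #
+ #     raw, unparsed = parse_email(
+ #         "Metadata-Version: 2.1\nName: example-package\nVersion: 1.0.0\n"
+ #     )
+ #     assert raw["name"] == "example-package"
+ #     assert unparsed == {}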
+
+ _NOT_FOUND = object()
+
+
+ # Keep the two values in sync.
+ _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+ _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+
+ _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
+
+
+ class _Validator(Generic[T]):
+     """Validate a metadata field.
+
+     All _process_*() methods correspond to a core metadata field. The method is
+     called with the field's raw value. If the raw value is valid it is returned
+     in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
+     If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
+     as appropriate).
+     """
+
+     name: str
+     raw_name: str
+     added: _MetadataVersion
+
+     def __init__(
+         self,
+         *,
+         added: _MetadataVersion = "1.0",
+     ) -> None:
+         self.added = added
+
+     def __set_name__(self, _owner: Metadata, name: str) -> None:
+         self.name = name
+         self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
+
+     def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
+         # With Python 3.8, the caching can be replaced with functools.cached_property().
+         # No need to check the cache as attribute lookup will resolve into the
+         # instance's __dict__ before __get__ is called.
+         cache = instance.__dict__
+         value = instance._raw.get(self.name)
+
+         # To make the _process_* methods easier, we'll check if the value is None
+         # and if this field is NOT a required attribute, and if both of those
+         # things are true, we'll skip the converter. This will mean that the
+         # converters never have to deal with the None union.
+         if self.name in _REQUIRED_ATTRS or value is not None:
+             try:
+                 converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
+             except AttributeError:
+                 pass
+             else:
+                 value = converter(value)
+
+         cache[self.name] = value
+         try:
+             del instance._raw[self.name]  # type: ignore[misc]
+         except KeyError:
+             pass
+
+         return cast(T, value)
+
+     def _invalid_metadata(
+         self, msg: str, cause: Exception | None = None
+     ) -> InvalidMetadata:
+         exc = InvalidMetadata(
+             self.raw_name, msg.format_map({"field": repr(self.raw_name)})
+         )
+         exc.__cause__ = cause
+         return exc
+
+     def _process_metadata_version(self, value: str) -> _MetadataVersion:
+         # Implicitly makes Metadata-Version required.
+         if value not in _VALID_METADATA_VERSIONS:
+             raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
+         return cast(_MetadataVersion, value)
+
+     def _process_name(self, value: str) -> str:
+         if not value:
+             raise self._invalid_metadata("{field} is a required field")
+         # Validate the name as a side-effect.
+         try:
+             utils.canonicalize_name(value, validate=True)
+         except utils.InvalidName as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return value
+
+     def _process_version(self, value: str) -> version_module.Version:
+         if not value:
+             raise self._invalid_metadata("{field} is a required field")
+         try:
+             return version_module.parse(value)
+         except version_module.InvalidVersion as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_summary(self, value: str) -> str:
+         """Check the field contains no newlines."""
+         if "\n" in value:
+             raise self._invalid_metadata("{field} must be a single line")
+         return value
+
+     def _process_description_content_type(self, value: str) -> str:
+         content_types = {"text/plain", "text/x-rst", "text/markdown"}
+         message = email.message.EmailMessage()
+         message["content-type"] = value
+
+         content_type, parameters = (
+             # Defaults to `text/plain` if parsing failed.
+             message.get_content_type().lower(),
+             message["content-type"].params,
+         )
+         # Check if content-type is valid or defaulted to `text/plain` and thus was
+         # not parseable.
+         if content_type not in content_types or content_type not in value.lower():
+             raise self._invalid_metadata(
+                 f"{{field}} must be one of {list(content_types)}, not {value!r}"
+             )
+
+         charset = parameters.get("charset", "UTF-8")
+         if charset != "UTF-8":
+             raise self._invalid_metadata(
+                 f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
+             )
+
+         markdown_variants = {"GFM", "CommonMark"}
+         variant = parameters.get("variant", "GFM")  # Use an acceptable default.
+         if content_type == "text/markdown" and variant not in markdown_variants:
+             raise self._invalid_metadata(
+                 f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
+                 f"not {variant!r}",
+             )
+         return value
+
+     def _process_dynamic(self, value: list[str]) -> list[str]:
+         for dynamic_field in map(str.lower, value):
+             if dynamic_field in {"name", "version", "metadata-version"}:
+                 raise self._invalid_metadata(
+                     f"{dynamic_field!r} is not allowed as a dynamic field"
+                 )
+             elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
+                 raise self._invalid_metadata(
+                     f"{dynamic_field!r} is not a valid dynamic field"
+                 )
+         return list(map(str.lower, value))
+
+     def _process_provides_extra(
+         self,
+         value: list[str],
+     ) -> list[utils.NormalizedName]:
+         normalized_names = []
+         try:
+             for name in value:
+                 normalized_names.append(utils.canonicalize_name(name, validate=True))
+         except utils.InvalidName as exc:
+             raise self._invalid_metadata(
+                 f"{name!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return normalized_names
+
+     def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
+         try:
+             return specifiers.SpecifierSet(value)
+         except specifiers.InvalidSpecifier as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_requires_dist(
+         self,
+         value: list[str],
+     ) -> list[requirements.Requirement]:
+         reqs = []
+         try:
+             for req in value:
+                 reqs.append(requirements.Requirement(req))
+         except requirements.InvalidRequirement as exc:
+             raise self._invalid_metadata(
+                 f"{req!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return reqs
+
+     def _process_license_expression(
+         self, value: str
+     ) -> NormalizedLicenseExpression | None:
+         try:
+             return licenses.canonicalize_license_expression(value)
+         except ValueError as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_license_files(self, value: list[str]) -> list[str]:
+         paths = []
+         for path in value:
+             if ".." in path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, "
+                     "parent directory indicators are not allowed"
+                 )
+             if "*" in path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must be resolved"
+                 )
+             if (
+                 pathlib.PurePosixPath(path).is_absolute()
+                 or pathlib.PureWindowsPath(path).is_absolute()
+             ):
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must be relative"
+                 )
+             if pathlib.PureWindowsPath(path).as_posix() != path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must use '/' delimiter"
+                 )
+             paths.append(path)
+         return paths
+
+
+ class Metadata:
+     """Representation of distribution metadata.
+
+     Compared to :class:`RawMetadata`, this class provides objects representing
+     metadata fields instead of only using built-in types. Any invalid metadata
+     will cause :exc:`InvalidMetadata` to be raised (with a
+     :py:attr:`~BaseException.__cause__` attribute as appropriate).
+     """
+
+     _raw: RawMetadata
+
+     @classmethod
+     def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
+         """Create an instance from :class:`RawMetadata`.
+
+         If *validate* is true, all metadata will be validated. All exceptions
+         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+         """
+         ins = cls()
+         ins._raw = data.copy()  # Mutations occur due to caching enriched values.
+
+         if validate:
+             exceptions: list[Exception] = []
+             try:
+                 metadata_version = ins.metadata_version
+                 metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
+             except InvalidMetadata as metadata_version_exc:
+                 exceptions.append(metadata_version_exc)
+                 metadata_version = None
+
+             # Check the fields that are present as well as the required
+             # fields (so their absence can be reported).
+             fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
+             # Remove fields that have already been checked.
+             fields_to_check -= {"metadata_version"}
+
+             for key in fields_to_check:
+                 try:
+                     if metadata_version:
+                         # Can't use getattr() as that triggers descriptor protocol which
+                         # will fail due to no value for the instance argument.
+                         try:
+                             field_metadata_version = cls.__dict__[key].added
+                         except KeyError:
+                             exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
+                             exceptions.append(exc)
+                             continue
+                         field_age = _VALID_METADATA_VERSIONS.index(
+                             field_metadata_version
+                         )
+                         if field_age > metadata_age:
+                             field = _RAW_TO_EMAIL_MAPPING[key]
+                             exc = InvalidMetadata(
+                                 field,
+                                 f"{field} introduced in metadata version "
+                                 f"{field_metadata_version}, not {metadata_version}",
+                             )
+                             exceptions.append(exc)
+                             continue
+                     getattr(ins, key)
+                 except InvalidMetadata as exc:
+                     exceptions.append(exc)
+
+             if exceptions:
+                 raise ExceptionGroup("invalid metadata", exceptions)
+
+         return ins
+
+     @classmethod
+     def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
+         """Parse metadata from email headers.
+
+         If *validate* is true, the metadata will be validated. All exceptions
+         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+         """
+         raw, unparsed = parse_email(data)
+
+         if validate:
+             exceptions: list[Exception] = []
+             for unparsed_key in unparsed:
+                 if unparsed_key in _EMAIL_TO_RAW_MAPPING:
+                     message = f"{unparsed_key!r} has invalid data"
+                 else:
+                     message = f"unrecognized field: {unparsed_key!r}"
+                 exceptions.append(InvalidMetadata(unparsed_key, message))
+
+             if exceptions:
+                 raise ExceptionGroup("unparsed", exceptions)
+
+         try:
+             return cls.from_raw(raw, validate=validate)
+         except ExceptionGroup as exc_group:
+             raise ExceptionGroup(
+                 "invalid or unparsed metadata", exc_group.exceptions
+             ) from None
+
+     metadata_version: _Validator[_MetadataVersion] = _Validator()
+     """:external:ref:`core-metadata-metadata-version`
+     (required; validated to be a valid metadata version)"""
+     # `name` is not normalized/typed to NormalizedName so as to provide access to
+     # the original/raw name.
+     name: _Validator[str] = _Validator()
+     """:external:ref:`core-metadata-name`
+     (required; validated using :func:`~packaging.utils.canonicalize_name` and its
+     *validate* parameter)"""
+     version: _Validator[version_module.Version] = _Validator()
+     """:external:ref:`core-metadata-version` (required)"""
+     dynamic: _Validator[list[str] | None] = _Validator(
+         added="2.2",
+     )
+     """:external:ref:`core-metadata-dynamic`
+     (validated against core metadata field names and lowercased)"""
+     platforms: _Validator[list[str] | None] = _Validator()
+     """:external:ref:`core-metadata-platform`"""
+     supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-supported-platform`"""
+     summary: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
+     description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
+     """:external:ref:`core-metadata-description`"""
+     description_content_type: _Validator[str | None] = _Validator(added="2.1")
+     """:external:ref:`core-metadata-description-content-type` (validated)"""
+     keywords: _Validator[list[str] | None] = _Validator()
+     """:external:ref:`core-metadata-keywords`"""
+     home_page: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-home-page`"""
+     download_url: _Validator[str | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-download-url`"""
+     author: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-author`"""
+     author_email: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-author-email`"""
+     maintainer: _Validator[str | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-maintainer`"""
+     maintainer_email: _Validator[str | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-maintainer-email`"""
+     license: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-license`"""
+     license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
+         added="2.4"
+     )
+     """:external:ref:`core-metadata-license-expression`"""
+     license_files: _Validator[list[str] | None] = _Validator(added="2.4")
+     """:external:ref:`core-metadata-license-file`"""
+     classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-classifier`"""
+     requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
+         added="1.2"
+     )
+     """:external:ref:`core-metadata-requires-dist`"""
+     requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
+         added="1.2"
+     )
+     """:external:ref:`core-metadata-requires-python`"""
+     # Because `Requires-External` allows for non-PEP 440 version specifiers, we
+     # don't do any processing on the values.
+     requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-requires-external`"""
+     project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-project-url`"""
+     # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
+     # regardless of metadata version.
+     provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
+         added="2.1",
+     )
+     """:external:ref:`core-metadata-provides-extra`"""
+     provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-provides-dist`"""
+     obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-obsoletes-dist`"""
+     requires: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Requires`` (deprecated)"""
+     provides: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Provides`` (deprecated)"""
+     obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Obsoletes`` (deprecated)"""
venv/lib/python3.13/site-packages/packaging/py.typed ADDED
File without changes
venv/lib/python3.13/site-packages/packaging/requirements.py ADDED
@@ -0,0 +1,91 @@
+ # This file is dual licensed under the terms of the Apache License, Version
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
+ # for complete details.
+ from __future__ import annotations
+
+ from typing import Any, Iterator
+
+ from ._parser import parse_requirement as _parse_requirement
+ from ._tokenizer import ParserSyntaxError
+ from .markers import Marker, _normalize_extra_values
+ from .specifiers import SpecifierSet
+ from .utils import canonicalize_name
+
+
+ class InvalidRequirement(ValueError):
+     """
+     An invalid requirement was found, users should refer to PEP 508.
+     """
+
+
+ class Requirement:
+     """Parse a requirement.
+
+     Parse a given requirement string into its parts, such as name, specifier,
+     URL, and extras. Raises InvalidRequirement on a badly-formed requirement
+     string.
+     """
+
+     # TODO: Can we test whether something is contained within a requirement?
+     #       If so how do we do that? Do we need to test against the _name_ of
+     #       the thing as well as the version? What about the markers?
+     # TODO: Can we normalize the name and extra name?
+
+     def __init__(self, requirement_string: str) -> None:
+         try:
+             parsed = _parse_requirement(requirement_string)
+         except ParserSyntaxError as e:
+             raise InvalidRequirement(str(e)) from e
+
+         self.name: str = parsed.name
+         self.url: str | None = parsed.url or None
+         self.extras: set[str] = set(parsed.extras or [])
+         self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
+         self.marker: Marker | None = None
+         if parsed.marker is not None:
+             self.marker = Marker.__new__(Marker)
+             self.marker._markers = _normalize_extra_values(parsed.marker)
+
+     def _iter_parts(self, name: str) -> Iterator[str]:
+         yield name
+
+         if self.extras:
+             formatted_extras = ",".join(sorted(self.extras))
+             yield f"[{formatted_extras}]"
+
+         if self.specifier:
+             yield str(self.specifier)
+
+         if self.url:
+             yield f"@ {self.url}"
+             if self.marker:
+                 yield " "
+
+         if self.marker:
+             yield f"; {self.marker}"
+
+     def __str__(self) -> str:
+         return "".join(self._iter_parts(self.name))
+
+     def __repr__(self) -> str:
+         return f"<Requirement('{self}')>"
+
+     def __hash__(self) -> int:
+         return hash(
+             (
+                 self.__class__.__name__,
+                 *self._iter_parts(canonicalize_name(self.name)),
+             )
+         )
+
+     def __eq__(self, other: Any) -> bool:
+         if not isinstance(other, Requirement):
+             return NotImplemented
+
+         return (
+             canonicalize_name(self.name) == canonicalize_name(other.name)
+             and self.extras == other.extras
+             and self.specifier == other.specifier
+             and self.url == other.url
+             and self.marker == other.marker
+         )
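+
+
+ # For example (a hypothetical requirement string):
+ #
+ #     req = Requirement('example-package[extra]>=1.0; python_version >= "3.9"')
+ #     assert req.name == "example-package"
+ #     assert req.extras == {"extra"}
+ #     assert str(req.specifier) == ">=1.0"
+ #     assert str(req) == 'example-package[extra]>=1.0; python_version >= "3.9"'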