diff --git a/venv/lib/python3.13/site-packages/_yaml/__init__.py b/venv/lib/python3.13/site-packages/_yaml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7baa8c4b68127d5cdf0be9a799429e61347c2694 --- /dev/null +++ b/venv/lib/python3.13/site-packages/_yaml/__init__.py @@ -0,0 +1,33 @@ +# This is a stub package designed to roughly emulate the _yaml +# extension module, which previously existed as a standalone module +# and has been moved into the `yaml` package namespace. +# It does not perfectly mimic its old counterpart, but should get +# close enough for anyone who's relying on it even when they shouldn't. +import yaml + +# in some circumstances, the yaml module we imoprted may be from a different version, so we need +# to tread carefully when poking at it here (it may not have the attributes we expect) +if not getattr(yaml, '__with_libyaml__', False): + from sys import version_info + + exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError + raise exc("No module named '_yaml'") +else: + from yaml._yaml import * + import warnings + warnings.warn( + 'The _yaml extension module is now located at yaml._yaml' + ' and its location is subject to change. To use the' + ' LibYAML-based parser and emitter, import from `yaml`:' + ' `from yaml import CLoader as Loader, CDumper as Dumper`.', + DeprecationWarning + ) + del warnings + # Don't `del yaml` here because yaml is actually an existing + # namespace member of _yaml. + +__name__ = '_yaml' +# If the module is top-level (i.e. not a part of any specific package) +# then the attribute should be set to ''. +# https://docs.python.org/3.8/library/types.html +__package__ = '' diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..8d32edcc9f0aa2057cc7844f7bb36b2c8c2e41d7 --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA @@ -0,0 +1,764 @@ +Metadata-Version: 2.4 +Name: charset-normalizer +Version: 3.4.4 +Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. +Author-email: "Ahmed R. TAHRI" +Maintainer-email: "Ahmed R. TAHRI" +License: MIT +Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md +Project-URL: Documentation, https://charset-normalizer.readthedocs.io/ +Project-URL: Code, https://github.com/jawah/charset_normalizer +Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues +Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Text Processing :: Linguistic +Classifier: Topic :: Utilities +Classifier: Typing :: Typed +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +License-File: LICENSE +Provides-Extra: unicode-backport +Dynamic: license-file + +

Charset Detection, for Everyone πŸ‘‹

+ +

+ The Real First Universal Charset Detector
+ + + + + Download Count Total + + + + +

+

+ Featured Packages
+ + Static Badge + + + Static Badge + +

+

+ In other language (unofficial port - by the community)
+ + Static Badge + +

+ +> A library that helps you read text from an unknown charset encoding.
Motivated by `chardet`, +> I'm trying to resolve the issue by taking a new approach. +> All IANA character set names for which the Python core library provides codecs are supported. + +

+ >>>>> πŸ‘‰ Try Me Online Now, Then Adopt Me πŸ‘ˆ <<<<< +

+ +This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**. + +| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) | +|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:| +| `Fast` | ❌ | βœ… | βœ… | +| `Universal**` | ❌ | βœ… | ❌ | +| `Reliable` **without** distinguishable standards | ❌ | βœ… | βœ… | +| `Reliable` **with** distinguishable standards | βœ… | βœ… | βœ… | +| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ | +| `Native Python` | βœ… | βœ… | ❌ | +| `Detect spoken language` | ❌ | βœ… | N/A | +| `UnicodeDecodeError Safety` | ❌ | βœ… | ❌ | +| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB | +| `Supported Encoding` | 33 | πŸŽ‰ [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 | + +

+Reading Normalized TextCat Reading Text +

+ +*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*
+ +## ⚑ Performance + +This package offer better performance than its counterpart Chardet. Here are some numbers. + +| Package | Accuracy | Mean per file (ms) | File per sec (est) | +|-----------------------------------------------|:--------:|:------------------:|:------------------:| +| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec | +| charset-normalizer | **98 %** | **10 ms** | 100 file/sec | + +| Package | 99th percentile | 95th percentile | 50th percentile | +|-----------------------------------------------|:---------------:|:---------------:|:---------------:| +| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms | +| charset-normalizer | 100 ms | 50 ms | 5 ms | + +_updated as of december 2024 using CPython 3.12_ + +Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload. + +> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows. +> And yes, these results might change at any time. The dataset can be updated to include more files. +> The actual delays heavily depends on your CPU capabilities. The factors should remain the same. +> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability +> (e.g. Supported Encoding) Challenge-them if you want. + +## ✨ Installation + +Using pip: + +```sh +pip install charset-normalizer -U +``` + +## πŸš€ Basic Usage + +### CLI +This package comes with a CLI. + +``` +usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD] + file [file ...] + +The Real First Universal Charset Detector. Discover originating encoding used +on text file. Normalize text to unicode. + +positional arguments: + files File(s) to be analysed + +optional arguments: + -h, --help show this help message and exit + -v, --verbose Display complementary information about file if any. + Stdout will contain logs about the detection process. + -a, --with-alternative + Output complementary possibilities if any. Top-level + JSON WILL be a list. + -n, --normalize Permit to normalize input file. If not set, program + does not write anything. + -m, --minimal Only output the charset detected to STDOUT. Disabling + JSON output. + -r, --replace Replace file when trying to normalize it instead of + creating a new one. + -f, --force Replace file without asking if you are sure, use this + flag with caution. + -t THRESHOLD, --threshold THRESHOLD + Define a custom maximum amount of chaos allowed in + decoded content. 0. <= chaos <= 1. + --version Show version information and exit. +``` + +```bash +normalizer ./data/sample.1.fr.srt +``` + +or + +```bash +python -m charset_normalizer ./data/sample.1.fr.srt +``` + +πŸŽ‰ Since version 1.4.0 the CLI produce easily usable stdout result in JSON format. + +```json +{ + "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt", + "encoding": "cp1252", + "encoding_aliases": [ + "1252", + "windows_1252" + ], + "alternative_encodings": [ + "cp1254", + "cp1256", + "cp1258", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + "mbcs" + ], + "language": "French", + "alphabets": [ + "Basic Latin", + "Latin-1 Supplement" + ], + "has_sig_or_bom": false, + "chaos": 0.149, + "coherence": 97.152, + "unicode_path": null, + "is_preferred": true +} +``` + +### Python +*Just print out normalized text* +```python +from charset_normalizer import from_path + +results = from_path('./my_subtitle.srt') + +print(str(results.best())) +``` + +*Upgrade your code without effort* +```python +from charset_normalizer import detect +``` + +The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible. + +See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/) + +## πŸ˜‡ Why + +When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a +reliable alternative using a completely different method. Also! I never back down on a good challenge! + +I **don't care** about the **originating charset** encoding, because **two different tables** can +produce **two identical rendered string.** +What I want is to get readable text, the best I can. + +In a way, **I'm brute forcing text decoding.** How cool is that ? 😎 + +Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode. + +## 🍰 How + + - Discard all charset encoding table that could not fit the binary content. + - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding. + - Extract matches with the lowest mess detected. + - Additionally, we measure coherence / probe for a language. + +**Wait a minute**, what is noise/mess and coherence according to **YOU ?** + +*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then +**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text). + I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to + improve or rewrite it. + +*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought +that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design. + +## ⚑ Known limitations + + - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters)) + - Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content. + +## ⚠️ About Python EOLs + +**If you are running:** + +- Python >=2.7,<3.5: Unsupported +- Python 3.5: charset-normalizer < 2.1 +- Python 3.6: charset-normalizer < 3.1 +- Python 3.7: charset-normalizer < 4.0 + +Upgrade your Python interpreter as soon as possible. + +## πŸ‘€ Contributing + +Contributions, issues and feature requests are very much welcome.
+Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute. + +## πŸ“ License + +Copyright Β© [Ahmed TAHRI @Ousret](https://github.com/Ousret).
+This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed. + +Characters frequencies used in this project Β© 2012 [Denny VrandečiΔ‡](http://simia.net/letters/) + +## πŸ’Ό For Enterprise + +Professional support for charset-normalizer is available as part of the [Tidelift +Subscription][1]. Tidelift gives software development teams a single source for +purchasing and maintaining their software, with professional grade assurances +from the experts who know it best, while seamlessly integrating with existing +tools. + +[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297) + +# Changelog +All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13) + +### Changed +- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`. +- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2 + +### Removed +- `setuptools-scm` as a build dependency. + +### Misc +- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes. +- Additional pre-built wheels for riscv64, s390x, and armv7l architectures. +- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel. + +## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09) + +### Changed +- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583) +- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391) + +### Added +- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase. +- Support for Python 3.14 + +### Fixed +- sdist archive contained useless directories. +- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633) + +### Misc +- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes. + Each published wheel comes with its SBOM. We choose CycloneDX as the format. +- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel. + +## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02) + +### Fixed +- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591) +- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587) + +### Changed +- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8 + +## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24) + +### Changed +- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend. +- Enforce annotation delayed loading for a simpler and consistent types in the project. +- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8 + +### Added +- pre-commit configuration. +- noxfile. + +### Removed +- `build-requirements.txt` as per using `pyproject.toml` native build configuration. +- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile). +- `setup.cfg` in favor of `pyproject.toml` metadata configuration. +- Unused `utils.range_scan` function. + +### Fixed +- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572) +- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+ + +## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08) + +### Added +- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints. +- Support for Python 3.13 (#512) + +### Fixed +- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch. +- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537) +- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381) + +## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31) + +### Fixed +- Unintentional memory usage regression when using large payload that match several encoding (#376) +- Regression on some detection case showcased in the documentation (#371) + +### Added +- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife) + +## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22) + +### Changed +- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8 +- Improved the general detection reliability based on reports from the community + +## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30) + +### Added +- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer` +- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323) + +### Removed +- (internal) Redundant utils.is_ascii function and unused function is_private_use_only +- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant + +### Changed +- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection +- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8 + +### Fixed +- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350) + +## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07) + +### Changed +- Typehint for function `from_path` no longer enforce `PathLike` as its first argument +- Minor improvement over the global detection reliability + +### Added +- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries +- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True) +- Explicit support for Python 3.12 + +### Fixed +- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289) + +## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06) + +### Added +- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262) + +### Removed +- Support for Python 3.6 (PR #260) + +### Changed +- Optional speedup provided by mypy/c 1.0.1 + +## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18) + +### Fixed +- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233) + +### Changed +- Speedup provided by mypy/c 0.990 on Python >= 3.7 + +## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it +- Sphinx warnings when generating the documentation + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' + +## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) + +### Added +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Removed +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) + +### Fixed +- Sphinx warnings when generating the documentation + +## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15) + +### Changed +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Removed +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19) + +### Deprecated +- Function `normalize` scheduled for removal in 3.0 + +### Changed +- Removed useless call to decode in fn is_unprintable (#206) + +### Fixed +- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204) + +## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19) + +### Added +- Output the Unicode table version when running the CLI with `--version` (PR #194) + +### Changed +- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175) +- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183) + +### Fixed +- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175) +- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181) + +### Removed +- Support for Python 3.5 (PR #192) + +### Deprecated +- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194) + +## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12) + +### Fixed +- ASCII miss-detection on rare cases (PR #170) + +## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30) + +### Added +- Explicit support for Python 3.11 (PR #164) + +### Changed +- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165) + +## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04) + +### Fixed +- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154) + +### Changed +- Skipping the language-detection (CD) on ASCII (PR #155) + +## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03) + +### Changed +- Moderating the logging impact (since 2.0.8) for specific environments (PR #147) + +### Fixed +- Wrong logging level applied when setting kwarg `explain` to True (PR #146) + +## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24) +### Changed +- Improvement over Vietnamese detection (PR #126) +- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124) +- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122) +- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129) +- Code style as refactored by Sourcery-AI (PR #131) +- Minor adjustment on the MD around european words (PR #133) +- Remove and replace SRTs from assets / tests (PR #139) +- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135) + +### Fixed +- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137) +- Avoid using too insignificant chunk (PR #137) + +### Added +- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141) + +## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11) +### Added +- Add support for Kazakh (Cyrillic) language detection (PR #109) + +### Changed +- Further, improve inferring the language from a given single-byte code page (PR #112) +- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116) +- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113) +- Various detection improvement (MD+CD) (PR #117) + +### Removed +- Remove redundant logging entry about detected language(s) (PR #115) + +### Fixed +- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102) + +## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18) +### Fixed +- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100) +- Fix CLI crash when using --minimal output in certain cases (PR #103) + +### Changed +- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101) + +## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14) +### Changed +- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81) +- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82) +- The Unicode detection is slightly improved (PR #93) +- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91) + +### Removed +- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92) + +### Fixed +- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95) +- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96) +- The MANIFEST.in was not exhaustive (PR #78) + +## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30) +### Fixed +- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70) +- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68) +- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72) +- Submatch factoring could be wrong in rare edge cases (PR #72) +- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72) +- Fix line endings from CRLF to LF for certain project files (PR #67) + +### Changed +- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76) +- Allow fallback on specified encoding if any (PR #71) + +## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16) +### Changed +- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63) +- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64) + +## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15) +### Fixed +- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) + +### Changed +- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57) + +## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13) +### Fixed +- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55) +- Using explain=False permanently disable the verbose output in the current runtime (PR #47) +- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47) +- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52) + +### Changed +- Public function normalize default args values were not aligned with from_bytes (PR #53) + +### Added +- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47) + +## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02) +### Changed +- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet. +- Accent has been made on UTF-8 detection, should perform rather instantaneous. +- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible. +- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time) +- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+ +- utf_7 detection has been reinstated. + +### Removed +- This package no longer require anything when used with Python 3.5 (Dropped cached_property) +- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, VolapΓΌk, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian. +- The exception hook on UnicodeDecodeError has been removed. + +### Deprecated +- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0 + +### Fixed +- The CLI output used the relative path of the file(s). Should be absolute. + +## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28) +### Fixed +- Logger configuration/usage no longer conflict with others (PR #44) + +## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21) +### Removed +- Using standard logging instead of using the package loguru. +- Dropping nose test framework in favor of the maintained pytest. +- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text. +- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version. +- Stop support for UTF-7 that does not contain a SIG. +- Dropping PrettyTable, replaced with pure JSON output in CLI. + +### Fixed +- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process. +- Not searching properly for the BOM when trying utf32/16 parent codec. + +### Changed +- Improving the package final size by compressing frequencies.json. +- Huge improvement over the larges payload. + +### Added +- CLI now produces JSON consumable output. +- Return ASCII if given sequences fit. Given reasonable confidence. + +## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13) + +### Fixed +- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40) + +## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12) + +### Fixed +- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39) + +## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12) + +### Fixed +- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38) + +## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09) + +### Changed +- Amend the previous release to allow prettytable 2.0 (PR #35) + +## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08) + +### Fixed +- Fix error while using the package with a python pre-release interpreter (PR #33) + +### Changed +- Dependencies refactoring, constraints revised. + +### Added +- Add python 3.9 and 3.10 to the supported interpreters + +MIT License + +Copyright (c) 2025 TAHRI Ahmed R. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..327ab57314e828d0b29076094be2df691984a385 --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD @@ -0,0 +1,35 @@ +../../../bin/normalizer,sha256=0NCCWHGXwNJFGXe9vG0dHrG67nHnzOFp4ZWd0RQ0qoI,225 +charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303 +charset_normalizer-3.4.4.dist-info/RECORD,, +charset_normalizer-3.4.4.dist-info/WHEEL,sha256=2iHh9e2o6T3nHtu_NVT7Cs7pebIqF94rZK8zrQfgoJI,190 +charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65 +charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071 +charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19 +charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590 +charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109 +charset_normalizer/__pycache__/__init__.cpython-313.pyc,, +charset_normalizer/__pycache__/__main__.cpython-313.pyc,, +charset_normalizer/__pycache__/api.cpython-313.pyc,, +charset_normalizer/__pycache__/cd.cpython-313.pyc,, +charset_normalizer/__pycache__/constant.cpython-313.pyc,, +charset_normalizer/__pycache__/legacy.cpython-313.pyc,, +charset_normalizer/__pycache__/md.cpython-313.pyc,, +charset_normalizer/__pycache__/models.cpython-313.pyc,, +charset_normalizer/__pycache__/utils.cpython-313.pyc,, +charset_normalizer/__pycache__/version.cpython-313.pyc,, +charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668 +charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522 +charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136 +charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646 +charset_normalizer/cli/__pycache__/__init__.cpython-313.pyc,, +charset_normalizer/cli/__pycache__/__main__.cpython-313.pyc,, +charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713 +charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731 +charset_normalizer/md.cpython-313-x86_64-linux-gnu.so,sha256=sZ7umtJLjKfA83NFJ7npkiDyr06zDT8cWtl6uIx2MsM,15912 +charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145 +charset_normalizer/md__mypyc.cpython-313-x86_64-linux-gnu.so,sha256=i-yavqPJtZwjTKvP9hBLZ8CLZD88rVtguaSoLHso_Oc,291056 +charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394 +charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170 +charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115 diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..539ecdef0c43b82970407bf0d2e868801a0862df --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL @@ -0,0 +1,7 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp313-cp313-manylinux_2_17_x86_64 +Tag: cp313-cp313-manylinux2014_x86_64 +Tag: cp313-cp313-manylinux_2_28_x86_64 + diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..65619e73ec06c20c2a70c9507b872ad624d1a85c --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +normalizer = charset_normalizer.cli:cli_detect diff --git a/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..66958f0a069d7aea7939bed40b9197608e93b243 --- /dev/null +++ b/venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt @@ -0,0 +1 @@ +charset_normalizer diff --git a/venv/lib/python3.13/site-packages/filelock/__init__.py b/venv/lib/python3.13/site-packages/filelock/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9d8c5b8ebe565a652b3671b3dfa066f7346af45 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/__init__.py @@ -0,0 +1,70 @@ +""" +A platform independent file lock that supports the with-statement. + +.. autodata:: filelock.__version__ + :no-value: + +""" + +from __future__ import annotations + +import sys +import warnings +from typing import TYPE_CHECKING + +from ._api import AcquireReturnProxy, BaseFileLock +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock, has_fcntl +from ._windows import WindowsFileLock +from .asyncio import ( + AsyncAcquireReturnProxy, + AsyncSoftFileLock, + AsyncUnixFileLock, + AsyncWindowsFileLock, + BaseAsyncFileLock, +) +from .version import version + +#: version of the project as a string +__version__: str = version + + +if sys.platform == "win32": # pragma: win32 cover + _FileLock: type[BaseFileLock] = WindowsFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock +else: # pragma: win32 no cover # noqa: PLR5501 + if has_fcntl: + _FileLock: type[BaseFileLock] = UnixFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock + else: + _FileLock = SoftFileLock + _AsyncFileLock = AsyncSoftFileLock + if warnings is not None: + warnings.warn("only soft file lock is available", stacklevel=2) + +if TYPE_CHECKING: + FileLock = SoftFileLock + AsyncFileLock = AsyncSoftFileLock +else: + #: Alias for the lock, which should be used for the current platform. + FileLock = _FileLock + AsyncFileLock = _AsyncFileLock + + +__all__ = [ + "AcquireReturnProxy", + "AsyncAcquireReturnProxy", + "AsyncFileLock", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", + "BaseFileLock", + "FileLock", + "SoftFileLock", + "Timeout", + "UnixFileLock", + "WindowsFileLock", + "__version__", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_api.py b/venv/lib/python3.13/site-packages/filelock/_api.py new file mode 100644 index 0000000000000000000000000000000000000000..8fde69a0fef7badcc123d17735cd784a99baed52 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_api.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import contextlib +import inspect +import logging +import os +import time +import warnings +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from threading import local +from typing import TYPE_CHECKING, Any, cast +from weakref import WeakValueDictionary + +from ._error import Timeout + +if TYPE_CHECKING: + import sys + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: + self.lock = lock + + def __enter__(self) -> BaseFileLock: + return self.lock + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + self.lock.release() + + +@dataclass +class FileLockContext: + """A dataclass which holds the context for a ``BaseFileLock`` object.""" + + # The context is held in a separate class to allow optional use of thread local storage via the + # ThreadLocalFileContext class. + + #: The path to the lock file. + lock_file: str + + #: The default timeout value. + timeout: float + + #: The mode for the lock files + mode: int + + #: Whether the lock should be blocking or not + blocking: bool + + #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held + lock_file_fd: int | None = None + + #: The lock counter is used for implementing the nested locking mechanism. + lock_counter: int = 0 # When the lock is acquired is increased and the lock is only released, when this value is 0 + + +class ThreadLocalFileContext(FileLockContext, local): + """A thread local version of the ``FileLockContext`` class.""" + + +class FileLockMeta(ABCMeta): + def __call__( # noqa: PLR0913 + cls, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + **kwargs: Any, # capture remaining kwargs for subclasses # noqa: ANN401 + ) -> BaseFileLock: + if is_singleton: + instance = cls._instances.get(str(lock_file)) # type: ignore[attr-defined] + if instance: + params_to_check = { + "thread_local": (thread_local, instance.is_thread_local()), + "timeout": (timeout, instance.timeout), + "mode": (mode, instance.mode), + "blocking": (blocking, instance.blocking), + } + + non_matching_params = { + name: (passed_param, set_param) + for name, (passed_param, set_param) in params_to_check.items() + if passed_param != set_param + } + if not non_matching_params: + return cast("BaseFileLock", instance) + + # parameters do not match; raise error + msg = "Singleton lock instances cannot be initialized with differing arguments" + msg += "\nNon-matching arguments: " + for param_name, (passed_param, set_param) in non_matching_params.items(): + msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)" + raise ValueError(msg) + + # Workaround to make `__init__`'s params optional in subclasses + # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant + # (https://github.com/tox-dev/filelock/pull/340) + + all_params = { + "timeout": timeout, + "mode": mode, + "thread_local": thread_local, + "blocking": blocking, + "is_singleton": is_singleton, + **kwargs, + } + + present_params = inspect.signature(cls.__init__).parameters # type: ignore[misc] + init_params = {key: value for key, value in all_params.items() if key in present_params} + + instance = super().__call__(lock_file, **init_params) + + if is_singleton: + cls._instances[str(lock_file)] = instance # type: ignore[attr-defined] + + return cast("BaseFileLock", instance) + + +class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta): + """Abstract base class for a file lock object.""" + + _instances: WeakValueDictionary[str, BaseFileLock] + + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + """Setup unique state for lock subclasses.""" + super().__init_subclass__(**kwargs) + cls._instances = WeakValueDictionary() + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + } + self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs) + + def is_thread_local(self) -> bool: + """:return: a flag indicating if this lock is thread local or not""" + return self._is_thread_local + + @property + def is_singleton(self) -> bool: + """:return: a flag indicating if this lock is singleton or not""" + return self._is_singleton + + @property + def lock_file(self) -> str: + """:return: path to the lock file""" + return self._context.lock_file + + @property + def timeout(self) -> float: + """ + :return: the default timeout value, in seconds + + .. versionadded:: 2.0.0 + """ + return self._context.timeout + + @timeout.setter + def timeout(self, value: float | str) -> None: + """ + Change the default timeout value. + + :param value: the new value, in seconds + + """ + self._context.timeout = float(value) + + @property + def blocking(self) -> bool: + """:return: whether the locking is blocking or not""" + return self._context.blocking + + @blocking.setter + def blocking(self, value: bool) -> None: + """ + Change the default blocking value. + + :param value: the new value as bool + + """ + self._context.blocking = value + + @property + def mode(self) -> int: + """:return: the file permissions for the lockfile""" + return self._context.mode + + @abstractmethod + def _acquire(self) -> None: + """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file.""" + raise NotImplementedError + + @abstractmethod + def _release(self) -> None: + """Releases the lock and sets self._context.lock_file_fd to None.""" + raise NotImplementedError + + @property + def is_locked(self) -> bool: + """ + + :return: A boolean indicating if the lock file is holding the lock currently. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._context.lock_file_fd is not None + + @property + def lock_counter(self) -> int: + """:return: The number of times this lock has been acquired (but not yet released).""" + return self._context.lock_counter + + def acquire( + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + poll_intervall: float | None = None, + blocking: bool | None = None, + ) -> AcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and + if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + if poll_intervall is not None: + msg = "use poll_interval instead of poll_intervall" + warnings.warn(msg, DeprecationWarning, stacklevel=2) + poll_interval = poll_intervall + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + self._acquire() + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + time.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AcquireReturnProxy(lock=self) + + def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + self._release() + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + def __enter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + self.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + self.release(force=True) + + +__all__ = [ + "AcquireReturnProxy", + "BaseFileLock", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_error.py b/venv/lib/python3.13/site-packages/filelock/_error.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ff08c0f508ad7077eb6ed1990898840c952b3a --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_error.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Any + + +class Timeout(TimeoutError): # noqa: N818 + """Raised when the lock could not be acquired in *timeout* seconds.""" + + def __init__(self, lock_file: str) -> None: + super().__init__() + self._lock_file = lock_file + + def __reduce__(self) -> str | tuple[Any, ...]: + return self.__class__, (self._lock_file,) # Properly pickle the exception + + def __str__(self) -> str: + return f"The file lock '{self._lock_file}' could not be acquired." + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.lock_file!r})" + + @property + def lock_file(self) -> str: + """:return: The path of the file lock.""" + return self._lock_file + + +__all__ = [ + "Timeout", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_soft.py b/venv/lib/python3.13/site-packages/filelock/_soft.py new file mode 100644 index 0000000000000000000000000000000000000000..28c67f74cc82b8f55e47afd6a71972cc1fb95eb6 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_soft.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES, EEXIST +from pathlib import Path + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + + +class SoftFileLock(BaseFileLock): + """Simply watches the existence of the lock file.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + # first check for exists and read-only mode as the open will mask this case as EEXIST + flags = ( + os.O_WRONLY # open for writing only + | os.O_CREAT + | os.O_EXCL # together with above raise EEXIST if the file specified by filename exists + | os.O_TRUNC # truncate the file to zero byte + ) + try: + file_handler = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: # re-raise unless expected exception + if not ( + exception.errno == EEXIST # lock already exist + or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock + ): # pragma: win32 no cover + raise + else: + self._context.lock_file_fd = file_handler + + def _release(self) -> None: + assert self._context.lock_file_fd is not None # noqa: S101 + os.close(self._context.lock_file_fd) # the lock file is definitely not None + self._context.lock_file_fd = None + with suppress(OSError): # the file is already deleted and that's what we want + Path(self.lock_file).unlink() + + +__all__ = [ + "SoftFileLock", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_unix.py b/venv/lib/python3.13/site-packages/filelock/_unix.py new file mode 100644 index 0000000000000000000000000000000000000000..b2fd0f33d25d2bdf4a2a883380154771b4a25f9b --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_unix.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import ENOSYS +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists + +#: a flag to indicate if the fcntl API is available +has_fcntl = False +if sys.platform == "win32": # pragma: win32 cover + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + +else: # pragma: win32 no cover + try: + import fcntl + + _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN) + except (ImportError, AttributeError): + pass + else: + has_fcntl = True + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + ensure_directory_exists(self.lock_file) + open_flags = os.O_RDWR | os.O_TRUNC + if not Path(self.lock_file).exists(): + open_flags |= os.O_CREAT + fd = os.open(self.lock_file, open_flags, self._context.mode) + with suppress(PermissionError): # This locked is not owned by this UID + os.fchmod(fd, self._context.mode) + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError as exception: + os.close(fd) + if exception.errno == ENOSYS: # NotImplemented error + msg = "FileSystem does not appear to support flock; use SoftFileLock instead" + raise NotImplementedError(msg) from exception + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + # Do not remove the lockfile: + # https://github.com/tox-dev/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + +__all__ = [ + "UnixFileLock", + "has_fcntl", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_util.py b/venv/lib/python3.13/site-packages/filelock/_util.py new file mode 100644 index 0000000000000000000000000000000000000000..c671e8533873948f0e1b5575ff952c722019f067 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_util.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import os +import stat +import sys +from errno import EACCES, EISDIR +from pathlib import Path + + +def raise_on_not_writable_file(filename: str) -> None: + """ + Raise an exception if attempting to open the file for writing would fail. + + This is done so files that will never be writable can be separated from files that are writable but currently + locked. + + :param filename: file to check + :raises OSError: as if the file was opened for writing. + + """ + try: # use stat to do exists + can write to check without race condition + file_stat = os.stat(filename) # noqa: PTH116 + except OSError: + return # swallow does not exist or other errors + + if file_stat.st_mtime != 0: # if os.stat returns but modification is zero that's an invalid os.stat - ignore it + if not (file_stat.st_mode & stat.S_IWUSR): + raise PermissionError(EACCES, "Permission denied", filename) + + if stat.S_ISDIR(file_stat.st_mode): + if sys.platform == "win32": # pragma: win32 cover + # On Windows, this is PermissionError + raise PermissionError(EACCES, "Permission denied", filename) + else: # pragma: win32 no cover # noqa: RET506 + # On linux / macOS, this is IsADirectoryError + raise IsADirectoryError(EISDIR, "Is a directory", filename) + + +def ensure_directory_exists(filename: Path | str) -> None: + """ + Ensure the directory containing the file exists (create it if necessary). + + :param filename: file. + + """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) + + +__all__ = [ + "ensure_directory_exists", + "raise_on_not_writable_file", +] diff --git a/venv/lib/python3.13/site-packages/filelock/_windows.py b/venv/lib/python3.13/site-packages/filelock/_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..348251d1067c28c55a6a267f8d11337abfae837f --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/_windows.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + +if sys.platform == "win32": # pragma: win32 cover + import msvcrt + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + flags = ( + os.O_RDWR # open for read and write + | os.O_CREAT # create file if not exists + | os.O_TRUNC # truncate file if not empty + ) + try: + fd = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: + if exception.errno != EACCES: # has no access to this lock + raise + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except OSError as exception: + os.close(fd) # close file first + if exception.errno != EACCES: # file is already locked + raise + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + with suppress(OSError): # Probably another instance of the application hat acquired the file lock. + Path(self.lock_file).unlink() + +else: # pragma: win32 no cover + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + + +__all__ = [ + "WindowsFileLock", +] diff --git a/venv/lib/python3.13/site-packages/filelock/asyncio.py b/venv/lib/python3.13/site-packages/filelock/asyncio.py new file mode 100644 index 0000000000000000000000000000000000000000..022d0ef6995dbcf34764366313f4f3f1e9ce8ed6 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/asyncio.py @@ -0,0 +1,344 @@ +"""An asyncio-based implementation of the file lock.""" + +from __future__ import annotations + +import asyncio +import contextlib +import logging +import os +import time +from dataclasses import dataclass +from inspect import iscoroutinefunction +from threading import local +from typing import TYPE_CHECKING, Any, NoReturn, cast + +from ._api import BaseFileLock, FileLockContext, FileLockMeta +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock +from ._windows import WindowsFileLock + +if TYPE_CHECKING: + import sys + from collections.abc import Callable + from concurrent import futures + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: # noqa: D107 + self.lock = lock + + async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105 + return self.lock + + async def __aexit__( # noqa: D105 + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + await self.lock.release() + + +class AsyncFileLockMeta(FileLockMeta): + def __call__( # type: ignore[override] # noqa: PLR0913 + cls, # noqa: N805 + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> BaseAsyncFileLock: + if thread_local and run_in_executor: + msg = "run_in_executor is not supported when thread_local is True" + raise ValueError(msg) + instance = super().__call__( + lock_file=lock_file, + timeout=timeout, + mode=mode, + thread_local=thread_local, + blocking=blocking, + is_singleton=is_singleton, + loop=loop, + run_in_executor=run_in_executor, + executor=executor, + ) + return cast("BaseAsyncFileLock", instance) + + +class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta): + """Base class for asynchronous file locks.""" + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + :param loop: The event loop to use. If not specified, the running event loop will be used. + :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor. + :param executor: The executor to use. If not specified, the default executor will be used. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + "loop": loop, + "run_in_executor": run_in_executor, + "executor": executor, + } + self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)( + **kwargs + ) + + @property + def run_in_executor(self) -> bool: + """::return: whether run in executor.""" + return self._context.run_in_executor + + @property + def executor(self) -> futures.Executor | None: + """::return: the executor.""" + return self._context.executor + + @executor.setter + def executor(self, value: futures.Executor | None) -> None: # pragma: no cover + """ + Change the executor. + + :param value: the new executor or ``None`` + :type value: futures.Executor | None + + """ + self._context.executor = value + + @property + def loop(self) -> asyncio.AbstractEventLoop | None: + """::return: the event loop.""" + return self._context.loop + + async def acquire( # type: ignore[override] + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + blocking: bool | None = None, + ) -> AsyncAcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default + :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and + this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._acquire) + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + await asyncio.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AsyncAcquireReturnProxy(lock=self) + + async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._release) + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + async def _run_internal_method(self, method: Callable[[], Any]) -> None: + if iscoroutinefunction(method): + await method() + elif self.run_in_executor: + loop = self.loop or asyncio.get_running_loop() + await loop.run_in_executor(self.executor, method) + else: + method() + + def __enter__(self) -> NoReturn: + """ + Replace old __enter__ method to avoid using it. + + NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD. + + :return: none + :rtype: NoReturn + """ + msg = "Do not use `with` for asyncio locks, use `async with` instead." + raise NotImplementedError(msg) + + async def __aenter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + await self.acquire() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + await self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + with contextlib.suppress(RuntimeError): + loop = self.loop or asyncio.get_running_loop() + if not loop.is_running(): # pragma: no cover + loop.run_until_complete(self.release(force=True)) + else: + loop.create_task(self.release(force=True)) + + +class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock): + """Simply watches the existence of the lock file.""" + + +class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + +class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock): + """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems.""" + + +__all__ = [ + "AsyncAcquireReturnProxy", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", +] diff --git a/venv/lib/python3.13/site-packages/filelock/py.typed b/venv/lib/python3.13/site-packages/filelock/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/venv/lib/python3.13/site-packages/filelock/version.py b/venv/lib/python3.13/site-packages/filelock/version.py new file mode 100644 index 0000000000000000000000000000000000000000..093125cd6ae5a94fa087734b389ccbb3082495a1 --- /dev/null +++ b/venv/lib/python3.13/site-packages/filelock/version.py @@ -0,0 +1,34 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = [ + "__version__", + "__version_tuple__", + "version", + "version_tuple", + "__commit_id__", + "commit_id", +] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] + COMMIT_ID = Union[str, None] +else: + VERSION_TUPLE = object + COMMIT_ID = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE +commit_id: COMMIT_ID +__commit_id__: COMMIT_ID + +__version__ = version = '3.20.0' +__version_tuple__ = version_tuple = (3, 20, 0) + +__commit_id__ = commit_id = None diff --git a/venv/lib/python3.13/site-packages/fsspec/__init__.py b/venv/lib/python3.13/site-packages/fsspec/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..452c78a055e72a6d04f1013d1a98fda33fdc449e --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/__init__.py @@ -0,0 +1,71 @@ +from . import caching +from ._version import __version__ # noqa: F401 +from .callbacks import Callback +from .compression import available_compressions +from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs +from .exceptions import FSTimeoutError +from .mapping import FSMap, get_mapper +from .registry import ( + available_protocols, + filesystem, + get_filesystem_class, + register_implementation, + registry, +) +from .spec import AbstractFileSystem + +__all__ = [ + "AbstractFileSystem", + "FSTimeoutError", + "FSMap", + "filesystem", + "register_implementation", + "get_filesystem_class", + "get_fs_token_paths", + "get_mapper", + "open", + "open_files", + "open_local", + "registry", + "caching", + "Callback", + "available_protocols", + "available_compressions", + "url_to_fs", +] + + +def process_entries(): + try: + from importlib.metadata import entry_points + except ImportError: + return + if entry_points is not None: + try: + eps = entry_points() + except TypeError: + pass # importlib-metadata < 0.8 + else: + if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0 + specs = eps.select(group="fsspec.specs") + else: + specs = eps.get("fsspec.specs", []) + registered_names = {} + for spec in specs: + err_msg = f"Unable to load filesystem from {spec}" + name = spec.name + if name in registered_names: + continue + registered_names[name] = True + register_implementation( + name, + spec.value.replace(":", "."), + errtxt=err_msg, + # We take our implementations as the ones to overload with if + # for some reason we encounter some, may be the same, already + # registered + clobber=True, + ) + + +process_entries() diff --git a/venv/lib/python3.13/site-packages/fsspec/_version.py b/venv/lib/python3.13/site-packages/fsspec/_version.py new file mode 100644 index 0000000000000000000000000000000000000000..40e2ee18b932293874287e812aae1de08e3988c6 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/_version.py @@ -0,0 +1,34 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = [ + "__version__", + "__version_tuple__", + "version", + "version_tuple", + "__commit_id__", + "commit_id", +] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] + COMMIT_ID = Union[str, None] +else: + VERSION_TUPLE = object + COMMIT_ID = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE +commit_id: COMMIT_ID +__commit_id__: COMMIT_ID + +__version__ = version = '2025.10.0' +__version_tuple__ = version_tuple = (2025, 10, 0) + +__commit_id__ = commit_id = None diff --git a/venv/lib/python3.13/site-packages/fsspec/caching.py b/venv/lib/python3.13/site-packages/fsspec/caching.py new file mode 100644 index 0000000000000000000000000000000000000000..de6a4e3407a62a1c2123bf2b4a81afb96c54150a --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/caching.py @@ -0,0 +1,1004 @@ +from __future__ import annotations + +import collections +import functools +import logging +import math +import os +import threading +import warnings +from collections import OrderedDict +from concurrent.futures import Future, ThreadPoolExecutor +from itertools import groupby +from operator import itemgetter +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Generic, + NamedTuple, + TypeVar, +) + +if TYPE_CHECKING: + import mmap + + from typing_extensions import ParamSpec + + P = ParamSpec("P") +else: + P = TypeVar("P") + +T = TypeVar("T") + + +logger = logging.getLogger("fsspec") + +Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes +MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes + + +class BaseCache: + """Pass-though cache: doesn't keep anything, calls every time + + Acts as base class for other cachers + + Parameters + ---------- + blocksize: int + How far to read ahead in numbers of bytes + fetcher: func + Function of the form f(start, end) which gets bytes from remote as + specified + size: int + How big this file is + """ + + name: ClassVar[str] = "none" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + self.blocksize = blocksize + self.nblocks = 0 + self.fetcher = fetcher + self.size = size + self.hit_count = 0 + self.miss_count = 0 + # the bytes that we actually requested + self.total_requested_bytes = 0 + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + if start is None: + start = 0 + if stop is None: + stop = self.size + if start >= self.size or start >= stop: + return b"" + return self.fetcher(start, stop) + + def _reset_stats(self) -> None: + """Reset hit and miss counts for a more ganular report e.g. by file.""" + self.hit_count = 0 + self.miss_count = 0 + self.total_requested_bytes = 0 + + def _log_stats(self) -> str: + """Return a formatted string of the cache statistics.""" + if self.hit_count == 0 and self.miss_count == 0: + # a cache that does nothing, this is for logs only + return "" + return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes" + + def __repr__(self) -> str: + # TODO: use rich for better formatting + return f""" + <{self.__class__.__name__}: + block size : {self.blocksize} + block count : {self.nblocks} + file size : {self.size} + cache hits : {self.hit_count} + cache misses: {self.miss_count} + total requested bytes: {self.total_requested_bytes}> + """ + + +class MMapCache(BaseCache): + """memory-mapped sparse file cache + + Opens temporary file, which is filled blocks-wise when data is requested. + Ensure there is enough disc space in the temporary location. + + This cache method might only work on posix + + Parameters + ---------- + blocksize: int + How far to read ahead in numbers of bytes + fetcher: Fetcher + Function of the form f(start, end) which gets bytes from remote as + specified + size: int + How big this file is + location: str + Where to create the temporary file. If None, a temporary file is + created using tempfile.TemporaryFile(). + blocks: set[int] + Set of block numbers that have already been fetched. If None, an empty + set is created. + multi_fetcher: MultiFetcher + Function of the form f([(start, end)]) which gets bytes from remote + as specified. This function is used to fetch multiple blocks at once. + If not specified, the fetcher function is used instead. + """ + + name = "mmap" + + def __init__( + self, + blocksize: int, + fetcher: Fetcher, + size: int, + location: str | None = None, + blocks: set[int] | None = None, + multi_fetcher: MultiFetcher | None = None, + ) -> None: + super().__init__(blocksize, fetcher, size) + self.blocks = set() if blocks is None else blocks + self.location = location + self.multi_fetcher = multi_fetcher + self.cache = self._makefile() + + def _makefile(self) -> mmap.mmap | bytearray: + import mmap + import tempfile + + if self.size == 0: + return bytearray() + + # posix version + if self.location is None or not os.path.exists(self.location): + if self.location is None: + fd = tempfile.TemporaryFile() + self.blocks = set() + else: + fd = open(self.location, "wb+") + fd.seek(self.size - 1) + fd.write(b"1") + fd.flush() + else: + fd = open(self.location, "r+b") + + return mmap.mmap(fd.fileno(), self.size) + + def _fetch(self, start: int | None, end: int | None) -> bytes: + logger.debug(f"MMap cache fetching {start}-{end}") + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + start_block = start // self.blocksize + end_block = end // self.blocksize + block_range = range(start_block, end_block + 1) + # Determine which blocks need to be fetched. This sequence is sorted by construction. + need = (i for i in block_range if i not in self.blocks) + # Count the number of blocks already cached + self.hit_count += sum(1 for i in block_range if i in self.blocks) + + ranges = [] + + # Consolidate needed blocks. + # Algorithm adapted from Python 2.x itertools documentation. + # We are grouping an enumerated sequence of blocks. By comparing when the difference + # between an ascending range (provided by enumerate) and the needed block numbers + # we can detect when the block number skips values. The key computes this difference. + # Whenever the difference changes, we know that we have previously cached block(s), + # and a new group is started. In other words, this algorithm neatly groups + # runs of consecutive block numbers so they can be fetched together. + for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]): + # Extract the blocks from the enumerated sequence + _blocks = tuple(map(itemgetter(1), _blocks)) + # Compute start of first block + sstart = _blocks[0] * self.blocksize + # Compute the end of the last block. Last block may not be full size. + send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size) + + # Fetch bytes (could be multiple consecutive blocks) + self.total_requested_bytes += send - sstart + logger.debug( + f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})" + ) + ranges.append((sstart, send)) + + # Update set of cached blocks + self.blocks.update(_blocks) + # Update cache statistics with number of blocks we had to cache + self.miss_count += len(_blocks) + + if not ranges: + return self.cache[start:end] + + if self.multi_fetcher: + logger.debug(f"MMap get blocks {ranges}") + for idx, r in enumerate(self.multi_fetcher(ranges)): + (sstart, send) = ranges[idx] + logger.debug(f"MMap copy block ({sstart}-{send}") + self.cache[sstart:send] = r + else: + for sstart, send in ranges: + logger.debug(f"MMap get block ({sstart}-{send}") + self.cache[sstart:send] = self.fetcher(sstart, send) + + return self.cache[start:end] + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__.copy() + # Remove the unpicklable entries. + del state["cache"] + return state + + def __setstate__(self, state: dict[str, Any]) -> None: + # Restore instance attributes + self.__dict__.update(state) + self.cache = self._makefile() + + +class ReadAheadCache(BaseCache): + """Cache which reads only when we get beyond a block of data + + This is a much simpler version of BytesCache, and does not attempt to + fill holes in the cache or keep fragments alive. It is best suited to + many small reads in a sequential order (e.g., reading lines from a file). + """ + + name = "readahead" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + super().__init__(blocksize, fetcher, size) + self.cache = b"" + self.start = 0 + self.end = 0 + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None or end > self.size: + end = self.size + if start >= self.size or start >= end: + return b"" + l = end - start + if start >= self.start and end <= self.end: + # cache hit + self.hit_count += 1 + return self.cache[start - self.start : end - self.start] + elif self.start <= start < self.end: + # partial hit + self.miss_count += 1 + part = self.cache[start - self.start :] + l -= len(part) + start = self.end + else: + # miss + self.miss_count += 1 + part = b"" + end = min(self.size, end + self.blocksize) + self.total_requested_bytes += end - start + self.cache = self.fetcher(start, end) # new block replaces old + self.start = start + self.end = self.start + len(self.cache) + return part + self.cache[:l] + + +class FirstChunkCache(BaseCache): + """Caches the first block of a file only + + This may be useful for file types where the metadata is stored in the header, + but is randomly accessed. + """ + + name = "first" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + if blocksize > size: + # this will buffer the whole thing + blocksize = size + super().__init__(blocksize, fetcher, size) + self.cache: bytes | None = None + + def _fetch(self, start: int | None, end: int | None) -> bytes: + start = start or 0 + if start > self.size: + logger.debug("FirstChunkCache: requested start > file size") + return b"" + + end = min(end, self.size) + + if start < self.blocksize: + if self.cache is None: + self.miss_count += 1 + if end > self.blocksize: + self.total_requested_bytes += end + data = self.fetcher(0, end) + self.cache = data[: self.blocksize] + return data[start:] + self.cache = self.fetcher(0, self.blocksize) + self.total_requested_bytes += self.blocksize + part = self.cache[start:end] + if end > self.blocksize: + self.total_requested_bytes += end - self.blocksize + part += self.fetcher(self.blocksize, end) + self.hit_count += 1 + return part + else: + self.miss_count += 1 + self.total_requested_bytes += end - start + return self.fetcher(start, end) + + +class BlockCache(BaseCache): + """ + Cache holding memory as a set of blocks. + + Requests are only ever made ``blocksize`` at a time, and are + stored in an LRU cache. The least recently accessed block is + discarded when more than ``maxblocks`` are stored. + + Parameters + ---------- + blocksize : int + The number of bytes to store in each block. + Requests are only ever made for ``blocksize``, so this + should balance the overhead of making a request against + the granularity of the blocks. + fetcher : Callable + size : int + The total size of the file being cached. + maxblocks : int + The maximum number of blocks to cache for. The maximum memory + use for this cache is then ``blocksize * maxblocks``. + """ + + name = "blockcache" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32 + ) -> None: + super().__init__(blocksize, fetcher, size) + self.nblocks = math.ceil(size / blocksize) + self.maxblocks = maxblocks + self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block) + + def cache_info(self): + """ + The statistics on the block cache. + + Returns + ------- + NamedTuple + Returned directly from the LRU Cache used internally. + """ + return self._fetch_block_cached.cache_info() + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__ + del state["_fetch_block_cached"] + return state + + def __setstate__(self, state: dict[str, Any]) -> None: + self.__dict__.update(state) + self._fetch_block_cached = functools.lru_cache(state["maxblocks"])( + self._fetch_block + ) + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + + # byte position -> block numbers + start_block_number = start // self.blocksize + end_block_number = end // self.blocksize + + # these are cached, so safe to do multiple calls for the same start and end. + for block_number in range(start_block_number, end_block_number + 1): + self._fetch_block_cached(block_number) + + return self._read_cache( + start, + end, + start_block_number=start_block_number, + end_block_number=end_block_number, + ) + + def _fetch_block(self, block_number: int) -> bytes: + """ + Fetch the block of data for `block_number`. + """ + if block_number > self.nblocks: + raise ValueError( + f"'block_number={block_number}' is greater than " + f"the number of blocks ({self.nblocks})" + ) + + start = block_number * self.blocksize + end = start + self.blocksize + self.total_requested_bytes += end - start + self.miss_count += 1 + logger.info("BlockCache fetching block %d", block_number) + block_contents = super()._fetch(start, end) + return block_contents + + def _read_cache( + self, start: int, end: int, start_block_number: int, end_block_number: int + ) -> bytes: + """ + Read from our block cache. + + Parameters + ---------- + start, end : int + The start and end byte positions. + start_block_number, end_block_number : int + The start and end block numbers. + """ + start_pos = start % self.blocksize + end_pos = end % self.blocksize + + self.hit_count += 1 + if start_block_number == end_block_number: + block: bytes = self._fetch_block_cached(start_block_number) + return block[start_pos:end_pos] + + else: + # read from the initial + out = [self._fetch_block_cached(start_block_number)[start_pos:]] + + # intermediate blocks + # Note: it'd be nice to combine these into one big request. However + # that doesn't play nicely with our LRU cache. + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) + + # final block + out.append(self._fetch_block_cached(end_block_number)[:end_pos]) + + return b"".join(out) + + +class BytesCache(BaseCache): + """Cache which holds data in a in-memory bytes object + + Implements read-ahead by the block size, for semi-random reads progressing + through the file. + + Parameters + ---------- + trim: bool + As we read more data, whether to discard the start of the buffer when + we are more than a blocksize ahead of it. + """ + + name: ClassVar[str] = "bytes" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True + ) -> None: + super().__init__(blocksize, fetcher, size) + self.cache = b"" + self.start: int | None = None + self.end: int | None = None + self.trim = trim + + def _fetch(self, start: int | None, end: int | None) -> bytes: + # TODO: only set start/end after fetch, in case it fails? + # is this where retry logic might go? + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + if ( + self.start is not None + and start >= self.start + and self.end is not None + and end < self.end + ): + # cache hit: we have all the required data + offset = start - self.start + self.hit_count += 1 + return self.cache[offset : offset + end - start] + + if self.blocksize: + bend = min(self.size, end + self.blocksize) + else: + bend = end + + if bend == start or start > self.size: + return b"" + + if (self.start is None or start < self.start) and ( + self.end is None or end > self.end + ): + # First read, or extending both before and after + self.total_requested_bytes += bend - start + self.miss_count += 1 + self.cache = self.fetcher(start, bend) + self.start = start + else: + assert self.start is not None + assert self.end is not None + self.miss_count += 1 + + if start < self.start: + if self.end is None or self.end - end > self.blocksize: + self.total_requested_bytes += bend - start + self.cache = self.fetcher(start, bend) + self.start = start + else: + self.total_requested_bytes += self.start - start + new = self.fetcher(start, self.start) + self.start = start + self.cache = new + self.cache + elif self.end is not None and bend > self.end: + if self.end > self.size: + pass + elif end - self.end > self.blocksize: + self.total_requested_bytes += bend - start + self.cache = self.fetcher(start, bend) + self.start = start + else: + self.total_requested_bytes += bend - self.end + new = self.fetcher(self.end, bend) + self.cache = self.cache + new + + self.end = self.start + len(self.cache) + offset = start - self.start + out = self.cache[offset : offset + end - start] + if self.trim: + num = (self.end - self.start) // (self.blocksize + 1) + if num > 1: + self.start += self.blocksize * num + self.cache = self.cache[self.blocksize * num :] + return out + + def __len__(self) -> int: + return len(self.cache) + + +class AllBytes(BaseCache): + """Cache entire contents of the file""" + + name: ClassVar[str] = "all" + + def __init__( + self, + blocksize: int | None = None, + fetcher: Fetcher | None = None, + size: int | None = None, + data: bytes | None = None, + ) -> None: + super().__init__(blocksize, fetcher, size) # type: ignore[arg-type] + if data is None: + self.miss_count += 1 + self.total_requested_bytes += self.size + data = self.fetcher(0, self.size) + self.data = data + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + self.hit_count += 1 + return self.data[start:stop] + + +class KnownPartsOfAFile(BaseCache): + """ + Cache holding known file parts. + + Parameters + ---------- + blocksize: int + How far to read ahead in numbers of bytes + fetcher: func + Function of the form f(start, end) which gets bytes from remote as + specified + size: int + How big this file is + data: dict + A dictionary mapping explicit `(start, stop)` file-offset tuples + with known bytes. + strict: bool, default True + Whether to fetch reads that go beyond a known byte-range boundary. + If `False`, any read that ends outside a known part will be zero + padded. Note that zero padding will not be used for reads that + begin outside a known byte-range. + """ + + name: ClassVar[str] = "parts" + + def __init__( + self, + blocksize: int, + fetcher: Fetcher, + size: int, + data: dict[tuple[int, int], bytes] | None = None, + strict: bool = True, + **_: Any, + ): + super().__init__(blocksize, fetcher, size) + self.strict = strict + + # simple consolidation of contiguous blocks + if data: + old_offsets = sorted(data.keys()) + offsets = [old_offsets[0]] + blocks = [data.pop(old_offsets[0])] + for start, stop in old_offsets[1:]: + start0, stop0 = offsets[-1] + if start == stop0: + offsets[-1] = (start0, stop) + blocks[-1] += data.pop((start, stop)) + else: + offsets.append((start, stop)) + blocks.append(data.pop((start, stop))) + + self.data = dict(zip(offsets, blocks)) + else: + self.data = {} + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + if start is None: + start = 0 + if stop is None: + stop = self.size + + out = b"" + for (loc0, loc1), data in self.data.items(): + # If self.strict=False, use zero-padded data + # for reads beyond the end of a "known" buffer + if loc0 <= start < loc1: + off = start - loc0 + out = data[off : off + stop - start] + if not self.strict or loc0 <= stop <= loc1: + # The request is within a known range, or + # it begins within a known range, and we + # are allowed to pad reads beyond the + # buffer with zero + out += b"\x00" * (stop - start - len(out)) + self.hit_count += 1 + return out + else: + # The request ends outside a known range, + # and we are being "strict" about reads + # beyond the buffer + start = loc1 + break + + # We only get here if there is a request outside the + # known parts of the file. In an ideal world, this + # should never happen + if self.fetcher is None: + # We cannot fetch the data, so raise an error + raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ") + # We can fetch the data, but should warn the user + # that this may be slow + warnings.warn( + f"Read is outside the known file parts: {(start, stop)}. " + f"IO/caching performance may be poor!" + ) + logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}") + self.total_requested_bytes += stop - start + self.miss_count += 1 + return out + super()._fetch(start, stop) + + +class UpdatableLRU(Generic[P, T]): + """ + Custom implementation of LRU cache that allows updating keys + + Used by BackgroudBlockCache + """ + + class CacheInfo(NamedTuple): + hits: int + misses: int + maxsize: int + currsize: int + + def __init__(self, func: Callable[P, T], max_size: int = 128) -> None: + self._cache: OrderedDict[Any, T] = collections.OrderedDict() + self._func = func + self._max_size = max_size + self._hits = 0 + self._misses = 0 + self._lock = threading.Lock() + + def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T: + if kwargs: + raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}") + with self._lock: + if args in self._cache: + self._cache.move_to_end(args) + self._hits += 1 + return self._cache[args] + + result = self._func(*args, **kwargs) + + with self._lock: + self._cache[args] = result + self._misses += 1 + if len(self._cache) > self._max_size: + self._cache.popitem(last=False) + + return result + + def is_key_cached(self, *args: Any) -> bool: + with self._lock: + return args in self._cache + + def add_key(self, result: T, *args: Any) -> None: + with self._lock: + self._cache[args] = result + if len(self._cache) > self._max_size: + self._cache.popitem(last=False) + + def cache_info(self) -> UpdatableLRU.CacheInfo: + with self._lock: + return self.CacheInfo( + maxsize=self._max_size, + currsize=len(self._cache), + hits=self._hits, + misses=self._misses, + ) + + +class BackgroundBlockCache(BaseCache): + """ + Cache holding memory as a set of blocks with pre-loading of + the next block in the background. + + Requests are only ever made ``blocksize`` at a time, and are + stored in an LRU cache. The least recently accessed block is + discarded when more than ``maxblocks`` are stored. If the + next block is not in cache, it is loaded in a separate thread + in non-blocking way. + + Parameters + ---------- + blocksize : int + The number of bytes to store in each block. + Requests are only ever made for ``blocksize``, so this + should balance the overhead of making a request against + the granularity of the blocks. + fetcher : Callable + size : int + The total size of the file being cached. + maxblocks : int + The maximum number of blocks to cache for. The maximum memory + use for this cache is then ``blocksize * maxblocks``. + """ + + name: ClassVar[str] = "background" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32 + ) -> None: + super().__init__(blocksize, fetcher, size) + self.nblocks = math.ceil(size / blocksize) + self.maxblocks = maxblocks + self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks) + + self._thread_executor = ThreadPoolExecutor(max_workers=1) + self._fetch_future_block_number: int | None = None + self._fetch_future: Future[bytes] | None = None + self._fetch_future_lock = threading.Lock() + + def cache_info(self) -> UpdatableLRU.CacheInfo: + """ + The statistics on the block cache. + + Returns + ------- + NamedTuple + Returned directly from the LRU Cache used internally. + """ + return self._fetch_block_cached.cache_info() + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__ + del state["_fetch_block_cached"] + del state["_thread_executor"] + del state["_fetch_future_block_number"] + del state["_fetch_future"] + del state["_fetch_future_lock"] + return state + + def __setstate__(self, state) -> None: + self.__dict__.update(state) + self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"]) + self._thread_executor = ThreadPoolExecutor(max_workers=1) + self._fetch_future_block_number = None + self._fetch_future = None + self._fetch_future_lock = threading.Lock() + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + + # byte position -> block numbers + start_block_number = start // self.blocksize + end_block_number = end // self.blocksize + + fetch_future_block_number = None + fetch_future = None + with self._fetch_future_lock: + # Background thread is running. Check we we can or must join it. + if self._fetch_future is not None: + assert self._fetch_future_block_number is not None + if self._fetch_future.done(): + logger.info("BlockCache joined background fetch without waiting.") + self._fetch_block_cached.add_key( + self._fetch_future.result(), self._fetch_future_block_number + ) + # Cleanup the fetch variables. Done with fetching the block. + self._fetch_future_block_number = None + self._fetch_future = None + else: + # Must join if we need the block for the current fetch + must_join = bool( + start_block_number + <= self._fetch_future_block_number + <= end_block_number + ) + if must_join: + # Copy to the local variables to release lock + # before waiting for result + fetch_future_block_number = self._fetch_future_block_number + fetch_future = self._fetch_future + + # Cleanup the fetch variables. Have a local copy. + self._fetch_future_block_number = None + self._fetch_future = None + + # Need to wait for the future for the current read + if fetch_future is not None: + logger.info("BlockCache waiting for background fetch.") + # Wait until result and put it in cache + self._fetch_block_cached.add_key( + fetch_future.result(), fetch_future_block_number + ) + + # these are cached, so safe to do multiple calls for the same start and end. + for block_number in range(start_block_number, end_block_number + 1): + self._fetch_block_cached(block_number) + + # fetch next block in the background if nothing is running in the background, + # the block is within file and it is not already cached + end_block_plus_1 = end_block_number + 1 + with self._fetch_future_lock: + if ( + self._fetch_future is None + and end_block_plus_1 <= self.nblocks + and not self._fetch_block_cached.is_key_cached(end_block_plus_1) + ): + self._fetch_future_block_number = end_block_plus_1 + self._fetch_future = self._thread_executor.submit( + self._fetch_block, end_block_plus_1, "async" + ) + + return self._read_cache( + start, + end, + start_block_number=start_block_number, + end_block_number=end_block_number, + ) + + def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes: + """ + Fetch the block of data for `block_number`. + """ + if block_number > self.nblocks: + raise ValueError( + f"'block_number={block_number}' is greater than " + f"the number of blocks ({self.nblocks})" + ) + + start = block_number * self.blocksize + end = start + self.blocksize + logger.info("BlockCache fetching block (%s) %d", log_info, block_number) + self.total_requested_bytes += end - start + self.miss_count += 1 + block_contents = super()._fetch(start, end) + return block_contents + + def _read_cache( + self, start: int, end: int, start_block_number: int, end_block_number: int + ) -> bytes: + """ + Read from our block cache. + + Parameters + ---------- + start, end : int + The start and end byte positions. + start_block_number, end_block_number : int + The start and end block numbers. + """ + start_pos = start % self.blocksize + end_pos = end % self.blocksize + + # kind of pointless to count this as a hit, but it is + self.hit_count += 1 + + if start_block_number == end_block_number: + block = self._fetch_block_cached(start_block_number) + return block[start_pos:end_pos] + + else: + # read from the initial + out = [self._fetch_block_cached(start_block_number)[start_pos:]] + + # intermediate blocks + # Note: it'd be nice to combine these into one big request. However + # that doesn't play nicely with our LRU cache. + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) + + # final block + out.append(self._fetch_block_cached(end_block_number)[:end_pos]) + + return b"".join(out) + + +caches: dict[str | None, type[BaseCache]] = { + # one custom case + None: BaseCache, +} + + +def register_cache(cls: type[BaseCache], clobber: bool = False) -> None: + """'Register' cache implementation. + + Parameters + ---------- + clobber: bool, optional + If set to True (default is False) - allow to overwrite existing + entry. + + Raises + ------ + ValueError + """ + name = cls.name + if not clobber and name in caches: + raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}") + caches[name] = cls + + +for c in ( + BaseCache, + MMapCache, + BytesCache, + ReadAheadCache, + BlockCache, + FirstChunkCache, + AllBytes, + KnownPartsOfAFile, + BackgroundBlockCache, +): + register_cache(c) diff --git a/venv/lib/python3.13/site-packages/fsspec/compression.py b/venv/lib/python3.13/site-packages/fsspec/compression.py new file mode 100644 index 0000000000000000000000000000000000000000..e21da562bbab49c2ad60e9d9beb546af8dadea45 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/compression.py @@ -0,0 +1,182 @@ +"""Helper functions for a standard streaming compression API""" + +from zipfile import ZipFile + +import fsspec.utils +from fsspec.spec import AbstractBufferedFile + + +def noop_file(file, mode, **kwargs): + return file + + +# TODO: files should also be available as contexts +# should be functions of the form func(infile, mode=, **kwargs) -> file-like +compr = {None: noop_file} + + +def register_compression(name, callback, extensions, force=False): + """Register an "inferable" file compression type. + + Registers transparent file compression type for use with fsspec.open. + Compression can be specified by name in open, or "infer"-ed for any files + ending with the given extensions. + + Args: + name: (str) The compression type name. Eg. "gzip". + callback: A callable of form (infile, mode, **kwargs) -> file-like. + Accepts an input file-like object, the target mode and kwargs. + Returns a wrapped file-like object. + extensions: (str, Iterable[str]) A file extension, or list of file + extensions for which to infer this compression scheme. Eg. "gz". + force: (bool) Force re-registration of compression type or extensions. + + Raises: + ValueError: If name or extensions already registered, and not force. + + """ + if isinstance(extensions, str): + extensions = [extensions] + + # Validate registration + if name in compr and not force: + raise ValueError(f"Duplicate compression registration: {name}") + + for ext in extensions: + if ext in fsspec.utils.compressions and not force: + raise ValueError(f"Duplicate compression file extension: {ext} ({name})") + + compr[name] = callback + + for ext in extensions: + fsspec.utils.compressions[ext] = name + + +def unzip(infile, mode="rb", filename=None, **kwargs): + if "r" not in mode: + filename = filename or "file" + z = ZipFile(infile, mode="w", **kwargs) + fo = z.open(filename, mode="w") + fo.close = lambda closer=fo.close: closer() or z.close() + return fo + z = ZipFile(infile) + if filename is None: + filename = z.namelist()[0] + return z.open(filename, mode="r", **kwargs) + + +register_compression("zip", unzip, "zip") + +try: + from bz2 import BZ2File +except ImportError: + pass +else: + register_compression("bz2", BZ2File, "bz2") + +try: # pragma: no cover + from isal import igzip + + def isal(infile, mode="rb", **kwargs): + return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs) + + register_compression("gzip", isal, "gz") +except ImportError: + from gzip import GzipFile + + register_compression( + "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz" + ) + +try: + from lzma import LZMAFile + + register_compression("lzma", LZMAFile, "lzma") + register_compression("xz", LZMAFile, "xz") +except ImportError: + pass + +try: + import lzmaffi + + register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True) + register_compression("xz", lzmaffi.LZMAFile, "xz", force=True) +except ImportError: + pass + + +class SnappyFile(AbstractBufferedFile): + def __init__(self, infile, mode, **kwargs): + import snappy + + super().__init__( + fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs + ) + self.infile = infile + if "r" in mode: + self.codec = snappy.StreamDecompressor() + else: + self.codec = snappy.StreamCompressor() + + def _upload_chunk(self, final=False): + self.buffer.seek(0) + out = self.codec.add_chunk(self.buffer.read()) + self.infile.write(out) + return True + + def seek(self, loc, whence=0): + raise NotImplementedError("SnappyFile is not seekable") + + def seekable(self): + return False + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + data = self.infile.read(end - start) + return self.codec.decompress(data) + + +try: + import snappy + + snappy.compress(b"") + # Snappy may use the .sz file extension, but this is not part of the + # standard implementation. + register_compression("snappy", SnappyFile, []) + +except (ImportError, NameError, AttributeError): + pass + +try: + import lz4.frame + + register_compression("lz4", lz4.frame.open, "lz4") +except ImportError: + pass + +try: + # zstd in the standard library for python >= 3.14 + from compression.zstd import ZstdFile + + register_compression("zstd", ZstdFile, "zst") + +except ImportError: + try: + import zstandard as zstd + + def zstandard_file(infile, mode="rb"): + if "r" in mode: + cctx = zstd.ZstdDecompressor() + return cctx.stream_reader(infile) + else: + cctx = zstd.ZstdCompressor(level=10) + return cctx.stream_writer(infile) + + register_compression("zstd", zstandard_file, "zst") + except ImportError: + pass + + +def available_compressions(): + """Return a list of the implemented compressions.""" + return list(compr) diff --git a/venv/lib/python3.13/site-packages/fsspec/config.py b/venv/lib/python3.13/site-packages/fsspec/config.py new file mode 100644 index 0000000000000000000000000000000000000000..76d9af14aaf7df47c4551c169f27b05abf9c269e --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/config.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import configparser +import json +import os +import warnings +from typing import Any + +conf: dict[str, dict[str, Any]] = {} +default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec") +conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir) + + +def set_conf_env(conf_dict, envdict=os.environ): + """Set config values from environment variables + + Looks for variables of the form ``FSSPEC_`` and + ``FSSPEC__``. For ``FSSPEC_`` the value is parsed + as a json dictionary and used to ``update`` the config of the + corresponding protocol. For ``FSSPEC__`` there is no + attempt to convert the string value, but the kwarg keys will be lower-cased. + + The ``FSSPEC__`` variables are applied after the + ``FSSPEC_`` ones. + + Parameters + ---------- + conf_dict : dict(str, dict) + This dict will be mutated + envdict : dict-like(str, str) + Source for the values - usually the real environment + """ + kwarg_keys = [] + for key in envdict: + if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_": + if key.count("_") > 1: + kwarg_keys.append(key) + continue + try: + value = json.loads(envdict[key]) + except json.decoder.JSONDecodeError as ex: + warnings.warn( + f"Ignoring environment variable {key} due to a parse failure: {ex}" + ) + else: + if isinstance(value, dict): + _, proto = key.split("_", 1) + conf_dict.setdefault(proto.lower(), {}).update(value) + else: + warnings.warn( + f"Ignoring environment variable {key} due to not being a dict:" + f" {type(value)}" + ) + elif key.startswith("FSSPEC"): + warnings.warn( + f"Ignoring environment variable {key} due to having an unexpected name" + ) + + for key in kwarg_keys: + _, proto, kwarg = key.split("_", 2) + conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key] + + +def set_conf_files(cdir, conf_dict): + """Set config values from files + + Scans for INI and JSON files in the given dictionary, and uses their + contents to set the config. In case of repeated values, later values + win. + + In the case of INI files, all values are strings, and these will not + be converted. + + Parameters + ---------- + cdir : str + Directory to search + conf_dict : dict(str, dict) + This dict will be mutated + """ + if not os.path.isdir(cdir): + return + allfiles = sorted(os.listdir(cdir)) + for fn in allfiles: + if fn.endswith(".ini"): + ini = configparser.ConfigParser() + ini.read(os.path.join(cdir, fn)) + for key in ini: + if key == "DEFAULT": + continue + conf_dict.setdefault(key, {}).update(dict(ini[key])) + if fn.endswith(".json"): + with open(os.path.join(cdir, fn)) as f: + js = json.load(f) + for key in js: + conf_dict.setdefault(key, {}).update(dict(js[key])) + + +def apply_config(cls, kwargs, conf_dict=None): + """Supply default values for kwargs when instantiating class + + Augments the passed kwargs, by finding entries in the config dict + which match the classes ``.protocol`` attribute (one or more str) + + Parameters + ---------- + cls : file system implementation + kwargs : dict + conf_dict : dict of dict + Typically this is the global configuration + + Returns + ------- + dict : the modified set of kwargs + """ + if conf_dict is None: + conf_dict = conf + protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol] + kw = {} + for proto in protos: + # default kwargs from the current state of the config + if proto in conf_dict: + kw.update(conf_dict[proto]) + # explicit kwargs always win + kw.update(**kwargs) + kwargs = kw + return kwargs + + +set_conf_files(conf_dir, conf) +set_conf_env(conf) diff --git a/venv/lib/python3.13/site-packages/fsspec/conftest.py b/venv/lib/python3.13/site-packages/fsspec/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..f05eb5c30d42b0c1c5cc432f9c217d8f0e01f412 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/conftest.py @@ -0,0 +1,125 @@ +import os +import shutil +import subprocess +import sys +import time +from collections import deque +from collections.abc import Generator, Sequence + +import pytest + +import fsspec + + +@pytest.fixture() +def m(): + """ + Fixture providing a memory filesystem. + """ + m = fsspec.filesystem("memory") + m.store.clear() + m.pseudo_dirs.clear() + m.pseudo_dirs.append("") + try: + yield m + finally: + m.store.clear() + m.pseudo_dirs.clear() + m.pseudo_dirs.append("") + + +class InstanceCacheInspector: + """ + Helper class to inspect instance caches of filesystem classes in tests. + """ + + def clear(self) -> None: + """ + Clear instance caches of all currently imported filesystem classes. + """ + classes = deque([fsspec.spec.AbstractFileSystem]) + while classes: + cls = classes.popleft() + cls.clear_instance_cache() + classes.extend(cls.__subclasses__()) + + def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]: + """ + Gather counts of filesystem instances in the instance caches + of all currently imported filesystem classes. + + Parameters + ---------- + omit_zero: + Whether to omit instance types with no cached instances. + """ + out: dict[str, int] = {} + classes = deque([fsspec.spec.AbstractFileSystem]) + while classes: + cls = classes.popleft() + count = len(cls._cache) # there is no public interface for the cache + # note: skip intermediate AbstractFileSystem subclasses + # if they proxy the protocol attribute via a property. + if isinstance(cls.protocol, (Sequence, str)): + key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0] + if count or not omit_zero: + out[key] = count + classes.extend(cls.__subclasses__()) + return out + + +@pytest.fixture(scope="function", autouse=True) +def instance_caches() -> Generator[InstanceCacheInspector, None, None]: + """ + Fixture to ensure empty filesystem instance caches before and after a test. + + Used by default for all tests. + Clears caches of all imported filesystem classes. + Can be used to write test assertions about instance caches. + + Usage: + + def test_something(instance_caches): + # Test code here + fsspec.open("file://abc") + fsspec.open("memory://foo/bar") + + # Test assertion + assert instance_caches.gather_counts() == {"file": 1, "memory": 1} + + Returns + ------- + instance_caches: An instance cache inspector for clearing and inspecting caches. + """ + ic = InstanceCacheInspector() + + ic.clear() + try: + yield ic + finally: + ic.clear() + + +@pytest.fixture(scope="function") +def ftp_writable(tmpdir): + """ + Fixture providing a writable FTP filesystem. + """ + pytest.importorskip("pyftpdlib") + + d = str(tmpdir) + with open(os.path.join(d, "out"), "wb") as f: + f.write(b"hello" * 10000) + P = subprocess.Popen( + [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"] + ) + try: + time.sleep(1) + yield "localhost", 2121, "user", "pass" + finally: + P.terminate() + P.wait() + try: + shutil.rmtree(tmpdir) + except Exception: + pass diff --git a/venv/lib/python3.13/site-packages/fsspec/core.py b/venv/lib/python3.13/site-packages/fsspec/core.py new file mode 100644 index 0000000000000000000000000000000000000000..a7db808ae0de2e8e4bb594708e78c599bdb1518e --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/core.py @@ -0,0 +1,743 @@ +from __future__ import annotations + +import io +import logging +import os +import re +from glob import has_magic +from pathlib import Path + +# for backwards compat, we export cache things from here too +from fsspec.caching import ( # noqa: F401 + BaseCache, + BlockCache, + BytesCache, + MMapCache, + ReadAheadCache, + caches, +) +from fsspec.compression import compr +from fsspec.config import conf +from fsspec.registry import filesystem, get_filesystem_class +from fsspec.utils import ( + _unstrip_protocol, + build_name_function, + infer_compression, + stringify_path, +) + +logger = logging.getLogger("fsspec") + + +class OpenFile: + """ + File-like object to be used in a context + + Can layer (buffered) text-mode and compression over any file-system, which + are typically binary-only. + + These instances are safe to serialize, as the low-level file object + is not created until invoked using ``with``. + + Parameters + ---------- + fs: FileSystem + The file system to use for opening the file. Should be a subclass or duck-type + with ``fsspec.spec.AbstractFileSystem`` + path: str + Location to open + mode: str like 'rb', optional + Mode of the opened file + compression: str or None, optional + Compression to apply + encoding: str or None, optional + The encoding to use if opened in text mode. + errors: str or None, optional + How to handle encoding errors if opened in text mode. + newline: None or str + Passed to TextIOWrapper in text mode, how to handle line endings. + autoopen: bool + If True, calls open() immediately. Mostly used by pickle + pos: int + If given and autoopen is True, seek to this location immediately + """ + + def __init__( + self, + fs, + path, + mode="rb", + compression=None, + encoding=None, + errors=None, + newline=None, + ): + self.fs = fs + self.path = path + self.mode = mode + self.compression = get_compression(path, compression) + self.encoding = encoding + self.errors = errors + self.newline = newline + self.fobjects = [] + + def __reduce__(self): + return ( + OpenFile, + ( + self.fs, + self.path, + self.mode, + self.compression, + self.encoding, + self.errors, + self.newline, + ), + ) + + def __repr__(self): + return f"" + + def __enter__(self): + mode = self.mode.replace("t", "").replace("b", "") + "b" + + try: + f = self.fs.open(self.path, mode=mode) + except FileNotFoundError as e: + if has_magic(self.path): + raise FileNotFoundError( + "%s not found. The URL contains glob characters: you maybe needed\n" + "to pass expand=True in fsspec.open() or the storage_options of \n" + "your library. You can also set the config value 'open_expand'\n" + "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.", + self.path, + ) from e + raise + + self.fobjects = [f] + + if self.compression is not None: + compress = compr[self.compression] + f = compress(f, mode=mode[0]) + self.fobjects.append(f) + + if "b" not in self.mode: + # assume, for example, that 'r' is equivalent to 'rt' as in builtin + f = PickleableTextIOWrapper( + f, encoding=self.encoding, errors=self.errors, newline=self.newline + ) + self.fobjects.append(f) + + return self.fobjects[-1] + + def __exit__(self, *args): + self.close() + + @property + def full_name(self): + return _unstrip_protocol(self.path, self.fs) + + def open(self): + """Materialise this as a real open file without context + + The OpenFile object should be explicitly closed to avoid enclosed file + instances persisting. You must, therefore, keep a reference to the OpenFile + during the life of the file-like it generates. + """ + return self.__enter__() + + def close(self): + """Close all encapsulated file objects""" + for f in reversed(self.fobjects): + if "r" not in self.mode and not f.closed: + f.flush() + f.close() + self.fobjects.clear() + + +class OpenFiles(list): + """List of OpenFile instances + + Can be used in a single context, which opens and closes all of the + contained files. Normal list access to get the elements works as + normal. + + A special case is made for caching filesystems - the files will + be down/uploaded together at the start or end of the context, and + this may happen concurrently, if the target filesystem supports it. + """ + + def __init__(self, *args, mode="rb", fs=None): + self.mode = mode + self.fs = fs + self.files = [] + super().__init__(*args) + + def __enter__(self): + if self.fs is None: + raise ValueError("Context has already been used") + + fs = self.fs + while True: + if hasattr(fs, "open_many"): + # check for concurrent cache download; or set up for upload + self.files = fs.open_many(self) + return self.files + if hasattr(fs, "fs") and fs.fs is not None: + fs = fs.fs + else: + break + return [s.__enter__() for s in self] + + def __exit__(self, *args): + fs = self.fs + [s.__exit__(*args) for s in self] + if "r" not in self.mode: + while True: + if hasattr(fs, "open_many"): + # check for concurrent cache upload + fs.commit_many(self.files) + return + if hasattr(fs, "fs") and fs.fs is not None: + fs = fs.fs + else: + break + + def __getitem__(self, item): + out = super().__getitem__(item) + if isinstance(item, slice): + return OpenFiles(out, mode=self.mode, fs=self.fs) + return out + + def __repr__(self): + return f"" + + +def open_files( + urlpath, + mode="rb", + compression=None, + encoding="utf8", + errors=None, + name_function=None, + num=1, + protocol=None, + newline=None, + auto_mkdir=True, + expand=True, + **kwargs, +): + """Given a path or paths, return a list of ``OpenFile`` objects. + + For writing, a str path must contain the "*" character, which will be filled + in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2. + + For either reading or writing, can instead provide explicit list of paths. + + Parameters + ---------- + urlpath: string or list + Absolute or relative filepath(s). Prefix with a protocol like ``s3://`` + to read from alternative filesystems. To read from multiple files you + can pass a globstring or a list of paths, with the caveat that they + must all have the same protocol. + mode: 'rb', 'wt', etc. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding: str + For text mode only + errors: None or str + Passed to TextIOWrapper in text mode + name_function: function or None + if opening a set of files for writing, those files do not yet exist, + so we need to generate their names by formatting the urlpath for + each sequence number + num: int [1] + if writing mode, number of files we expect to create (passed to + name+function) + protocol: str or None + If given, overrides the protocol found in the URL. + newline: bytes or None + Used for line terminator in text mode. If None, uses system default; + if blank, uses no translation. + auto_mkdir: bool (True) + If in write mode, this will ensure the target directory exists before + writing, by calling ``fs.mkdirs(exist_ok=True)``. + expand: bool + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Examples + -------- + >>> files = open_files('2015-*-*.csv') # doctest: +SKIP + >>> files = open_files( + ... 's3://bucket/2015-*-*.csv.gz', compression='gzip' + ... ) # doctest: +SKIP + + Returns + ------- + An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can + be used as a single context + + Notes + ----- + For a full list of the available protocols and the implementations that + they map across to see the latest online documentation: + + - For implementations built into ``fsspec`` see + https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations + - For implementations in separate packages see + https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + """ + fs, fs_token, paths = get_fs_token_paths( + urlpath, + mode, + num=num, + name_function=name_function, + storage_options=kwargs, + protocol=protocol, + expand=expand, + ) + if fs.protocol == "file": + fs.auto_mkdir = auto_mkdir + elif "r" not in mode and auto_mkdir: + parents = {fs._parent(path) for path in paths} + for parent in parents: + try: + fs.makedirs(parent, exist_ok=True) + except PermissionError: + pass + return OpenFiles( + [ + OpenFile( + fs, + path, + mode=mode, + compression=compression, + encoding=encoding, + errors=errors, + newline=newline, + ) + for path in paths + ], + mode=mode, + fs=fs, + ) + + +def _un_chain(path, kwargs): + # Avoid a circular import + from fsspec.implementations.chained import ChainedFileSystem + + if "::" in path: + x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word + bits = [] + for p in path.split("::"): + if "://" in p or x.match(p): + bits.append(p) + else: + bits.append(p + "://") + else: + bits = [path] + # [[url, protocol, kwargs], ...] + out = [] + previous_bit = None + kwargs = kwargs.copy() + for bit in reversed(bits): + protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file" + cls = get_filesystem_class(protocol) + extra_kwargs = cls._get_kwargs_from_urls(bit) + kws = kwargs.pop(protocol, {}) + if bit is bits[0]: + kws.update(kwargs) + kw = dict( + **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]}, + **kws, + ) + bit = cls._strip_protocol(bit) + if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem): + bit = previous_bit + out.append((bit, protocol, kw)) + previous_bit = bit + out.reverse() + return out + + +def url_to_fs(url, **kwargs): + """ + Turn fully-qualified and potentially chained URL into filesystem instance + + Parameters + ---------- + url : str + The fsspec-compatible URL + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Returns + ------- + filesystem : FileSystem + The new filesystem discovered from ``url`` and created with + ``**kwargs``. + urlpath : str + The file-systems-specific URL for ``url``. + """ + url = stringify_path(url) + # non-FS arguments that appear in fsspec.open() + # inspect could keep this in sync with open()'s signature + known_kwargs = { + "compression", + "encoding", + "errors", + "expand", + "mode", + "name_function", + "newline", + "num", + } + kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs} + chain = _un_chain(url, kwargs) + inkwargs = {} + # Reverse iterate the chain, creating a nested target_* structure + for i, ch in enumerate(reversed(chain)): + urls, protocol, kw = ch + if i == len(chain) - 1: + inkwargs = dict(**kw, **inkwargs) + continue + inkwargs["target_options"] = dict(**kw, **inkwargs) + inkwargs["target_protocol"] = protocol + inkwargs["fo"] = urls + urlpath, protocol, _ = chain[0] + fs = filesystem(protocol, **inkwargs) + return fs, urlpath + + +DEFAULT_EXPAND = conf.get("open_expand", False) + + +def open( + urlpath, + mode="rb", + compression=None, + encoding="utf8", + errors=None, + protocol=None, + newline=None, + expand=None, + **kwargs, +): + """Given a path or paths, return one ``OpenFile`` object. + + Parameters + ---------- + urlpath: string or list + Absolute or relative filepath. Prefix with a protocol like ``s3://`` + to read from alternative filesystems. Should not include glob + character(s). + mode: 'rb', 'wt', etc. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding: str + For text mode only + errors: None or str + Passed to TextIOWrapper in text mode + protocol: str or None + If given, overrides the protocol found in the URL. + newline: bytes or None + Used for line terminator in text mode. If None, uses system default; + if blank, uses no translation. + expand: bool or None + Whether to regard file paths containing special glob characters as needing + expansion (finding the first match) or absolute. Setting False allows using + paths which do embed such characters. If None (default), this argument + takes its value from the DEFAULT_EXPAND module variable, which takes + its initial value from the "open_expand" config value at startup, which will + be False if not set. + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Examples + -------- + >>> openfile = open('2015-01-01.csv') # doctest: +SKIP + >>> openfile = open( + ... 's3://bucket/2015-01-01.csv.gz', compression='gzip' + ... ) # doctest: +SKIP + >>> with openfile as f: + ... df = pd.read_csv(f) # doctest: +SKIP + ... + + Returns + ------- + ``OpenFile`` object. + + Notes + ----- + For a full list of the available protocols and the implementations that + they map across to see the latest online documentation: + + - For implementations built into ``fsspec`` see + https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations + - For implementations in separate packages see + https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + """ + expand = DEFAULT_EXPAND if expand is None else expand + out = open_files( + urlpath=[urlpath], + mode=mode, + compression=compression, + encoding=encoding, + errors=errors, + protocol=protocol, + newline=newline, + expand=expand, + **kwargs, + ) + if not out: + raise FileNotFoundError(urlpath) + return out[0] + + +def open_local( + url: str | list[str] | Path | list[Path], + mode: str = "rb", + **storage_options: dict, +) -> str | list[str]: + """Open file(s) which can be resolved to local + + For files which either are local, or get downloaded upon open + (e.g., by file caching) + + Parameters + ---------- + url: str or list(str) + mode: str + Must be read mode + storage_options: + passed on to FS for or used by open_files (e.g., compression) + """ + if "r" not in mode: + raise ValueError("Can only ensure local files when reading") + of = open_files(url, mode=mode, **storage_options) + if not getattr(of[0].fs, "local_file", False): + raise ValueError( + "open_local can only be used on a filesystem which" + " has attribute local_file=True" + ) + with of as files: + paths = [f.name for f in files] + if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path): + return paths[0] + return paths + + +def get_compression(urlpath, compression): + if compression == "infer": + compression = infer_compression(urlpath) + if compression is not None and compression not in compr: + raise ValueError(f"Compression type {compression} not supported") + return compression + + +def split_protocol(urlpath): + """Return protocol, path pair""" + urlpath = stringify_path(urlpath) + if "://" in urlpath: + protocol, path = urlpath.split("://", 1) + if len(protocol) > 1: + # excludes Windows paths + return protocol, path + if urlpath.startswith("data:"): + return urlpath.split(":", 1) + return None, urlpath + + +def strip_protocol(urlpath): + """Return only path part of full URL, according to appropriate backend""" + protocol, _ = split_protocol(urlpath) + cls = get_filesystem_class(protocol) + return cls._strip_protocol(urlpath) + + +def expand_paths_if_needed(paths, mode, num, fs, name_function): + """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]`` + in them (read mode). + + :param paths: list of paths + mode: str + Mode in which to open files. + num: int + If opening in writing mode, number of files we expect to create. + fs: filesystem object + name_function: callable + If opening in writing mode, this callable is used to generate path + names. Names are generated for each partition by + ``urlpath.replace('*', name_function(partition_index))``. + :return: list of paths + """ + expanded_paths = [] + paths = list(paths) + + if "w" in mode: # read mode + if sum(1 for p in paths if "*" in p) > 1: + raise ValueError( + "When writing data, only one filename mask can be specified." + ) + num = max(num, len(paths)) + + for curr_path in paths: + if "*" in curr_path: + # expand using name_function + expanded_paths.extend(_expand_paths(curr_path, name_function, num)) + else: + expanded_paths.append(curr_path) + # if we generated more paths that asked for, trim the list + if len(expanded_paths) > num: + expanded_paths = expanded_paths[:num] + + else: # read mode + for curr_path in paths: + if has_magic(curr_path): + # expand using glob + expanded_paths.extend(fs.glob(curr_path)) + else: + expanded_paths.append(curr_path) + + return expanded_paths + + +def get_fs_token_paths( + urlpath, + mode="rb", + num=1, + name_function=None, + storage_options=None, + protocol=None, + expand=True, +): + """Filesystem, deterministic token, and paths from a urlpath and options. + + Parameters + ---------- + urlpath: string or iterable + Absolute or relative filepath, URL (may include protocols like + ``s3://``), or globstring pointing to data. + mode: str, optional + Mode in which to open files. + num: int, optional + If opening in writing mode, number of files we expect to create. + name_function: callable, optional + If opening in writing mode, this callable is used to generate path + names. Names are generated for each partition by + ``urlpath.replace('*', name_function(partition_index))``. + storage_options: dict, optional + Additional keywords to pass to the filesystem class. + protocol: str or None + To override the protocol specifier in the URL + expand: bool + Expand string paths for writing, assuming the path is a directory + """ + if isinstance(urlpath, (list, tuple, set)): + if not urlpath: + raise ValueError("empty urlpath sequence") + urlpath0 = stringify_path(next(iter(urlpath))) + else: + urlpath0 = stringify_path(urlpath) + storage_options = storage_options or {} + if protocol: + storage_options["protocol"] = protocol + chain = _un_chain(urlpath0, storage_options or {}) + inkwargs = {} + # Reverse iterate the chain, creating a nested target_* structure + for i, ch in enumerate(reversed(chain)): + urls, nested_protocol, kw = ch + if i == len(chain) - 1: + inkwargs = dict(**kw, **inkwargs) + continue + inkwargs["target_options"] = dict(**kw, **inkwargs) + inkwargs["target_protocol"] = nested_protocol + inkwargs["fo"] = urls + paths, protocol, _ = chain[0] + fs = filesystem(protocol, **inkwargs) + if isinstance(urlpath, (list, tuple, set)): + pchains = [ + _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath + ] + if len({pc[1] for pc in pchains}) > 1: + raise ValueError("Protocol mismatch getting fs from %s", urlpath) + paths = [pc[0] for pc in pchains] + else: + paths = fs._strip_protocol(paths) + if isinstance(paths, (list, tuple, set)): + if expand: + paths = expand_paths_if_needed(paths, mode, num, fs, name_function) + elif not isinstance(paths, list): + paths = list(paths) + else: + if ("w" in mode or "x" in mode) and expand: + paths = _expand_paths(paths, name_function, num) + elif "*" in paths: + paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)] + else: + paths = [paths] + + return fs, fs._fs_token, paths + + +def _expand_paths(path, name_function, num): + if isinstance(path, str): + if path.count("*") > 1: + raise ValueError("Output path spec must contain exactly one '*'.") + elif "*" not in path: + path = os.path.join(path, "*.part") + + if name_function is None: + name_function = build_name_function(num - 1) + + paths = [path.replace("*", name_function(i)) for i in range(num)] + if paths != sorted(paths): + logger.warning( + "In order to preserve order between partitions" + " paths created with ``name_function`` should " + "sort to partition order" + ) + elif isinstance(path, (tuple, list)): + assert len(path) == num + paths = list(path) + else: + raise ValueError( + "Path should be either\n" + "1. A list of paths: ['foo.json', 'bar.json', ...]\n" + "2. A directory: 'foo/\n" + "3. A path with a '*' in it: 'foo.*.json'" + ) + return paths + + +class PickleableTextIOWrapper(io.TextIOWrapper): + """TextIOWrapper cannot be pickled. This solves it. + + Requires that ``buffer`` be pickleable, which all instances of + AbstractBufferedFile are. + """ + + def __init__( + self, + buffer, + encoding=None, + errors=None, + newline=None, + line_buffering=False, + write_through=False, + ): + self.args = buffer, encoding, errors, newline, line_buffering, write_through + super().__init__(*self.args) + + def __reduce__(self): + return PickleableTextIOWrapper, self.args diff --git a/venv/lib/python3.13/site-packages/fsspec/dircache.py b/venv/lib/python3.13/site-packages/fsspec/dircache.py new file mode 100644 index 0000000000000000000000000000000000000000..eca19566b135e5a7a4f6e7407d56411ec58bfe44 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/dircache.py @@ -0,0 +1,98 @@ +import time +from collections.abc import MutableMapping +from functools import lru_cache + + +class DirCache(MutableMapping): + """ + Caching of directory listings, in a structure like:: + + {"path0": [ + {"name": "path0/file0", + "size": 123, + "type": "file", + ... + }, + {"name": "path0/file1", + }, + ... + ], + "path1": [...] + } + + Parameters to this class control listing expiry or indeed turn + caching off + """ + + def __init__( + self, + use_listings_cache=True, + listings_expiry_time=None, + max_paths=None, + **kwargs, + ): + """ + + Parameters + ---------- + use_listings_cache: bool + If False, this cache never returns items, but always reports KeyError, + and setting items has no effect + listings_expiry_time: int or float (optional) + Time in seconds that a listing is considered valid. If None, + listings do not expire. + max_paths: int (optional) + The number of most recent listings that are considered valid; 'recent' + refers to when the entry was set. + """ + self._cache = {} + self._times = {} + if max_paths: + self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None)) + self.use_listings_cache = use_listings_cache + self.listings_expiry_time = listings_expiry_time + self.max_paths = max_paths + + def __getitem__(self, item): + if self.listings_expiry_time is not None: + if self._times.get(item, 0) - time.time() < -self.listings_expiry_time: + del self._cache[item] + if self.max_paths: + self._q(item) + return self._cache[item] # maybe raises KeyError + + def clear(self): + self._cache.clear() + + def __len__(self): + return len(self._cache) + + def __contains__(self, item): + try: + self[item] + return True + except KeyError: + return False + + def __setitem__(self, key, value): + if not self.use_listings_cache: + return + if self.max_paths: + self._q(key) + self._cache[key] = value + if self.listings_expiry_time is not None: + self._times[key] = time.time() + + def __delitem__(self, key): + del self._cache[key] + + def __iter__(self): + entries = list(self._cache) + + return (k for k in entries if k in self) + + def __reduce__(self): + return ( + DirCache, + (self.use_listings_cache, self.listings_expiry_time, self.max_paths), + ) diff --git a/venv/lib/python3.13/site-packages/fsspec/fuse.py b/venv/lib/python3.13/site-packages/fsspec/fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..566d520fce3e94e3bbaee48c3c6acc9f1db315a8 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/fuse.py @@ -0,0 +1,324 @@ +import argparse +import logging +import os +import stat +import threading +import time +from errno import EIO, ENOENT + +from fuse import FUSE, FuseOSError, LoggingMixIn, Operations + +from fsspec import __version__ +from fsspec.core import url_to_fs + +logger = logging.getLogger("fsspec.fuse") + + +class FUSEr(Operations): + def __init__(self, fs, path, ready_file=False): + self.fs = fs + self.cache = {} + self.root = path.rstrip("/") + "/" + self.counter = 0 + logger.info("Starting FUSE at %s", path) + self._ready_file = ready_file + + def getattr(self, path, fh=None): + logger.debug("getattr %s", path) + if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]: + return {"type": "file", "st_size": 5} + + path = "".join([self.root, path.lstrip("/")]).rstrip("/") + try: + info = self.fs.info(path) + except FileNotFoundError as exc: + raise FuseOSError(ENOENT) from exc + + data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)} + perm = info.get("mode", 0o777) + + if info["type"] != "file": + data["st_mode"] = stat.S_IFDIR | perm + data["st_size"] = 0 + data["st_blksize"] = 0 + else: + data["st_mode"] = stat.S_IFREG | perm + data["st_size"] = info["size"] + data["st_blksize"] = 5 * 2**20 + data["st_nlink"] = 1 + data["st_atime"] = info["atime"] if "atime" in info else time.time() + data["st_ctime"] = info["ctime"] if "ctime" in info else time.time() + data["st_mtime"] = info["mtime"] if "mtime" in info else time.time() + return data + + def readdir(self, path, fh): + logger.debug("readdir %s", path) + path = "".join([self.root, path.lstrip("/")]) + files = self.fs.ls(path, False) + files = [os.path.basename(f.rstrip("/")) for f in files] + return [".", ".."] + files + + def mkdir(self, path, mode): + path = "".join([self.root, path.lstrip("/")]) + self.fs.mkdir(path) + return 0 + + def rmdir(self, path): + path = "".join([self.root, path.lstrip("/")]) + self.fs.rmdir(path) + return 0 + + def read(self, path, size, offset, fh): + logger.debug("read %s", (path, size, offset)) + if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]: + # status indicator + return b"ready" + + f = self.cache[fh] + f.seek(offset) + out = f.read(size) + return out + + def write(self, path, data, offset, fh): + logger.debug("write %s", (path, offset)) + f = self.cache[fh] + f.seek(offset) + f.write(data) + return len(data) + + def create(self, path, flags, fi=None): + logger.debug("create %s", (path, flags)) + fn = "".join([self.root, path.lstrip("/")]) + self.fs.touch(fn) # OS will want to get attributes immediately + f = self.fs.open(fn, "wb") + self.cache[self.counter] = f + self.counter += 1 + return self.counter - 1 + + def open(self, path, flags): + logger.debug("open %s", (path, flags)) + fn = "".join([self.root, path.lstrip("/")]) + if flags % 2 == 0: + # read + mode = "rb" + else: + # write/create + mode = "wb" + self.cache[self.counter] = self.fs.open(fn, mode) + self.counter += 1 + return self.counter - 1 + + def truncate(self, path, length, fh=None): + fn = "".join([self.root, path.lstrip("/")]) + if length != 0: + raise NotImplementedError + # maybe should be no-op since open with write sets size to zero anyway + self.fs.touch(fn) + + def unlink(self, path): + fn = "".join([self.root, path.lstrip("/")]) + try: + self.fs.rm(fn, False) + except (OSError, FileNotFoundError) as exc: + raise FuseOSError(EIO) from exc + + def release(self, path, fh): + try: + if fh in self.cache: + f = self.cache[fh] + f.close() + self.cache.pop(fh) + except Exception as e: + print(e) + return 0 + + def chmod(self, path, mode): + if hasattr(self.fs, "chmod"): + path = "".join([self.root, path.lstrip("/")]) + return self.fs.chmod(path, mode) + raise NotImplementedError + + +def run( + fs, + path, + mount_point, + foreground=True, + threads=False, + ready_file=False, + ops_class=FUSEr, +): + """Mount stuff in a local directory + + This uses fusepy to make it appear as if a given path on an fsspec + instance is in fact resident within the local file-system. + + This requires that fusepy by installed, and that FUSE be available on + the system (typically requiring a package to be installed with + apt, yum, brew, etc.). + + Parameters + ---------- + fs: file-system instance + From one of the compatible implementations + path: str + Location on that file-system to regard as the root directory to + mount. Note that you typically should include the terminating "/" + character. + mount_point: str + An empty directory on the local file-system where the contents of + the remote path will appear. + foreground: bool + Whether or not calling this function will block. Operation will + typically be more stable if True. + threads: bool + Whether or not to create threads when responding to file operations + within the mounter directory. Operation will typically be more + stable if False. + ready_file: bool + Whether the FUSE process is ready. The ``.fuse_ready`` file will + exist in the ``mount_point`` directory if True. Debugging purpose. + ops_class: FUSEr or Subclass of FUSEr + To override the default behavior of FUSEr. For Example, logging + to file. + + """ + func = lambda: FUSE( + ops_class(fs, path, ready_file=ready_file), + mount_point, + nothreads=not threads, + foreground=foreground, + ) + if not foreground: + th = threading.Thread(target=func) + th.daemon = True + th.start() + return th + else: # pragma: no cover + try: + func() + except KeyboardInterrupt: + pass + + +def main(args): + """Mount filesystem from chained URL to MOUNT_POINT. + + Examples: + + python3 -m fsspec.fuse memory /usr/share /tmp/mem + + python3 -m fsspec.fuse local /tmp/source /tmp/local \\ + -l /tmp/fsspecfuse.log + + You can also mount chained-URLs and use special settings: + + python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\ + / /tmp/zip \\ + -o 'filecache-cache_storage=/tmp/simplecache' + + You can specify the type of the setting by using `[int]` or `[bool]`, + (`true`, `yes`, `1` represents the Boolean value `True`): + + python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\ + /historic/packages/RPMS /tmp/ftp \\ + -o 'simplecache-cache_storage=/tmp/simplecache' \\ + -o 'simplecache-check_files=false[bool]' \\ + -o 'ftp-listings_expiry_time=60[int]' \\ + -o 'ftp-username=anonymous' \\ + -o 'ftp-password=xieyanbo' + """ + + class RawDescriptionArgumentParser(argparse.ArgumentParser): + def format_help(self): + usage = super().format_help() + parts = usage.split("\n\n") + parts[1] = self.description.rstrip() + return "\n\n".join(parts) + + parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__) + parser.add_argument("--version", action="version", version=__version__) + parser.add_argument("url", type=str, help="fs url") + parser.add_argument("source_path", type=str, help="source directory in fs") + parser.add_argument("mount_point", type=str, help="local directory") + parser.add_argument( + "-o", + "--option", + action="append", + help="Any options of protocol included in the chained URL", + ) + parser.add_argument( + "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')" + ) + parser.add_argument( + "-f", + "--foreground", + action="store_false", + help="Running in foreground or not (Default: False)", + ) + parser.add_argument( + "-t", + "--threads", + action="store_false", + help="Running with threads support (Default: False)", + ) + parser.add_argument( + "-r", + "--ready-file", + action="store_false", + help="The `.fuse_ready` file will exist after FUSE is ready. " + "(Debugging purpose, Default: False)", + ) + args = parser.parse_args(args) + + kwargs = {} + for item in args.option or []: + key, sep, value = item.partition("=") + if not sep: + parser.error(message=f"Wrong option: {item!r}") + val = value.lower() + if val.endswith("[int]"): + value = int(value[: -len("[int]")]) + elif val.endswith("[bool]"): + value = val[: -len("[bool]")] in ["1", "yes", "true"] + + if "-" in key: + fs_name, setting_name = key.split("-", 1) + if fs_name in kwargs: + kwargs[fs_name][setting_name] = value + else: + kwargs[fs_name] = {setting_name: value} + else: + kwargs[key] = value + + if args.log_file: + logging.basicConfig( + level=logging.DEBUG, + filename=args.log_file, + format="%(asctime)s %(message)s", + ) + + class LoggingFUSEr(FUSEr, LoggingMixIn): + pass + + fuser = LoggingFUSEr + else: + fuser = FUSEr + + fs, url_path = url_to_fs(args.url, **kwargs) + logger.debug("Mounting %s to %s", url_path, str(args.mount_point)) + run( + fs, + args.source_path, + args.mount_point, + foreground=args.foreground, + threads=args.threads, + ready_file=args.ready_file, + ops_class=fuser, + ) + + +if __name__ == "__main__": + import sys + + main(sys.argv[1:]) diff --git a/venv/lib/python3.13/site-packages/fsspec/generic.py b/venv/lib/python3.13/site-packages/fsspec/generic.py new file mode 100644 index 0000000000000000000000000000000000000000..0a641b0e2bcf70729a44064319eecb3647450379 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/generic.py @@ -0,0 +1,396 @@ +from __future__ import annotations + +import inspect +import logging +import os +import shutil +import uuid + +from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper +from .callbacks import DEFAULT_CALLBACK +from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs + +_generic_fs = {} +logger = logging.getLogger("fsspec.generic") + + +def set_generic_fs(protocol, **storage_options): + """Populate the dict used for method=="generic" lookups""" + _generic_fs[protocol] = filesystem(protocol, **storage_options) + + +def _resolve_fs(url, method, protocol=None, storage_options=None): + """Pick instance of backend FS""" + url = url[0] if isinstance(url, (list, tuple)) else url + protocol = protocol or split_protocol(url)[0] + storage_options = storage_options or {} + if method == "default": + return filesystem(protocol) + if method == "generic": + return _generic_fs[protocol] + if method == "current": + cls = get_filesystem_class(protocol) + return cls.current() + if method == "options": + fs, _ = url_to_fs(url, **storage_options.get(protocol, {})) + return fs + raise ValueError(f"Unknown FS resolution method: {method}") + + +def rsync( + source, + destination, + delete_missing=False, + source_field="size", + dest_field="size", + update_cond="different", + inst_kwargs=None, + fs=None, + **kwargs, +): + """Sync files between two directory trees + + (experimental) + + Parameters + ---------- + source: str + Root of the directory tree to take files from. This must be a directory, but + do not include any terminating "/" character + destination: str + Root path to copy into. The contents of this location should be + identical to the contents of ``source`` when done. This will be made a + directory, and the terminal "/" should not be included. + delete_missing: bool + If there are paths in the destination that don't exist in the + source and this is True, delete them. Otherwise, leave them alone. + source_field: str | callable + If ``update_field`` is "different", this is the key in the info + of source files to consider for difference. Maybe a function of the + info dict. + dest_field: str | callable + If ``update_field`` is "different", this is the key in the info + of destination files to consider for difference. May be a function of + the info dict. + update_cond: "different"|"always"|"never" + If "always", every file is copied, regardless of whether it exists in + the destination. If "never", files that exist in the destination are + not copied again. If "different" (default), only copy if the info + fields given by ``source_field`` and ``dest_field`` (usually "size") + are different. Other comparisons may be added in the future. + inst_kwargs: dict|None + If ``fs`` is None, use this set of keyword arguments to make a + GenericFileSystem instance + fs: GenericFileSystem|None + Instance to use if explicitly given. The instance defines how to + to make downstream file system instances from paths. + + Returns + ------- + dict of the copy operations that were performed, {source: destination} + """ + fs = fs or GenericFileSystem(**(inst_kwargs or {})) + source = fs._strip_protocol(source) + destination = fs._strip_protocol(destination) + allfiles = fs.find(source, withdirs=True, detail=True) + if not fs.isdir(source): + raise ValueError("Can only rsync on a directory") + otherfiles = fs.find(destination, withdirs=True, detail=True) + dirs = [ + a + for a, v in allfiles.items() + if v["type"] == "directory" and a.replace(source, destination) not in otherfiles + ] + logger.debug(f"{len(dirs)} directories to create") + if dirs: + fs.make_many_dirs( + [dirn.replace(source, destination) for dirn in dirs], exist_ok=True + ) + allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"} + logger.debug(f"{len(allfiles)} files to consider for copy") + to_delete = [ + o + for o, v in otherfiles.items() + if o.replace(destination, source) not in allfiles and v["type"] == "file" + ] + for k, v in allfiles.copy().items(): + otherfile = k.replace(source, destination) + if otherfile in otherfiles: + if update_cond == "always": + allfiles[k] = otherfile + elif update_cond == "never": + allfiles.pop(k) + elif update_cond == "different": + inf1 = source_field(v) if callable(source_field) else v[source_field] + v2 = otherfiles[otherfile] + inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field] + if inf1 != inf2: + # details mismatch, make copy + allfiles[k] = otherfile + else: + # details match, don't copy + allfiles.pop(k) + else: + # file not in target yet + allfiles[k] = otherfile + logger.debug(f"{len(allfiles)} files to copy") + if allfiles: + source_files, target_files = zip(*allfiles.items()) + fs.cp(source_files, target_files, **kwargs) + logger.debug(f"{len(to_delete)} files to delete") + if delete_missing and to_delete: + fs.rm(to_delete) + return allfiles + + +class GenericFileSystem(AsyncFileSystem): + """Wrapper over all other FS types + + + + This implementation is a single unified interface to be able to run FS operations + over generic URLs, and dispatch to the specific implementations using the URL + protocol prefix. + + Note: instances of this FS are always async, even if you never use it with any async + backend. + """ + + protocol = "generic" # there is no real reason to ever use a protocol with this FS + + def __init__(self, default_method="default", storage_options=None, **kwargs): + """ + + Parameters + ---------- + default_method: str (optional) + Defines how to configure backend FS instances. Options are: + - "default": instantiate like FSClass(), with no + extra arguments; this is the default instance of that FS, and can be + configured via the config system + - "generic": takes instances from the `_generic_fs` dict in this module, + which you must populate before use. Keys are by protocol + - "options": expects storage_options, a dict mapping protocol to + kwargs to use when constructing the filesystem + - "current": takes the most recently instantiated version of each FS + """ + self.method = default_method + self.st_opts = storage_options + super().__init__(**kwargs) + + def _parent(self, path): + fs = _resolve_fs(path, self.method, storage_options=self.st_opts) + return fs.unstrip_protocol(fs._parent(path)) + + def _strip_protocol(self, path): + # normalization only + fs = _resolve_fs(path, self.method, storage_options=self.st_opts) + return fs.unstrip_protocol(fs._strip_protocol(path)) + + async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + fs = _resolve_fs(path, self.method, storage_options=self.st_opts) + if fs.async_impl: + out = await fs._find( + path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs + ) + else: + out = fs.find( + path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs + ) + result = {} + for k, v in out.items(): + v = v.copy() # don't corrupt target FS dircache + name = fs.unstrip_protocol(k) + v["name"] = name + result[name] = v + if detail: + return result + return list(result) + + async def _info(self, url, **kwargs): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + out = await fs._info(url, **kwargs) + else: + out = fs.info(url, **kwargs) + out = out.copy() # don't edit originals + out["name"] = fs.unstrip_protocol(out["name"]) + return out + + async def _ls( + self, + url, + detail=True, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + out = await fs._ls(url, detail=True, **kwargs) + else: + out = fs.ls(url, detail=True, **kwargs) + out = [o.copy() for o in out] # don't edit originals + for o in out: + o["name"] = fs.unstrip_protocol(o["name"]) + if detail: + return out + else: + return [o["name"] for o in out] + + async def _cat_file( + self, + url, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + return await fs._cat_file(url, **kwargs) + else: + return fs.cat_file(url, **kwargs) + + async def _pipe_file( + self, + path, + value, + **kwargs, + ): + fs = _resolve_fs(path, self.method, storage_options=self.st_opts) + if fs.async_impl: + return await fs._pipe_file(path, value, **kwargs) + else: + return fs.pipe_file(path, value, **kwargs) + + async def _rm(self, url, **kwargs): + urls = url + if isinstance(urls, str): + urls = [urls] + fs = _resolve_fs(urls[0], self.method) + if fs.async_impl: + await fs._rm(urls, **kwargs) + else: + fs.rm(url, **kwargs) + + async def _makedirs(self, path, exist_ok=False): + logger.debug("Make dir %s", path) + fs = _resolve_fs(path, self.method, storage_options=self.st_opts) + if fs.async_impl: + await fs._makedirs(path, exist_ok=exist_ok) + else: + fs.makedirs(path, exist_ok=exist_ok) + + def rsync(self, source, destination, **kwargs): + """Sync files between two directory trees + + See `func:rsync` for more details. + """ + rsync(source, destination, fs=self, **kwargs) + + async def _cp_file( + self, + url, + url2, + blocksize=2**20, + callback=DEFAULT_CALLBACK, + tempdir: str | None = None, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + fs2 = _resolve_fs(url2, self.method) + if fs is fs2: + # pure remote + if fs.async_impl: + return await fs._copy(url, url2, **kwargs) + else: + return fs.copy(url, url2, **kwargs) + await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise") + + async def _make_many_dirs(self, urls, exist_ok=True): + fs = _resolve_fs(urls[0], self.method) + if fs.async_impl: + coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls] + await _run_coros_in_chunks(coros) + else: + for u in urls: + fs.makedirs(u, exist_ok=exist_ok) + + make_many_dirs = sync_wrapper(_make_many_dirs) + + async def _copy( + self, + path1: list[str], + path2: list[str], + recursive: bool = False, + on_error: str = "ignore", + maxdepth: int | None = None, + batch_size: int | None = None, + tempdir: str | None = None, + **kwargs, + ): + # TODO: special case for one FS being local, which can use get/put + # TODO: special case for one being memFS, which can use cat/pipe + if recursive: + raise NotImplementedError("Please use fsspec.generic.rsync") + path1 = [path1] if isinstance(path1, str) else path1 + path2 = [path2] if isinstance(path2, str) else path2 + + fs = _resolve_fs(path1, self.method) + fs2 = _resolve_fs(path2, self.method) + + if fs is fs2: + if fs.async_impl: + return await fs._copy(path1, path2, **kwargs) + else: + return fs.copy(path1, path2, **kwargs) + + await copy_file_op( + fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error + ) + + +async def copy_file_op( + fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore" +): + import tempfile + + tempdir = tempdir or tempfile.mkdtemp() + try: + coros = [ + _copy_file_op( + fs1, + u1, + fs2, + u2, + os.path.join(tempdir, uuid.uuid4().hex), + ) + for u1, u2 in zip(url1, url2) + ] + out = await _run_coros_in_chunks( + coros, batch_size=batch_size, return_exceptions=True + ) + finally: + shutil.rmtree(tempdir) + if on_error == "return": + return out + elif on_error == "raise": + for o in out: + if isinstance(o, Exception): + raise o + + +async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"): + if fs1.async_impl: + await fs1._get_file(url1, local) + else: + fs1.get_file(url1, local) + if fs2.async_impl: + await fs2._put_file(local, url2) + else: + fs2.put_file(local, url2) + os.unlink(local) + logger.debug("Copy %s -> %s; done", url1, url2) + + +async def maybe_await(cor): + if inspect.iscoroutine(cor): + return await cor + else: + return cor diff --git a/venv/lib/python3.13/site-packages/fsspec/gui.py b/venv/lib/python3.13/site-packages/fsspec/gui.py new file mode 100644 index 0000000000000000000000000000000000000000..9d914c8beb6cabb2c2700eb8eee31028559be2bd --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/gui.py @@ -0,0 +1,417 @@ +import ast +import contextlib +import logging +import os +import re +from collections.abc import Sequence +from typing import ClassVar + +import panel as pn + +from .core import OpenFile, get_filesystem_class, split_protocol +from .registry import known_implementations + +pn.extension() +logger = logging.getLogger("fsspec.gui") + + +class SigSlot: + """Signal-slot mixin, for Panel event passing + + Include this class in a widget manager's superclasses to be able to + register events and callbacks on Panel widgets managed by that class. + + The method ``_register`` should be called as widgets are added, and external + code should call ``connect`` to associate callbacks. + + By default, all signals emit a DEBUG logging statement. + """ + + # names of signals that this class may emit each of which must be + # set by _register for any new instance + signals: ClassVar[Sequence[str]] = [] + # names of actions that this class may respond to + slots: ClassVar[Sequence[str]] = [] + + # each of which must be a method name + + def __init__(self): + self._ignoring_events = False + self._sigs = {} + self._map = {} + self._setup() + + def _setup(self): + """Create GUI elements and register signals""" + self.panel = pn.pane.PaneBase() + # no signals to set up in the base class + + def _register( + self, widget, name, thing="value", log_level=logging.DEBUG, auto=False + ): + """Watch the given attribute of a widget and assign it a named event + + This is normally called at the time a widget is instantiated, in the + class which owns it. + + Parameters + ---------- + widget : pn.layout.Panel or None + Widget to watch. If None, an anonymous signal not associated with + any widget. + name : str + Name of this event + thing : str + Attribute of the given widget to watch + log_level : int + When the signal is triggered, a logging event of the given level + will be fired in the dfviz logger. + auto : bool + If True, automatically connects with a method in this class of the + same name. + """ + if name not in self.signals: + raise ValueError(f"Attempt to assign an undeclared signal: {name}") + self._sigs[name] = { + "widget": widget, + "callbacks": [], + "thing": thing, + "log": log_level, + } + wn = "-".join( + [ + getattr(widget, "name", str(widget)) if widget is not None else "none", + thing, + ] + ) + self._map[wn] = name + if widget is not None: + widget.param.watch(self._signal, thing, onlychanged=True) + if auto and hasattr(self, name): + self.connect(name, getattr(self, name)) + + def _repr_mimebundle_(self, *args, **kwargs): + """Display in a notebook or a server""" + try: + return self.panel._repr_mimebundle_(*args, **kwargs) + except (ValueError, AttributeError) as exc: + raise NotImplementedError( + "Panel does not seem to be set up properly" + ) from exc + + def connect(self, signal, slot): + """Associate call back with given event + + The callback must be a function which takes the "new" value of the + watched attribute as the only parameter. If the callback return False, + this cancels any further processing of the given event. + + Alternatively, the callback can be a string, in which case it means + emitting the correspondingly-named event (i.e., connect to self) + """ + self._sigs[signal]["callbacks"].append(slot) + + def _signal(self, event): + """This is called by a an action on a widget + + Within an self.ignore_events context, nothing happens. + + Tests can execute this method by directly changing the values of + widget components. + """ + if not self._ignoring_events: + wn = "-".join([event.obj.name, event.name]) + if wn in self._map and self._map[wn] in self._sigs: + self._emit(self._map[wn], event.new) + + @contextlib.contextmanager + def ignore_events(self): + """Temporarily turn off events processing in this instance + + (does not propagate to children) + """ + self._ignoring_events = True + try: + yield + finally: + self._ignoring_events = False + + def _emit(self, sig, value=None): + """An event happened, call its callbacks + + This method can be used in tests to simulate message passing without + directly changing visual elements. + + Calling of callbacks will halt whenever one returns False. + """ + logger.log(self._sigs[sig]["log"], f"{sig}: {value}") + for callback in self._sigs[sig]["callbacks"]: + if isinstance(callback, str): + self._emit(callback) + else: + try: + # running callbacks should not break the interface + ret = callback(value) + if ret is False: + break + except Exception as e: + logger.exception( + "Exception (%s) while executing callback for signal: %s", + e, + sig, + ) + + def show(self, threads=False): + """Open a new browser tab and display this instance's interface""" + self.panel.show(threads=threads, verbose=False) + return self + + +class SingleSelect(SigSlot): + """A multiselect which only allows you to select one item for an event""" + + signals = ["_selected", "selected"] # the first is internal + slots = ["set_options", "set_selection", "add", "clear", "select"] + + def __init__(self, **kwargs): + self.kwargs = kwargs + super().__init__() + + def _setup(self): + self.panel = pn.widgets.MultiSelect(**self.kwargs) + self._register(self.panel, "_selected", "value") + self._register(None, "selected") + self.connect("_selected", self.select_one) + + def _signal(self, *args, **kwargs): + super()._signal(*args, **kwargs) + + def select_one(self, *_): + with self.ignore_events(): + val = [self.panel.value[-1]] if self.panel.value else [] + self.panel.value = val + self._emit("selected", self.panel.value) + + def set_options(self, options): + self.panel.options = options + + def clear(self): + self.panel.options = [] + + @property + def value(self): + return self.panel.value + + def set_selection(self, selection): + self.panel.value = [selection] + + +class FileSelector(SigSlot): + """Panel-based graphical file selector widget + + Instances of this widget are interactive and can be displayed in jupyter by having + them as the output of a cell, or in a separate browser tab using ``.show()``. + """ + + signals = [ + "protocol_changed", + "selection_changed", + "directory_entered", + "home_clicked", + "up_clicked", + "go_clicked", + "filters_changed", + ] + slots = ["set_filters", "go_home"] + + def __init__(self, url=None, filters=None, ignore=None, kwargs=None): + """ + + Parameters + ---------- + url : str (optional) + Initial value of the URL to populate the dialog; should include protocol + filters : list(str) (optional) + File endings to include in the listings. If not included, all files are + allowed. Does not affect directories. + If given, the endings will appear as checkboxes in the interface + ignore : list(str) (optional) + Regex(s) of file basename patterns to ignore, e.g., "\\." for typical + hidden files on posix + kwargs : dict (optional) + To pass to file system instance + """ + if url: + self.init_protocol, url = split_protocol(url) + else: + self.init_protocol, url = "file", os.getcwd() + self.init_url = url + self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}" + self.filters = filters + self.ignore = [re.compile(i) for i in ignore or []] + self._fs = None + super().__init__() + + def _setup(self): + self.url = pn.widgets.TextInput( + name="url", + value=self.init_url, + align="end", + sizing_mode="stretch_width", + width_policy="max", + ) + self.protocol = pn.widgets.Select( + options=sorted(known_implementations), + value=self.init_protocol, + name="protocol", + align="center", + ) + self.kwargs = pn.widgets.TextInput( + name="kwargs", value=self.init_kwargs, align="center" + ) + self.go = pn.widgets.Button(name="⇨", align="end", width=45) + self.main = SingleSelect(size=10) + self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end") + self.up = pn.widgets.Button(name="β€Ή", width=30, height=30, align="end") + + self._register(self.protocol, "protocol_changed", auto=True) + self._register(self.go, "go_clicked", "clicks", auto=True) + self._register(self.up, "up_clicked", "clicks", auto=True) + self._register(self.home, "home_clicked", "clicks", auto=True) + self._register(None, "selection_changed") + self.main.connect("selected", self.selection_changed) + self._register(None, "directory_entered") + self.prev_protocol = self.protocol.value + self.prev_kwargs = self.storage_options + + self.filter_sel = pn.widgets.CheckBoxGroup( + value=[], options=[], inline=False, align="end", width_policy="min" + ) + self._register(self.filter_sel, "filters_changed", auto=True) + + self.panel = pn.Column( + pn.Row(self.protocol, self.kwargs), + pn.Row(self.home, self.up, self.url, self.go, self.filter_sel), + self.main.panel, + ) + self.set_filters(self.filters) + self.go_clicked() + + def set_filters(self, filters=None): + self.filters = filters + if filters: + self.filter_sel.options = filters + self.filter_sel.value = filters + else: + self.filter_sel.options = [] + self.filter_sel.value = [] + + @property + def storage_options(self): + """Value of the kwargs box as a dictionary""" + return ast.literal_eval(self.kwargs.value) or {} + + @property + def fs(self): + """Current filesystem instance""" + if self._fs is None: + cls = get_filesystem_class(self.protocol.value) + self._fs = cls(**self.storage_options) + return self._fs + + @property + def urlpath(self): + """URL of currently selected item""" + return ( + (f"{self.protocol.value}://{self.main.value[0]}") + if self.main.value + else None + ) + + def open_file(self, mode="rb", compression=None, encoding=None): + """Create OpenFile instance for the currently selected item + + For example, in a notebook you might do something like + + .. code-block:: + + [ ]: sel = FileSelector(); sel + + # user selects their file + + [ ]: with sel.open_file('rb') as f: + ... out = f.read() + + Parameters + ---------- + mode: str (optional) + Open mode for the file. + compression: str (optional) + The interact with the file as compressed. Set to 'infer' to guess + compression from the file ending + encoding: str (optional) + If using text mode, use this encoding; defaults to UTF8. + """ + if self.urlpath is None: + raise ValueError("No file selected") + return OpenFile(self.fs, self.urlpath, mode, compression, encoding) + + def filters_changed(self, values): + self.filters = values + self.go_clicked() + + def selection_changed(self, *_): + if self.urlpath is None: + return + if self.fs.isdir(self.urlpath): + self.url.value = self.fs._strip_protocol(self.urlpath) + self.go_clicked() + + def go_clicked(self, *_): + if ( + self.prev_protocol != self.protocol.value + or self.prev_kwargs != self.storage_options + ): + self._fs = None # causes fs to be recreated + self.prev_protocol = self.protocol.value + self.prev_kwargs = self.storage_options + listing = sorted( + self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"] + ) + listing = [ + l + for l in listing + if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore) + ] + folders = { + "πŸ“ " + o["name"].rsplit("/", 1)[-1]: o["name"] + for o in listing + if o["type"] == "directory" + } + files = { + "πŸ“„ " + o["name"].rsplit("/", 1)[-1]: o["name"] + for o in listing + if o["type"] == "file" + } + if self.filters: + files = { + k: v + for k, v in files.items() + if any(v.endswith(ext) for ext in self.filters) + } + self.main.set_options(dict(**folders, **files)) + + def protocol_changed(self, *_): + self._fs = None + self.main.options = [] + self.url.value = "" + + def home_clicked(self, *_): + self.protocol.value = self.init_protocol + self.kwargs.value = self.init_kwargs + self.url.value = self.init_url + self.go_clicked() + + def up_clicked(self, *_): + self.url.value = self.fs._parent(self.url.value) + self.go_clicked() diff --git a/venv/lib/python3.13/site-packages/fsspec/json.py b/venv/lib/python3.13/site-packages/fsspec/json.py new file mode 100644 index 0000000000000000000000000000000000000000..3bd2485ef1cc581d608f8627cb4133c198e35293 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/json.py @@ -0,0 +1,117 @@ +import json +from collections.abc import Mapping, Sequence +from contextlib import suppress +from pathlib import PurePath +from typing import ( + Any, + Callable, + ClassVar, + Optional, +) + +from .registry import _import_class, get_filesystem_class +from .spec import AbstractFileSystem + + +class FilesystemJSONEncoder(json.JSONEncoder): + include_password: ClassVar[bool] = True + + def default(self, o: Any) -> Any: + if isinstance(o, AbstractFileSystem): + return o.to_dict(include_password=self.include_password) + if isinstance(o, PurePath): + cls = type(o) + return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)} + + return super().default(o) + + def make_serializable(self, obj: Any) -> Any: + """ + Recursively converts an object so that it can be JSON serialized via + :func:`json.dumps` and :func:`json.dump`, without actually calling + said functions. + """ + if isinstance(obj, (str, int, float, bool)): + return obj + if isinstance(obj, Mapping): + return {k: self.make_serializable(v) for k, v in obj.items()} + if isinstance(obj, Sequence): + return [self.make_serializable(v) for v in obj] + + return self.default(obj) + + +class FilesystemJSONDecoder(json.JSONDecoder): + def __init__( + self, + *, + object_hook: Optional[Callable[[dict[str, Any]], Any]] = None, + parse_float: Optional[Callable[[str], Any]] = None, + parse_int: Optional[Callable[[str], Any]] = None, + parse_constant: Optional[Callable[[str], Any]] = None, + strict: bool = True, + object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None, + ) -> None: + self.original_object_hook = object_hook + + super().__init__( + object_hook=self.custom_object_hook, + parse_float=parse_float, + parse_int=parse_int, + parse_constant=parse_constant, + strict=strict, + object_pairs_hook=object_pairs_hook, + ) + + @classmethod + def try_resolve_path_cls(cls, dct: dict[str, Any]): + with suppress(Exception): + fqp = dct["cls"] + + path_cls = _import_class(fqp) + + if issubclass(path_cls, PurePath): + return path_cls + + return None + + @classmethod + def try_resolve_fs_cls(cls, dct: dict[str, Any]): + with suppress(Exception): + if "cls" in dct: + try: + fs_cls = _import_class(dct["cls"]) + if issubclass(fs_cls, AbstractFileSystem): + return fs_cls + except Exception: + if "protocol" in dct: # Fallback if cls cannot be imported + return get_filesystem_class(dct["protocol"]) + + raise + + return None + + def custom_object_hook(self, dct: dict[str, Any]): + if "cls" in dct: + if (obj_cls := self.try_resolve_fs_cls(dct)) is not None: + return AbstractFileSystem.from_dict(dct) + if (obj_cls := self.try_resolve_path_cls(dct)) is not None: + return obj_cls(dct["str"]) + + if self.original_object_hook is not None: + return self.original_object_hook(dct) + + return dct + + def unmake_serializable(self, obj: Any) -> Any: + """ + Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`. + """ + if isinstance(obj, dict): + obj = self.custom_object_hook(obj) + if isinstance(obj, dict): + return {k: self.unmake_serializable(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [self.unmake_serializable(v) for v in obj] + + return obj diff --git a/venv/lib/python3.13/site-packages/fsspec/mapping.py b/venv/lib/python3.13/site-packages/fsspec/mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..752eef35273b13eded7297e2e801b58e436a25b1 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/mapping.py @@ -0,0 +1,251 @@ +import array +import logging +import posixpath +import warnings +from collections.abc import MutableMapping +from functools import cached_property + +from fsspec.core import url_to_fs + +logger = logging.getLogger("fsspec.mapping") + + +class FSMap(MutableMapping): + """Wrap a FileSystem instance as a mutable wrapping. + + The keys of the mapping become files under the given root, and the + values (which must be bytes) the contents of those files. + + Parameters + ---------- + root: string + prefix for all the files + fs: FileSystem instance + check: bool (=True) + performs a touch at the location, to check for write access. + + Examples + -------- + >>> fs = FileSystem(**parameters) # doctest: +SKIP + >>> d = FSMap('my-data/path/', fs) # doctest: +SKIP + or, more likely + >>> d = fs.get_mapper('my-data/path/') + + >>> d['loc1'] = b'Hello World' # doctest: +SKIP + >>> list(d.keys()) # doctest: +SKIP + ['loc1'] + >>> d['loc1'] # doctest: +SKIP + b'Hello World' + """ + + def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): + self.fs = fs + self.root = fs._strip_protocol(root) + self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] + if missing_exceptions is None: + missing_exceptions = ( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + ) + self.missing_exceptions = missing_exceptions + self.check = check + self.create = create + if create: + if not self.fs.exists(root): + self.fs.mkdir(root) + if check: + if not self.fs.exists(root): + raise ValueError( + f"Path {root} does not exist. Create " + f" with the ``create=True`` keyword" + ) + self.fs.touch(root + "/a") + self.fs.rm(root + "/a") + + @cached_property + def dirfs(self): + """dirfs instance that can be used with the same keys as the mapper""" + from .implementations.dirfs import DirFileSystem + + return DirFileSystem(path=self._root_key_to_str, fs=self.fs) + + def clear(self): + """Remove all keys below root - empties out mapping""" + logger.info("Clear mapping at %s", self.root) + try: + self.fs.rm(self.root, True) + self.fs.mkdir(self.root) + except: # noqa: E722 + pass + + def getitems(self, keys, on_error="raise"): + """Fetch multiple items from the store + + If the backend is async-able, this might proceed concurrently + + Parameters + ---------- + keys: list(str) + They keys to be fetched + on_error : "raise", "omit", "return" + If raise, an underlying exception will be raised (converted to KeyError + if the type is in self.missing_exceptions); if omit, keys with exception + will simply not be included in the output; if "return", all keys are + included in the output, but the value will be bytes or an exception + instance. + + Returns + ------- + dict(key, bytes|exception) + """ + keys2 = [self._key_to_str(k) for k in keys] + oe = on_error if on_error == "raise" else "return" + try: + out = self.fs.cat(keys2, on_error=oe) + if isinstance(out, bytes): + out = {keys2[0]: out} + except self.missing_exceptions as e: + raise KeyError from e + out = { + k: (KeyError() if isinstance(v, self.missing_exceptions) else v) + for k, v in out.items() + } + return { + key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2)) + for key, k2 in zip(keys, keys2) + if on_error == "return" or not isinstance(out[k2], BaseException) + } + + def setitems(self, values_dict): + """Set the values of multiple items in the store + + Parameters + ---------- + values_dict: dict(str, bytes) + """ + values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()} + self.fs.pipe(values) + + def delitems(self, keys): + """Remove multiple keys from the store""" + self.fs.rm([self._key_to_str(k) for k in keys]) + + def _key_to_str(self, key): + """Generate full path for the key""" + if not isinstance(key, str): + # raise TypeError("key must be of type `str`, got `{type(key).__name__}`" + warnings.warn( + "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError", + DeprecationWarning, + ) + if isinstance(key, list): + key = tuple(key) + key = str(key) + return f"{self._root_key_to_str}{key}".rstrip("/") + + def _str_to_key(self, s): + """Strip path of to leave key name""" + return s[len(self.root) :].lstrip("/") + + def __getitem__(self, key, default=None): + """Retrieve data""" + k = self._key_to_str(key) + try: + result = self.fs.cat(k) + except self.missing_exceptions as exc: + if default is not None: + return default + raise KeyError(key) from exc + return result + + def pop(self, key, default=None): + """Pop data""" + result = self.__getitem__(key, default) + try: + del self[key] + except KeyError: + pass + return result + + def __setitem__(self, key, value): + """Store value in key""" + key = self._key_to_str(key) + self.fs.mkdirs(self.fs._parent(key), exist_ok=True) + self.fs.pipe_file(key, maybe_convert(value)) + + def __iter__(self): + return (self._str_to_key(x) for x in self.fs.find(self.root)) + + def __len__(self): + return len(self.fs.find(self.root)) + + def __delitem__(self, key): + """Remove key""" + try: + self.fs.rm(self._key_to_str(key)) + except Exception as exc: + raise KeyError from exc + + def __contains__(self, key): + """Does key exist in mapping?""" + path = self._key_to_str(key) + return self.fs.isfile(path) + + def __reduce__(self): + return FSMap, (self.root, self.fs, False, False, self.missing_exceptions) + + +def maybe_convert(value): + if isinstance(value, array.array) or hasattr(value, "__array__"): + # bytes-like things + if hasattr(value, "dtype") and value.dtype.kind in "Mm": + # The buffer interface doesn't support datetime64/timdelta64 numpy + # arrays + value = value.view("int64") + value = bytes(memoryview(value)) + return value + + +def get_mapper( + url="", + check=False, + create=False, + missing_exceptions=None, + alternate_root=None, + **kwargs, +): + """Create key-value interface for given URL and options + + The URL will be of the form "protocol://location" and point to the root + of the mapper required. All keys will be file-names below this location, + and their values the contents of each key. + + Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``. + + Parameters + ---------- + url: str + Root URL of mapping + check: bool + Whether to attempt to read from the location before instantiation, to + check that the mapping does exist + create: bool + Whether to make the directory corresponding to the root before + instantiating + missing_exceptions: None or tuple + If given, these exception types will be regarded as missing keys and + return KeyError when trying to read data. By default, you get + (FileNotFoundError, IsADirectoryError, NotADirectoryError) + alternate_root: None or str + In cases of complex URLs, the parser may fail to pick the correct part + for the mapper root, so this arg can override + + Returns + ------- + ``FSMap`` instance, the dict-like key-value store. + """ + # Removing protocol here - could defer to each open() on the backend + fs, urlpath = url_to_fs(url, **kwargs) + root = alternate_root if alternate_root is not None else urlpath + return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions) diff --git a/venv/lib/python3.13/site-packages/fsspec/parquet.py b/venv/lib/python3.13/site-packages/fsspec/parquet.py new file mode 100644 index 0000000000000000000000000000000000000000..faedb7b9e0aa90b6fb7cba33e794c4b4fb35eb77 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/parquet.py @@ -0,0 +1,541 @@ +import io +import json +import warnings + +from .core import url_to_fs +from .utils import merge_offset_ranges + +# Parquet-Specific Utilities for fsspec +# +# Most of the functions defined in this module are NOT +# intended for public consumption. The only exception +# to this is `open_parquet_file`, which should be used +# place of `fs.open()` to open parquet-formatted files +# on remote file systems. + + +def open_parquet_file( + path, + mode="rb", + fs=None, + metadata=None, + columns=None, + row_groups=None, + storage_options=None, + strict=False, + engine="auto", + max_gap=64_000, + max_block=256_000_000, + footer_sample_size=1_000_000, + **kwargs, +): + """ + Return a file-like object for a single Parquet file. + + The specified parquet `engine` will be used to parse the + footer metadata, and determine the required byte ranges + from the file. The target path will then be opened with + the "parts" (`KnownPartsOfAFile`) caching strategy. + + Note that this method is intended for usage with remote + file systems, and is unlikely to improve parquet-read + performance on local file systems. + + Parameters + ---------- + path: str + Target file path. + mode: str, optional + Mode option to be passed through to `fs.open`. Default is "rb". + metadata: Any, optional + Parquet metadata object. Object type must be supported + by the backend parquet engine. For now, only the "fastparquet" + engine supports an explicit `ParquetFile` metadata object. + If a metadata object is supplied, the remote footer metadata + will not need to be transferred into local memory. + fs: AbstractFileSystem, optional + Filesystem object to use for opening the file. If nothing is + specified, an `AbstractFileSystem` object will be inferred. + engine : str, default "auto" + Parquet engine to use for metadata parsing. Allowed options + include "fastparquet", "pyarrow", and "auto". The specified + engine must be installed in the current environment. If + "auto" is specified, and both engines are installed, + "fastparquet" will take precedence over "pyarrow". + columns: list, optional + List of all column names that may be read from the file. + row_groups : list, optional + List of all row-groups that may be read from the file. This + may be a list of row-group indices (integers), or it may be + a list of `RowGroup` metadata objects (if the "fastparquet" + engine is used). + storage_options : dict, optional + Used to generate an `AbstractFileSystem` object if `fs` was + not specified. + strict : bool, optional + Whether the resulting `KnownPartsOfAFile` cache should + fetch reads that go beyond a known byte-range boundary. + If `False` (the default), any read that ends outside a + known part will be zero padded. Note that using + `strict=True` may be useful for debugging. + max_gap : int, optional + Neighboring byte ranges will only be merged when their + inter-range gap is <= `max_gap`. Default is 64KB. + max_block : int, optional + Neighboring byte ranges will only be merged when the size of + the aggregated range is <= `max_block`. Default is 256MB. + footer_sample_size : int, optional + Number of bytes to read from the end of the path to look + for the footer metadata. If the sampled bytes do not contain + the footer, a second read request will be required, and + performance will suffer. Default is 1MB. + **kwargs : + Optional key-word arguments to pass to `fs.open` + """ + + # Make sure we have an `AbstractFileSystem` object + # to work with + if fs is None: + fs = url_to_fs(path, **(storage_options or {}))[0] + + # For now, `columns == []` not supported. Just use + # default `open` command with `path` input + if columns is not None and len(columns) == 0: + return fs.open(path, mode=mode) + + # Set the engine + engine = _set_engine(engine) + + # Fetch the known byte ranges needed to read + # `columns` and/or `row_groups` + data = _get_parquet_byte_ranges( + [path], + fs, + metadata=metadata, + columns=columns, + row_groups=row_groups, + engine=engine, + max_gap=max_gap, + max_block=max_block, + footer_sample_size=footer_sample_size, + ) + + # Extract file name from `data` + fn = next(iter(data)) if data else path + + # Call self.open with "parts" caching + options = kwargs.pop("cache_options", {}).copy() + return fs.open( + fn, + mode=mode, + cache_type="parts", + cache_options={ + **options, + "data": data.get(fn, {}), + "strict": strict, + }, + **kwargs, + ) + + +def _get_parquet_byte_ranges( + paths, + fs, + metadata=None, + columns=None, + row_groups=None, + max_gap=64_000, + max_block=256_000_000, + footer_sample_size=1_000_000, + engine="auto", +): + """Get a dictionary of the known byte ranges needed + to read a specific column/row-group selection from a + Parquet dataset. Each value in the output dictionary + is intended for use as the `data` argument for the + `KnownPartsOfAFile` caching strategy of a single path. + """ + + # Set engine if necessary + if isinstance(engine, str): + engine = _set_engine(engine) + + # Pass to specialized function if metadata is defined + if metadata is not None: + # Use the provided parquet metadata object + # to avoid transferring/parsing footer metadata + return _get_parquet_byte_ranges_from_metadata( + metadata, + fs, + engine, + columns=columns, + row_groups=row_groups, + max_gap=max_gap, + max_block=max_block, + ) + + # Get file sizes asynchronously + file_sizes = fs.sizes(paths) + + # Populate global paths, starts, & ends + result = {} + data_paths = [] + data_starts = [] + data_ends = [] + add_header_magic = True + if columns is None and row_groups is None: + # We are NOT selecting specific columns or row-groups. + # + # We can avoid sampling the footers, and just transfer + # all file data with cat_ranges + for i, path in enumerate(paths): + result[path] = {} + for b in range(0, file_sizes[i], max_block): + data_paths.append(path) + data_starts.append(b) + data_ends.append(min(b + max_block, file_sizes[i])) + add_header_magic = False # "Magic" should already be included + else: + # We ARE selecting specific columns or row-groups. + # + # Gather file footers. + # We just take the last `footer_sample_size` bytes of each + # file (or the entire file if it is smaller than that) + footer_starts = [] + footer_ends = [] + for i, path in enumerate(paths): + footer_ends.append(file_sizes[i]) + sample_size = max(0, file_sizes[i] - footer_sample_size) + footer_starts.append(sample_size) + footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends) + + # Check our footer samples and re-sample if necessary. + missing_footer_starts = footer_starts.copy() + large_footer = 0 + for i, path in enumerate(paths): + footer_size = int.from_bytes(footer_samples[i][-8:-4], "little") + real_footer_start = file_sizes[i] - (footer_size + 8) + if real_footer_start < footer_starts[i]: + missing_footer_starts[i] = real_footer_start + large_footer = max(large_footer, (footer_size + 8)) + if large_footer: + warnings.warn( + f"Not enough data was used to sample the parquet footer. " + f"Try setting footer_sample_size >= {large_footer}." + ) + for i, block in enumerate( + fs.cat_ranges( + paths, + missing_footer_starts, + footer_starts, + ) + ): + footer_samples[i] = block + footer_samples[i] + footer_starts[i] = missing_footer_starts[i] + + # Calculate required byte ranges for each path + for i, path in enumerate(paths): + # Deal with small-file case. + # Just include all remaining bytes of the file + # in a single range. + if file_sizes[i] < max_block: + if footer_starts[i] > 0: + # Only need to transfer the data if the + # footer sample isn't already the whole file + data_paths.append(path) + data_starts.append(0) + data_ends.append(footer_starts[i]) + continue + + # Use "engine" to collect data byte ranges + path_data_starts, path_data_ends = engine._parquet_byte_ranges( + columns, + row_groups=row_groups, + footer=footer_samples[i], + footer_start=footer_starts[i], + ) + + data_paths += [path] * len(path_data_starts) + data_starts += path_data_starts + data_ends += path_data_ends + + # Merge adjacent offset ranges + data_paths, data_starts, data_ends = merge_offset_ranges( + data_paths, + data_starts, + data_ends, + max_gap=max_gap, + max_block=max_block, + sort=False, # Should already be sorted + ) + + # Start by populating `result` with footer samples + for i, path in enumerate(paths): + result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]} + + # Transfer the data byte-ranges into local memory + _transfer_ranges(fs, result, data_paths, data_starts, data_ends) + + # Add b"PAR1" to header if necessary + if add_header_magic: + _add_header_magic(result) + + return result + + +def _get_parquet_byte_ranges_from_metadata( + metadata, + fs, + engine, + columns=None, + row_groups=None, + max_gap=64_000, + max_block=256_000_000, +): + """Simplified version of `_get_parquet_byte_ranges` for + the case that an engine-specific `metadata` object is + provided, and the remote footer metadata does not need to + be transferred before calculating the required byte ranges. + """ + + # Use "engine" to collect data byte ranges + data_paths, data_starts, data_ends = engine._parquet_byte_ranges( + columns, + row_groups=row_groups, + metadata=metadata, + ) + + # Merge adjacent offset ranges + data_paths, data_starts, data_ends = merge_offset_ranges( + data_paths, + data_starts, + data_ends, + max_gap=max_gap, + max_block=max_block, + sort=False, # Should be sorted + ) + + # Transfer the data byte-ranges into local memory + result = {fn: {} for fn in list(set(data_paths))} + _transfer_ranges(fs, result, data_paths, data_starts, data_ends) + + # Add b"PAR1" to header + _add_header_magic(result) + + return result + + +def _transfer_ranges(fs, blocks, paths, starts, ends): + # Use cat_ranges to gather the data byte_ranges + ranges = (paths, starts, ends) + for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)): + blocks[path][(start, stop)] = data + + +def _add_header_magic(data): + # Add b"PAR1" to file headers + for path in list(data.keys()): + add_magic = True + for k in data[path]: + if k[0] == 0 and k[1] >= 4: + add_magic = False + break + if add_magic: + data[path][(0, 4)] = b"PAR1" + + +def _set_engine(engine_str): + # Define a list of parquet engines to try + if engine_str == "auto": + try_engines = ("fastparquet", "pyarrow") + elif not isinstance(engine_str, str): + raise ValueError( + "Failed to set parquet engine! " + "Please pass 'fastparquet', 'pyarrow', or 'auto'" + ) + elif engine_str not in ("fastparquet", "pyarrow"): + raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`") + else: + try_engines = [engine_str] + + # Try importing the engines in `try_engines`, + # and choose the first one that succeeds + for engine in try_engines: + try: + if engine == "fastparquet": + return FastparquetEngine() + elif engine == "pyarrow": + return PyarrowEngine() + except ImportError: + pass + + # Raise an error if a supported parquet engine + # was not found + raise ImportError( + f"The following parquet engines are not installed " + f"in your python environment: {try_engines}." + f"Please install 'fastparquert' or 'pyarrow' to " + f"utilize the `fsspec.parquet` module." + ) + + +class FastparquetEngine: + # The purpose of the FastparquetEngine class is + # to check if fastparquet can be imported (on initialization) + # and to define a `_parquet_byte_ranges` method. In the + # future, this class may also be used to define other + # methods/logic that are specific to fastparquet. + + def __init__(self): + import fastparquet as fp + + self.fp = fp + + def _row_group_filename(self, row_group, pf): + return pf.row_group_filename(row_group) + + def _parquet_byte_ranges( + self, + columns, + row_groups=None, + metadata=None, + footer=None, + footer_start=None, + ): + # Initialize offset ranges and define ParqetFile metadata + pf = metadata + data_paths, data_starts, data_ends = [], [], [] + if pf is None: + pf = self.fp.ParquetFile(io.BytesIO(footer)) + + # Convert columns to a set and add any index columns + # specified in the pandas metadata (just in case) + column_set = None if columns is None else set(columns) + if column_set is not None and hasattr(pf, "pandas_metadata"): + md_index = [ + ind + for ind in pf.pandas_metadata.get("index_columns", []) + # Ignore RangeIndex information + if not isinstance(ind, dict) + ] + column_set |= set(md_index) + + # Check if row_groups is a list of integers + # or a list of row-group metadata + if row_groups and not isinstance(row_groups[0], int): + # Input row_groups contains row-group metadata + row_group_indices = None + else: + # Input row_groups contains row-group indices + row_group_indices = row_groups + row_groups = pf.row_groups + + # Loop through column chunks to add required byte ranges + for r, row_group in enumerate(row_groups): + # Skip this row-group if we are targeting + # specific row-groups + if row_group_indices is None or r in row_group_indices: + # Find the target parquet-file path for `row_group` + fn = self._row_group_filename(row_group, pf) + + for column in row_group.columns: + name = column.meta_data.path_in_schema[0] + # Skip this column if we are targeting a + # specific columns + if column_set is None or name in column_set: + file_offset0 = column.meta_data.dictionary_page_offset + if file_offset0 is None: + file_offset0 = column.meta_data.data_page_offset + num_bytes = column.meta_data.total_compressed_size + if footer_start is None or file_offset0 < footer_start: + data_paths.append(fn) + data_starts.append(file_offset0) + data_ends.append( + min( + file_offset0 + num_bytes, + footer_start or (file_offset0 + num_bytes), + ) + ) + + if metadata: + # The metadata in this call may map to multiple + # file paths. Need to include `data_paths` + return data_paths, data_starts, data_ends + return data_starts, data_ends + + +class PyarrowEngine: + # The purpose of the PyarrowEngine class is + # to check if pyarrow can be imported (on initialization) + # and to define a `_parquet_byte_ranges` method. In the + # future, this class may also be used to define other + # methods/logic that are specific to pyarrow. + + def __init__(self): + import pyarrow.parquet as pq + + self.pq = pq + + def _row_group_filename(self, row_group, metadata): + raise NotImplementedError + + def _parquet_byte_ranges( + self, + columns, + row_groups=None, + metadata=None, + footer=None, + footer_start=None, + ): + if metadata is not None: + raise ValueError("metadata input not supported for PyarrowEngine") + + data_starts, data_ends = [], [] + md = self.pq.ParquetFile(io.BytesIO(footer)).metadata + + # Convert columns to a set and add any index columns + # specified in the pandas metadata (just in case) + column_set = None if columns is None else set(columns) + if column_set is not None: + schema = md.schema.to_arrow_schema() + has_pandas_metadata = ( + schema.metadata is not None and b"pandas" in schema.metadata + ) + if has_pandas_metadata: + md_index = [ + ind + for ind in json.loads( + schema.metadata[b"pandas"].decode("utf8") + ).get("index_columns", []) + # Ignore RangeIndex information + if not isinstance(ind, dict) + ] + column_set |= set(md_index) + + # Loop through column chunks to add required byte ranges + for r in range(md.num_row_groups): + # Skip this row-group if we are targeting + # specific row-groups + if row_groups is None or r in row_groups: + row_group = md.row_group(r) + for c in range(row_group.num_columns): + column = row_group.column(c) + name = column.path_in_schema + # Skip this column if we are targeting a + # specific columns + split_name = name.split(".")[0] + if ( + column_set is None + or name in column_set + or split_name in column_set + ): + file_offset0 = column.dictionary_page_offset + if file_offset0 is None: + file_offset0 = column.data_page_offset + num_bytes = column.total_compressed_size + if file_offset0 < footer_start: + data_starts.append(file_offset0) + data_ends.append( + min(file_offset0 + num_bytes, footer_start) + ) + return data_starts, data_ends diff --git a/venv/lib/python3.13/site-packages/fsspec/registry.py b/venv/lib/python3.13/site-packages/fsspec/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..96ffad7f4a889662c8fb4a006e1c497652992430 --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/registry.py @@ -0,0 +1,330 @@ +from __future__ import annotations + +import importlib +import types +import warnings + +__all__ = ["registry", "get_filesystem_class", "default"] + +# internal, mutable +_registry: dict[str, type] = {} + +# external, immutable +registry = types.MappingProxyType(_registry) +default = "file" + + +def register_implementation(name, cls, clobber=False, errtxt=None): + """Add implementation class to the registry + + Parameters + ---------- + name: str + Protocol name to associate with the class + cls: class or str + if a class: fsspec-compliant implementation class (normally inherits from + ``fsspec.AbstractFileSystem``, gets added straight to the registry. If a + str, the full path to an implementation class like package.module.class, + which gets added to known_implementations, + so the import is deferred until the filesystem is actually used. + clobber: bool (optional) + Whether to overwrite a protocol with the same name; if False, will raise + instead. + errtxt: str (optional) + If given, then a failure to import the given class will result in this + text being given. + """ + if isinstance(cls, str): + if name in known_implementations and clobber is False: + if cls != known_implementations[name]["class"]: + raise ValueError( + f"Name ({name}) already in the known_implementations and clobber " + f"is False" + ) + else: + known_implementations[name] = { + "class": cls, + "err": errtxt or f"{cls} import failed for protocol {name}", + } + + else: + if name in registry and clobber is False: + if _registry[name] is not cls: + raise ValueError( + f"Name ({name}) already in the registry and clobber is False" + ) + else: + _registry[name] = cls + + +# protocols mapped to the class which implements them. This dict can be +# updated with register_implementation +known_implementations = { + "abfs": { + "class": "adlfs.AzureBlobFileSystem", + "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", + }, + "adl": { + "class": "adlfs.AzureDatalakeFileSystem", + "err": "Install adlfs to access Azure Datalake Gen1", + }, + "arrow_hdfs": { + "class": "fsspec.implementations.arrow.HadoopFileSystem", + "err": "pyarrow and local java libraries required for HDFS", + }, + "asynclocal": { + "class": "morefs.asyn_local.AsyncLocalFileSystem", + "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem", + }, + "asyncwrapper": { + "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper", + }, + "az": { + "class": "adlfs.AzureBlobFileSystem", + "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", + }, + "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"}, + "box": { + "class": "boxfs.BoxFileSystem", + "err": "Please install boxfs to access BoxFileSystem", + }, + "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"}, + "dask": { + "class": "fsspec.implementations.dask.DaskWorkerFileSystem", + "err": "Install dask distributed to access worker file system", + }, + "data": {"class": "fsspec.implementations.data.DataFileSystem"}, + "dbfs": { + "class": "fsspec.implementations.dbfs.DatabricksFileSystem", + "err": "Install the requests package to use the DatabricksFileSystem", + }, + "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"}, + "dropbox": { + "class": "dropboxdrivefs.DropboxDriveFileSystem", + "err": ( + 'DropboxFileSystem requires "dropboxdrivefs","requests" and "' + '"dropbox" to be installed' + ), + }, + "dvc": { + "class": "dvc.api.DVCFileSystem", + "err": "Install dvc to access DVCFileSystem", + }, + "file": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"}, + "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"}, + "gcs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", + }, + "gdrive": { + "class": "gdrive_fsspec.GoogleDriveFileSystem", + "err": "Please install gdrive_fs for access to Google Drive", + }, + "generic": {"class": "fsspec.generic.GenericFileSystem"}, + "gist": { + "class": "fsspec.implementations.gist.GistFileSystem", + "err": "Install the requests package to use the gist FS", + }, + "git": { + "class": "fsspec.implementations.git.GitFileSystem", + "err": "Install pygit2 to browse local git repos", + }, + "github": { + "class": "fsspec.implementations.github.GithubFileSystem", + "err": "Install the requests package to use the github FS", + }, + "gs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", + }, + "hdfs": { + "class": "fsspec.implementations.arrow.HadoopFileSystem", + "err": "pyarrow and local java libraries required for HDFS", + }, + "hf": { + "class": "huggingface_hub.HfFileSystem", + "err": "Install huggingface_hub to access HfFileSystem", + }, + "http": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "https": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "jlab": { + "class": "fsspec.implementations.jupyter.JupyterFileSystem", + "err": "Jupyter FS requires requests to be installed", + }, + "jupyter": { + "class": "fsspec.implementations.jupyter.JupyterFileSystem", + "err": "Jupyter FS requires requests to be installed", + }, + "lakefs": { + "class": "lakefs_spec.LakeFSFileSystem", + "err": "Please install lakefs-spec to access LakeFSFileSystem", + }, + "libarchive": { + "class": "fsspec.implementations.libarchive.LibArchiveFileSystem", + "err": "LibArchive requires to be installed", + }, + "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, + "oci": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Object Storage", + }, + "ocilake": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Data Lake", + }, + "oss": { + "class": "ossfs.OSSFileSystem", + "err": "Install ossfs to access Alibaba Object Storage System", + }, + "pyscript": { + "class": "pyscript_fsspec_client.client.PyscriptFileSystem", + "err": "Install requests (cpython) or run in pyscript", + }, + "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"}, + "root": { + "class": "fsspec_xrootd.XRootDFileSystem", + "err": ( + "Install fsspec-xrootd to access xrootd storage system. " + "Note: 'root' is the protocol name for xrootd storage systems, " + "not referring to root directories" + ), + }, + "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "sftp": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', + }, + "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"}, + "smb": { + "class": "fsspec.implementations.smb.SMBFileSystem", + "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed', + }, + "ssh": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', + }, + "tar": {"class": "fsspec.implementations.tar.TarFileSystem"}, + "tos": { + "class": "tosfs.TosFileSystem", + "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage", + }, + "tosfs": { + "class": "tosfs.TosFileSystem", + "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage", + }, + "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"}, + "webdav": { + "class": "webdav4.fsspec.WebdavFileSystem", + "err": "Install webdav4 to access WebDAV", + }, + "webhdfs": { + "class": "fsspec.implementations.webhdfs.WebHDFS", + "err": 'webHDFS access requires "requests" to be installed', + }, + "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"}, +} + +assert list(known_implementations) == sorted(known_implementations), ( + "Not in alphabetical order" +) + + +def get_filesystem_class(protocol): + """Fetch named protocol implementation from the registry + + The dict ``known_implementations`` maps protocol names to the locations + of classes implementing the corresponding file-system. When used for the + first time, appropriate imports will happen and the class will be placed in + the registry. All subsequent calls will fetch directly from the registry. + + Some protocol implementations require additional dependencies, and so the + import may fail. In this case, the string in the "err" field of the + ``known_implementations`` will be given as the error message. + """ + if not protocol: + protocol = default + + if protocol not in registry: + if protocol not in known_implementations: + raise ValueError(f"Protocol not known: {protocol}") + bit = known_implementations[protocol] + try: + register_implementation(protocol, _import_class(bit["class"])) + except ImportError as e: + raise ImportError(bit.get("err")) from e + cls = registry[protocol] + if getattr(cls, "protocol", None) in ("abstract", None): + cls.protocol = protocol + + return cls + + +s3_msg = """Your installed version of s3fs is very old and known to cause +severe performance issues, see also https://github.com/dask/dask/issues/10276 + +To fix, you should specify a lower version bound on s3fs, or +update the current installation. +""" + + +def _import_class(fqp: str): + """Take a fully-qualified path and return the imported class or identifier. + + ``fqp`` is of the form "package.module.klass" or + "package.module:subobject.klass". + + Warnings + -------- + This can import arbitrary modules. Make sure you haven't installed any modules + that may execute malicious code at import time. + """ + if ":" in fqp: + mod, name = fqp.rsplit(":", 1) + else: + mod, name = fqp.rsplit(".", 1) + + is_s3 = mod == "s3fs" + mod = importlib.import_module(mod) + if is_s3 and mod.__version__.split(".") < ["0", "5"]: + warnings.warn(s3_msg) + for part in name.split("."): + mod = getattr(mod, part) + + if not isinstance(mod, type): + raise TypeError(f"{fqp} is not a class") + + return mod + + +def filesystem(protocol, **storage_options): + """Instantiate filesystems for given protocol and arguments + + ``storage_options`` are specific to the protocol being chosen, and are + passed directly to the class. + """ + if protocol == "arrow_hdfs": + warnings.warn( + "The 'arrow_hdfs' protocol has been deprecated and will be " + "removed in the future. Specify it as 'hdfs'.", + DeprecationWarning, + ) + + cls = get_filesystem_class(protocol) + return cls(**storage_options) + + +def available_protocols(): + """Return a list of the implemented protocols. + + Note that any given protocol may require extra packages to be importable. + """ + return list(known_implementations) diff --git a/venv/lib/python3.13/site-packages/fsspec/spec.py b/venv/lib/python3.13/site-packages/fsspec/spec.py new file mode 100644 index 0000000000000000000000000000000000000000..b67d5c16fcdf09ce6f9e1354727196042cde3c4c --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/spec.py @@ -0,0 +1,2281 @@ +from __future__ import annotations + +import io +import json +import logging +import os +import threading +import warnings +import weakref +from errno import ESPIPE +from glob import has_magic +from hashlib import sha256 +from typing import Any, ClassVar + +from .callbacks import DEFAULT_CALLBACK +from .config import apply_config, conf +from .dircache import DirCache +from .transaction import Transaction +from .utils import ( + _unstrip_protocol, + glob_translate, + isfilelike, + other_paths, + read_block, + stringify_path, + tokenize, +) + +logger = logging.getLogger("fsspec") + + +def make_instance(cls, args, kwargs): + return cls(*args, **kwargs) + + +class _Cached(type): + """ + Metaclass for caching file system instances. + + Notes + ----- + Instances are cached according to + + * The values of the class attributes listed in `_extra_tokenize_attributes` + * The arguments passed to ``__init__``. + + This creates an additional reference to the filesystem, which prevents the + filesystem from being garbage collected when all *user* references go away. + A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also* + be made for a filesystem instance to be garbage collected. + """ + + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + # Note: we intentionally create a reference here, to avoid garbage + # collecting instances when all other references are gone. To really + # delete a FileSystem, the cache must be cleared. + if conf.get("weakref_instance_cache"): # pragma: no cover + # debug option for analysing fork/spawn conditions + cls._cache = weakref.WeakValueDictionary() + else: + cls._cache = {} + cls._pid = os.getpid() + + def __call__(cls, *args, **kwargs): + kwargs = apply_config(cls, kwargs) + extra_tokens = tuple( + getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes + ) + strip_tokenize_options = { + k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs + } + token = tokenize( + cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs + ) + skip = kwargs.pop("skip_instance_cache", False) + if os.getpid() != cls._pid: + cls._cache.clear() + cls._pid = os.getpid() + if not skip and cls.cachable and token in cls._cache: + cls._latest = token + return cls._cache[token] + else: + obj = super().__call__(*args, **kwargs, **strip_tokenize_options) + # Setting _fs_token here causes some static linters to complain. + obj._fs_token_ = token + obj.storage_args = args + obj.storage_options = kwargs + if obj.async_impl and obj.mirror_sync_methods: + from .asyn import mirror_sync_methods + + mirror_sync_methods(obj) + + if cls.cachable and not skip: + cls._latest = token + cls._cache[token] = obj + return obj + + +class AbstractFileSystem(metaclass=_Cached): + """ + An abstract super-class for pythonic file-systems + + Implementations are expected to be compatible with or, better, subclass + from here. + """ + + cachable = True # this class can be cached, instances reused + _cached = False + blocksize = 2**22 + sep = "/" + protocol: ClassVar[str | tuple[str, ...]] = "abstract" + _latest = None + async_impl = False + mirror_sync_methods = False + root_marker = "" # For some FSs, may require leading '/' or other character + transaction_type = Transaction + + #: Extra *class attributes* that should be considered when hashing. + _extra_tokenize_attributes = () + #: *storage options* that should not be considered when hashing. + _strip_tokenize_options = () + + # Set by _Cached metaclass + storage_args: tuple[Any, ...] + storage_options: dict[str, Any] + + def __init__(self, *args, **storage_options): + """Create and configure file-system instance + + Instances may be cachable, so if similar enough arguments are seen + a new instance is not required. The token attribute exists to allow + implementations to cache instances if they wish. + + A reasonable default should be provided if there are no arguments. + + Subclasses should call this method. + + Parameters + ---------- + use_listings_cache, listings_expiry_time, max_paths: + passed to ``DirCache``, if the implementation supports + directory listing caching. Pass use_listings_cache=False + to disable such caching. + skip_instance_cache: bool + If this is a cachable implementation, pass True here to force + creating a new instance even if a matching instance exists, and prevent + storing this instance. + asynchronous: bool + loop: asyncio-compatible IOLoop or None + """ + if self._cached: + # reusing instance, don't change + return + self._cached = True + self._intrans = False + self._transaction = None + self._invalidated_caches_in_transaction = [] + self.dircache = DirCache(**storage_options) + + if storage_options.pop("add_docs", None): + warnings.warn("add_docs is no longer supported.", FutureWarning) + + if storage_options.pop("add_aliases", None): + warnings.warn("add_aliases has been removed.", FutureWarning) + # This is set in _Cached + self._fs_token_ = None + + @property + def fsid(self): + """Persistent filesystem id that can be used to compare filesystems + across sessions. + """ + raise NotImplementedError + + @property + def _fs_token(self): + return self._fs_token_ + + def __dask_tokenize__(self): + return self._fs_token + + def __hash__(self): + return int(self._fs_token, 16) + + def __eq__(self, other): + return isinstance(other, type(self)) and self._fs_token == other._fs_token + + def __reduce__(self): + return make_instance, (type(self), self.storage_args, self.storage_options) + + @classmethod + def _strip_protocol(cls, path): + """Turn path from fully-qualified to file-system-specific + + May require FS-specific handling, e.g., for relative paths or links. + """ + if isinstance(path, list): + return [cls._strip_protocol(p) for p in path] + path = stringify_path(path) + protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol + for protocol in protos: + if path.startswith(protocol + "://"): + path = path[len(protocol) + 3 :] + elif path.startswith(protocol + "::"): + path = path[len(protocol) + 2 :] + path = path.rstrip("/") + # use of root_marker to make minimum required path, e.g., "/" + return path or cls.root_marker + + def unstrip_protocol(self, name: str) -> str: + """Format FS-specific path to generic, including protocol""" + protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol + for protocol in protos: + if name.startswith(f"{protocol}://"): + return name + return f"{protos[0]}://{name}" + + @staticmethod + def _get_kwargs_from_urls(path): + """If kwargs can be encoded in the paths, extract them here + + This should happen before instantiation of the class; incoming paths + then should be amended to strip the options in methods. + + Examples may look like an sftp path "sftp://user@host:/my/path", where + the user and host should become kwargs and later get stripped. + """ + # by default, nothing happens + return {} + + @classmethod + def current(cls): + """Return the most recently instantiated FileSystem + + If no instance has been created, then create one with defaults + """ + if cls._latest in cls._cache: + return cls._cache[cls._latest] + return cls() + + @property + def transaction(self): + """A context within which files are committed together upon exit + + Requires the file class to implement `.commit()` and `.discard()` + for the normal and exception cases. + """ + if self._transaction is None: + self._transaction = self.transaction_type(self) + return self._transaction + + def start_transaction(self): + """Begin write transaction for deferring files, non-context version""" + self._intrans = True + self._transaction = self.transaction_type(self) + return self.transaction + + def end_transaction(self): + """Finish write transaction, non-context version""" + self.transaction.complete() + self._transaction = None + # The invalid cache must be cleared after the transaction is completed. + for path in self._invalidated_caches_in_transaction: + self.invalidate_cache(path) + self._invalidated_caches_in_transaction.clear() + + def invalidate_cache(self, path=None): + """ + Discard any cached directory information + + Parameters + ---------- + path: string or None + If None, clear all listings cached else listings at or under given + path. + """ + # Not necessary to implement invalidation mechanism, may have no cache. + # But if have, you should call this method of parent class from your + # subclass to ensure expiring caches after transacations correctly. + # See the implementation of FTPFileSystem in ftp.py + if self._intrans: + self._invalidated_caches_in_transaction.append(path) + + def mkdir(self, path, create_parents=True, **kwargs): + """ + Create directory entry at path + + For systems that don't have true directories, may create an for + this instance only and not touch the real filesystem + + Parameters + ---------- + path: str + location + create_parents: bool + if True, this is equivalent to ``makedirs`` + kwargs: + may be permissions, etc. + """ + pass # not necessary to implement, may not have directories + + def makedirs(self, path, exist_ok=False): + """Recursively make directories + + Creates directory at path and any intervening required directories. + Raises exception if, for instance, the path already exists but is a + file. + + Parameters + ---------- + path: str + leaf directory name + exist_ok: bool (False) + If False, will error if the target already exists + """ + pass # not necessary to implement, may not have directories + + def rmdir(self, path): + """Remove a directory, if empty""" + pass # not necessary to implement, may not have directories + + def ls(self, path, detail=True, **kwargs): + """List objects at path. + + This should include subdirectories and files at that location. The + difference between a file and a directory must be clear when details + are requested. + + The specific keys, or perhaps a FileInfo class, or similar, is TBD, + but must be consistent across implementations. + Must include: + + - full path to the entry (without protocol) + - size of the entry, in bytes. If the value cannot be determined, will + be ``None``. + - type of entry, "file", "directory" or other + + Additional information + may be present, appropriate to the file-system, e.g., generation, + checksum, etc. + + May use refresh=True|False to allow use of self._ls_from_cache to + check for a saved listing and avoid calling the backend. This would be + common where listing may be expensive. + + Parameters + ---------- + path: str + detail: bool + if True, gives a list of dictionaries, where each is the same as + the result of ``info(path)``. If False, gives a list of paths + (str). + kwargs: may have additional backend-specific options, such as version + information + + Returns + ------- + List of strings if detail is False, or list of directory information + dicts if detail is True. + """ + raise NotImplementedError + + def _ls_from_cache(self, path): + """Check cache for listing + + Returns listing, if found (may be empty list for a directly that exists + but contains nothing), None if not in cache. + """ + parent = self._parent(path) + try: + return self.dircache[path.rstrip("/")] + except KeyError: + pass + try: + files = [ + f + for f in self.dircache[parent] + if f["name"] == path + or (f["name"] == path.rstrip("/") and f["type"] == "directory") + ] + if len(files) == 0: + # parent dir was listed but did not contain this file + raise FileNotFoundError(path) + return files + except KeyError: + pass + + def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): + """Return all files under the given path. + + List all files, recursing into subdirectories; output is iterator-style, + like ``os.walk()``. For a simple list of files, ``find()`` is available. + + When topdown is True, the caller can modify the dirnames list in-place (perhaps + using del or slice assignment), and walk() will + only recurse into the subdirectories whose names remain in dirnames; + this can be used to prune the search, impose a specific order of visiting, + or even to inform walk() about directories the caller creates or renames before + it resumes walk() again. + Modifying dirnames when topdown is False has no effect. (see os.walk) + + Note that the "files" outputted will include anything that is not + a directory, such as links. + + Parameters + ---------- + path: str + Root to recurse into + maxdepth: int + Maximum recursion depth. None means limitless, but not recommended + on link-based file-systems. + topdown: bool (True) + Whether to walk the directory tree from the top downwards or from + the bottom upwards. + on_error: "omit", "raise", a callable + if omit (default), path with exception will simply be empty; + If raise, an underlying exception will be raised; + if callable, it will be called with a single OSError instance as argument + kwargs: passed to ``ls`` + """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + path = self._strip_protocol(path) + full_dirs = {} + dirs = {} + files = {} + + detail = kwargs.pop("detail", False) + try: + listing = self.ls(path, detail=True, **kwargs) + except (FileNotFoundError, OSError) as e: + if on_error == "raise": + raise + if callable(on_error): + on_error(e) + return + + for info in listing: + # each info name must be at least [path]/part , but here + # we check also for names like [path]/part/ + pathname = info["name"].rstrip("/") + name = pathname.rsplit("/", 1)[-1] + if info["type"] == "directory" and pathname != path: + # do not include "self" path + full_dirs[name] = pathname + dirs[name] = info + elif pathname == path: + # file-like with same name as give path + files[""] = info + else: + files[name] = info + + if not detail: + dirs = list(dirs) + files = list(files) + + if topdown: + # Yield before recursion if walking top down + yield path, dirs, files + + if maxdepth is not None: + maxdepth -= 1 + if maxdepth < 1: + if not topdown: + yield path, dirs, files + return + + for d in dirs: + yield from self.walk( + full_dirs[d], + maxdepth=maxdepth, + detail=detail, + topdown=topdown, + **kwargs, + ) + + if not topdown: + # Yield after recursion if walking bottom up + yield path, dirs, files + + def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + """List all files below path. + + Like posix ``find`` command without conditions + + Parameters + ---------- + path : str + maxdepth: int or None + If not None, the maximum number of levels to descend + withdirs: bool + Whether to include directory paths in the output. This is True + when used by glob, but users usually only want files. + kwargs are passed to ``ls``. + """ + # TODO: allow equivalent of -name parameter + path = self._strip_protocol(path) + out = {} + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and self.isdir(path): + out[path] = self.info(path) + + for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs): + if withdirs: + files.update(dirs) + out.update({info["name"]: info for name, info in files.items()}) + if not out and self.isfile(path): + # walk works on directories, but find should also return [path] + # when path happens to be a file + out[path] = {} + names = sorted(out) + if not detail: + return names + else: + return {name: out[name] for name in names} + + def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): + """Space used by files and optionally directories within a path + + Directory size does not include the size of its contents. + + Parameters + ---------- + path: str + total: bool + Whether to sum all the file sizes + maxdepth: int or None + Maximum number of directory levels to descend, None for unlimited. + withdirs: bool + Whether to include directory paths in the output. + kwargs: passed to ``find`` + + Returns + ------- + Dict of {path: size} if total=False, or int otherwise, where numbers + refer to bytes used. + """ + sizes = {} + if withdirs and self.isdir(path): + # Include top-level directory in output + info = self.info(path) + sizes[info["name"]] = info["size"] + for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs): + info = self.info(f) + sizes[info["name"]] = info["size"] + if total: + return sum(sizes.values()) + else: + return sizes + + def glob(self, path, maxdepth=None, **kwargs): + """Find files by glob-matching. + + Pattern matching capabilities for finding files that match the given pattern. + + Parameters + ---------- + path: str + The glob pattern to match against + maxdepth: int or None + Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found. + Must be at least 1 if provided. + kwargs: + Additional arguments passed to ``find`` (e.g., detail=True) + + Returns + ------- + List of matched paths, or dict of paths and their info if detail=True + + Notes + ----- + Supported patterns: + - '*': Matches any sequence of characters within a single directory level + - ``'**'``: Matches any number of directory levels (must be an entire path component) + - '?': Matches exactly one character + - '[abc]': Matches any character in the set + - '[a-z]': Matches any character in the range + - '[!abc]': Matches any character NOT in the set + + Special behaviors: + - If the path ends with '/', only folders are returned + - Consecutive '*' characters are compressed into a single '*' + - Empty brackets '[]' never match anything + - Negated empty brackets '[!]' match any single character + - Special characters in character classes are escaped properly + + Limitations: + - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``) + - No brace expansion ('{a,b}.txt') + - No extended glob patterns ('+(pattern)', '!(pattern)') + """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + import re + + seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,) + ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash + path = self._strip_protocol(path) + append_slash_to_dirname = ends_with_sep or path.endswith( + tuple(sep + "**" for sep in seps) + ) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) + + min_idx = min(idx_star, idx_qmark, idx_brace) + + detail = kwargs.pop("detail", False) + + if not has_magic(path): + if self.exists(path, **kwargs): + if not detail: + return [path] + else: + return {path: self.info(path, **kwargs)} + else: + if not detail: + return [] # glob of non-existent returns empty + else: + return {} + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 + else: + root = "" + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None + + allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs) + + pattern = glob_translate(path + ("/" if ends_with_sep else "")) + pattern = re.compile(pattern) + + out = { + p: info + for p, info in sorted(allpaths.items()) + if pattern.match( + p + "/" + if append_slash_to_dirname and info["type"] == "directory" + else p + ) + } + + if detail: + return out + else: + return list(out) + + def exists(self, path, **kwargs): + """Is there a file at the given path""" + try: + self.info(path, **kwargs) + return True + except: # noqa: E722 + # any exception allowed bar FileNotFoundError? + return False + + def lexists(self, path, **kwargs): + """If there is a file at the given path (including + broken links)""" + return self.exists(path) + + def info(self, path, **kwargs): + """Give details of entry at path + + Returns a single dictionary, with exactly the same information as ``ls`` + would with ``detail=True``. + + The default implementation calls ls and could be overridden by a + shortcut. kwargs are passed on to ```ls()``. + + Some file systems might not be able to measure the file's size, in + which case, the returned dict will include ``'size': None``. + + Returns + ------- + dict with keys: name (full path in the FS), size (in bytes), type (file, + directory, or something else) and other FS-specific keys. + """ + path = self._strip_protocol(path) + out = self.ls(self._parent(path), detail=True, **kwargs) + out = [o for o in out if o["name"].rstrip("/") == path] + if out: + return out[0] + out = self.ls(path, detail=True, **kwargs) + path = path.rstrip("/") + out1 = [o for o in out if o["name"].rstrip("/") == path] + if len(out1) == 1: + if "size" not in out1[0]: + out1[0]["size"] = None + return out1[0] + elif len(out1) > 1 or out: + return {"name": path, "size": 0, "type": "directory"} + else: + raise FileNotFoundError(path) + + def checksum(self, path): + """Unique value for current version of file + + If the checksum is the same from one moment to another, the contents + are guaranteed to be the same. If the checksum changes, the contents + *might* have changed. + + This should normally be overridden; default will probably capture + creation/modification timestamp (which would be good) or maybe + access timestamp (which would be bad) + """ + return int(tokenize(self.info(path)), 16) + + def size(self, path): + """Size in bytes of file""" + return self.info(path).get("size", None) + + def sizes(self, paths): + """Size in bytes of each file in a list of paths""" + return [self.size(p) for p in paths] + + def isdir(self, path): + """Is this entry directory-like?""" + try: + return self.info(path)["type"] == "directory" + except OSError: + return False + + def isfile(self, path): + """Is this entry file-like?""" + try: + return self.info(path)["type"] == "file" + except: # noqa: E722 + return False + + def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs): + """Get the contents of the file as a string. + + Parameters + ---------- + path: str + URL of file on this filesystems + encoding, errors, newline: same as `open`. + """ + with self.open( + path, + mode="r", + encoding=encoding, + errors=errors, + newline=newline, + **kwargs, + ) as f: + return f.read() + + def write_text( + self, path, value, encoding=None, errors=None, newline=None, **kwargs + ): + """Write the text to the given file. + + An existing file will be overwritten. + + Parameters + ---------- + path: str + URL of file on this filesystems + value: str + Text to write. + encoding, errors, newline: same as `open`. + """ + with self.open( + path, + mode="w", + encoding=encoding, + errors=errors, + newline=newline, + **kwargs, + ) as f: + return f.write(value) + + def cat_file(self, path, start=None, end=None, **kwargs): + """Get the content of a file + + Parameters + ---------- + path: URL of file on this filesystems + start, end: int + Bytes limits of the read. If negative, backwards from end, + like usual python slices. Either can be None for start or + end of file, respectively + kwargs: passed to ``open()``. + """ + # explicitly set buffering off? + with self.open(path, "rb", **kwargs) as f: + if start is not None: + if start >= 0: + f.seek(start) + else: + f.seek(max(0, f.size + start)) + if end is not None: + if end < 0: + end = f.size + end + return f.read(end - f.tell()) + return f.read() + + def pipe_file(self, path, value, mode="overwrite", **kwargs): + """Set the bytes of given file""" + if mode == "create" and self.exists(path): + # non-atomic but simple way; or could use "xb" in open(), which is likely + # not as well supported + raise FileExistsError + with self.open(path, "wb", **kwargs) as f: + f.write(value) + + def pipe(self, path, value=None, **kwargs): + """Put value into path + + (counterpart to ``cat``) + + Parameters + ---------- + path: string or dict(str, bytes) + If a string, a single remote location to put ``value`` bytes; if a dict, + a mapping of {path: bytesvalue}. + value: bytes, optional + If using a single path, these are the bytes to put there. Ignored if + ``path`` is a dict + """ + if isinstance(path, str): + self.pipe_file(self._strip_protocol(path), value, **kwargs) + elif isinstance(path, dict): + for k, v in path.items(): + self.pipe_file(self._strip_protocol(k), v, **kwargs) + else: + raise ValueError("path must be str or dict") + + def cat_ranges( + self, paths, starts, ends, max_gap=None, on_error="return", **kwargs + ): + """Get the contents of byte ranges from one or more files + + Parameters + ---------- + paths: list + A list of of filepaths on this filesystems + starts, ends: int or list + Bytes limits of the read. If using a single int, the same value will be + used to read all the specified files. + """ + if max_gap is not None: + raise NotImplementedError + if not isinstance(paths, list): + raise TypeError + if not isinstance(starts, list): + starts = [starts] * len(paths) + if not isinstance(ends, list): + ends = [ends] * len(paths) + if len(starts) != len(paths) or len(ends) != len(paths): + raise ValueError + out = [] + for p, s, e in zip(paths, starts, ends): + try: + out.append(self.cat_file(p, s, e)) + except Exception as e: + if on_error == "return": + out.append(e) + else: + raise + return out + + def cat(self, path, recursive=False, on_error="raise", **kwargs): + """Fetch (potentially multiple) paths' contents + + Parameters + ---------- + recursive: bool + If True, assume the path(s) are directories, and get all the + contained files + on_error : "raise", "omit", "return" + If raise, an underlying exception will be raised (converted to KeyError + if the type is in self.missing_exceptions); if omit, keys with exception + will simply not be included in the output; if "return", all keys are + included in the output, but the value will be bytes or an exception + instance. + kwargs: passed to cat_file + + Returns + ------- + dict of {path: contents} if there are multiple paths + or the path has been otherwise expanded + """ + paths = self.expand_path(path, recursive=recursive, **kwargs) + if ( + len(paths) > 1 + or isinstance(path, list) + or paths[0] != self._strip_protocol(path) + ): + out = {} + for path in paths: + try: + out[path] = self.cat_file(path, **kwargs) + except Exception as e: + if on_error == "raise": + raise + if on_error == "return": + out[path] = e + return out + else: + return self.cat_file(paths[0], **kwargs) + + def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs): + """Copy single remote file to local""" + from .implementations.local import LocalFileSystem + + if isfilelike(lpath): + outfile = lpath + elif self.isdir(rpath): + os.makedirs(lpath, exist_ok=True) + return None + + fs = LocalFileSystem(auto_mkdir=True) + fs.makedirs(fs._parent(lpath), exist_ok=True) + + with self.open(rpath, "rb", **kwargs) as f1: + if outfile is None: + outfile = open(lpath, "wb") + + try: + callback.set_size(getattr(f1, "size", None)) + data = True + while data: + data = f1.read(self.blocksize) + segment_len = outfile.write(data) + if segment_len is None: + segment_len = len(data) + callback.relative_update(segment_len) + finally: + if not isfilelike(lpath): + outfile.close() + + def get( + self, + rpath, + lpath, + recursive=False, + callback=DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): + """Copy file(s) to local. + + Copies a specific file or tree of files (if recursive=True). If lpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. Can submit a list of paths, which may be glob-patterns + and will be expanded. + + Calls get_file for each source. + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + ) + + source_is_str = isinstance(rpath, str) + rpaths = self.expand_path( + rpath, recursive=recursive, maxdepth=maxdepth, **kwargs + ) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))] + if not rpaths: + return + + if isinstance(lpath, str): + lpath = make_path_posix(lpath) + + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( + trailing_sep(lpath) or LocalFileSystem().isdir(lpath) + ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath)) + ) + lpaths = other_paths( + rpaths, + lpath, + exists=exists, + flatten=not source_is_str, + ) + + callback.set_size(len(lpaths)) + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + with callback.branched(rpath, lpath) as child: + self.get_file(rpath, lpath, callback=child, **kwargs) + + def put_file( + self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs + ): + """Copy single file to remote""" + if mode == "create" and self.exists(rpath): + raise FileExistsError + if os.path.isdir(lpath): + self.makedirs(rpath, exist_ok=True) + return None + + with open(lpath, "rb") as f1: + size = f1.seek(0, 2) + callback.set_size(size) + f1.seek(0) + + self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True) + with self.open(rpath, "wb", **kwargs) as f2: + while f1.tell() < size: + data = f1.read(self.blocksize) + segment_len = f2.write(data) + if segment_len is None: + segment_len = len(data) + callback.relative_update(segment_len) + + def put( + self, + lpath, + rpath, + recursive=False, + callback=DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): + """Copy file(s) from local. + + Copies a specific file or tree of files (if recursive=True). If rpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. + + Calls put_file for each source. + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + ) + + source_is_str = isinstance(lpath, str) + if source_is_str: + lpath = make_path_posix(lpath) + fs = LocalFileSystem() + lpaths = fs.expand_path( + lpath, recursive=recursive, maxdepth=maxdepth, **kwargs + ) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))] + if not lpaths: + return + + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or self.isdir(rpath) + ) + + rpath = ( + self._strip_protocol(rpath) + if isinstance(rpath, str) + else [self._strip_protocol(p) for p in rpath] + ) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) + rpaths = other_paths( + lpaths, + rpath, + exists=exists, + flatten=not source_is_str, + ) + + callback.set_size(len(rpaths)) + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + with callback.branched(lpath, rpath) as child: + self.put_file(lpath, rpath, callback=child, **kwargs) + + def head(self, path, size=1024): + """Get the first ``size`` bytes from file""" + with self.open(path, "rb") as f: + return f.read(size) + + def tail(self, path, size=1024): + """Get the last ``size`` bytes from file""" + with self.open(path, "rb") as f: + f.seek(max(-size, -f.size), 2) + return f.read() + + def cp_file(self, path1, path2, **kwargs): + raise NotImplementedError + + def copy( + self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs + ): + """Copy within two locations in the filesystem + + on_error : "raise", "ignore" + If raise, any not-found exceptions will be raised; if ignore any + not-found exceptions will cause the path to be skipped; defaults to + raise unless recursive is true, where the default is ignore + """ + if on_error is None and recursive: + on_error = "ignore" + elif on_error is None: + on_error = "raise" + + if isinstance(path1, list) and isinstance(path2, list): + # No need to expand paths when both source and destination + # are provided as lists + paths1 = path1 + paths2 = path2 + else: + from .implementations.local import trailing_sep + + source_is_str = isinstance(path1, str) + paths1 = self.expand_path( + path1, recursive=recursive, maxdepth=maxdepth, **kwargs + ) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))] + if not paths1: + return + + source_is_file = len(paths1) == 1 + dest_is_dir = isinstance(path2, str) and ( + trailing_sep(path2) or self.isdir(path2) + ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) + paths2 = other_paths( + paths1, + path2, + exists=exists, + flatten=not source_is_str, + ) + + for p1, p2 in zip(paths1, paths2): + try: + self.cp_file(p1, p2, **kwargs) + except FileNotFoundError: + if on_error == "raise": + raise + + def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): + """Turn one or more globs or directories into a list of all matching paths + to files or directories. + + kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls`` + """ + + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + if isinstance(path, (str, os.PathLike)): + out = self.expand_path([path], recursive, maxdepth, **kwargs) + else: + out = set() + path = [self._strip_protocol(p) for p in path] + for p in path: + if has_magic(p): + bit = set(self.glob(p, maxdepth=maxdepth, **kwargs)) + out |= bit + if recursive: + # glob call above expanded one depth so if maxdepth is defined + # then decrement it in expand_path call below. If it is zero + # after decrementing then avoid expand_path call. + if maxdepth is not None and maxdepth <= 1: + continue + out |= set( + self.expand_path( + list(bit), + recursive=recursive, + maxdepth=maxdepth - 1 if maxdepth is not None else None, + **kwargs, + ) + ) + continue + elif recursive: + rec = set( + self.find( + p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs + ) + ) + out |= rec + if p not in out and (recursive is False or self.exists(p)): + # should only check once, for the root + out.add(p) + if not out: + raise FileNotFoundError(path) + return sorted(out) + + def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): + """Move file(s) from one location to another""" + if path1 == path2: + logger.debug("%s mv: The paths are the same, so no files were moved.", self) + else: + # explicitly raise exception to prevent data corruption + self.copy( + path1, path2, recursive=recursive, maxdepth=maxdepth, onerror="raise" + ) + self.rm(path1, recursive=recursive) + + def rm_file(self, path): + """Delete a file""" + self._rm(path) + + def _rm(self, path): + """Delete one file""" + # this is the old name for the method, prefer rm_file + raise NotImplementedError + + def rm(self, path, recursive=False, maxdepth=None): + """Delete files. + + Parameters + ---------- + path: str or list of str + File(s) to delete. + recursive: bool + If file(s) are directories, recursively delete contents and then + also remove the directory + maxdepth: int or None + Depth to pass to walk for finding files to delete, if recursive. + If None, there will be no limit and infinite recursion may be + possible. + """ + path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth) + for p in reversed(path): + self.rm_file(p) + + @classmethod + def _parent(cls, path): + path = cls._strip_protocol(path) + if "/" in path: + parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker) + return cls.root_marker + parent + else: + return cls.root_marker + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=True, + cache_options=None, + **kwargs, + ): + """Return raw bytes-mode file-like from the file-system""" + return AbstractBufferedFile( + self, + path, + mode, + block_size, + autocommit, + cache_options=cache_options, + **kwargs, + ) + + def open( + self, + path, + mode="rb", + block_size=None, + cache_options=None, + compression=None, + **kwargs, + ): + """ + Return a file-like object from the filesystem + + The resultant instance must function correctly in a context ``with`` + block. + + Parameters + ---------- + path: str + Target file + mode: str like 'rb', 'w' + See builtin ``open()`` + Mode "x" (exclusive write) may be implemented by the backend. Even if + it is, whether it is checked up front or on commit, and whether it is + atomic is implementation-dependent. + block_size: int + Some indication of buffering - this is a value in bytes + cache_options : dict, optional + Extra arguments to pass through to the cache. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding, errors, newline: passed on to TextIOWrapper for text mode + """ + import io + + path = self._strip_protocol(path) + if "b" not in mode: + mode = mode.replace("t", "") + "b" + + text_kwargs = { + k: kwargs.pop(k) + for k in ["encoding", "errors", "newline"] + if k in kwargs + } + return io.TextIOWrapper( + self.open( + path, + mode, + block_size=block_size, + cache_options=cache_options, + compression=compression, + **kwargs, + ), + **text_kwargs, + ) + else: + ac = kwargs.pop("autocommit", not self._intrans) + f = self._open( + path, + mode=mode, + block_size=block_size, + autocommit=ac, + cache_options=cache_options, + **kwargs, + ) + if compression is not None: + from fsspec.compression import compr + from fsspec.core import get_compression + + compression = get_compression(path, compression) + compress = compr[compression] + f = compress(f, mode=mode[0]) + + if not ac and "r" not in mode: + self.transaction.files.append(f) + return f + + def touch(self, path, truncate=True, **kwargs): + """Create empty file, or update timestamp + + Parameters + ---------- + path: str + file location + truncate: bool + If True, always set file size to 0; if False, update timestamp and + leave file unchanged, if backend allows this + """ + if truncate or not self.exists(path): + with self.open(path, "wb", **kwargs): + pass + else: + raise NotImplementedError # update timestamp, if possible + + def ukey(self, path): + """Hash of file properties, to tell if it has changed""" + return sha256(str(self.info(path)).encode()).hexdigest() + + def read_block(self, fn, offset, length, delimiter=None): + """Read a block of bytes from + + Starting at ``offset`` of the file, read ``length`` bytes. If + ``delimiter`` is set then we ensure that the read starts and stops at + delimiter boundaries that follow the locations ``offset`` and ``offset + + length``. If ``offset`` is zero then we start at zero. The + bytestring returned WILL include the end delimiter string. + + If offset+length is beyond the eof, reads to eof. + + Parameters + ---------- + fn: string + Path to filename + offset: int + Byte offset to start read + length: int + Number of bytes to read. If None, read to end. + delimiter: bytes (optional) + Ensure reading starts and stops at delimiter bytestring + + Examples + -------- + >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP + b'Alice, 100\\nBo' + >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\n' + + Use ``length=None`` to read to the end of the file. + >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\nCharlie, 300' + + See Also + -------- + :func:`fsspec.utils.read_block` + """ + with self.open(fn, "rb") as f: + size = f.size + if length is None: + length = size + if size is not None and offset + length > size: + length = size - offset + return read_block(f, offset, length, delimiter) + + def to_json(self, *, include_password: bool = True) -> str: + """ + JSON representation of this filesystem instance. + + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + + Returns + ------- + JSON string with keys ``cls`` (the python location of this class), + protocol (text name of this class's protocol, first one in case of + multiple), ``args`` (positional args, usually empty), and all other + keyword arguments as their own keys. + + Warnings + -------- + Serialized filesystems may contain sensitive information which have been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! + """ + from .json import FilesystemJSONEncoder + + return json.dumps( + self, + cls=type( + "_FilesystemJSONEncoder", + (FilesystemJSONEncoder,), + {"include_password": include_password}, + ), + ) + + @staticmethod + def from_json(blob: str) -> AbstractFileSystem: + """ + Recreate a filesystem instance from JSON representation. + + See ``.to_json()`` for the expected structure of the input. + + Parameters + ---------- + blob: str + + Returns + ------- + file system instance, not necessarily of this particular class. + + Warnings + -------- + This can import arbitrary modules (as determined by the ``cls`` key). + Make sure you haven't installed any modules that may execute malicious code + at import time. + """ + from .json import FilesystemJSONDecoder + + return json.loads(blob, cls=FilesystemJSONDecoder) + + def to_dict(self, *, include_password: bool = True) -> dict[str, Any]: + """ + JSON-serializable dictionary representation of this filesystem instance. + + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + + Returns + ------- + Dictionary with keys ``cls`` (the python location of this class), + protocol (text name of this class's protocol, first one in case of + multiple), ``args`` (positional args, usually empty), and all other + keyword arguments as their own keys. + + Warnings + -------- + Serialized filesystems may contain sensitive information which have been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! + """ + from .json import FilesystemJSONEncoder + + json_encoder = FilesystemJSONEncoder() + + cls = type(self) + proto = self.protocol + + storage_options = dict(self.storage_options) + if not include_password: + storage_options.pop("password", None) + + return dict( + cls=f"{cls.__module__}:{cls.__name__}", + protocol=proto[0] if isinstance(proto, (tuple, list)) else proto, + args=json_encoder.make_serializable(self.storage_args), + **json_encoder.make_serializable(storage_options), + ) + + @staticmethod + def from_dict(dct: dict[str, Any]) -> AbstractFileSystem: + """ + Recreate a filesystem instance from dictionary representation. + + See ``.to_dict()`` for the expected structure of the input. + + Parameters + ---------- + dct: Dict[str, Any] + + Returns + ------- + file system instance, not necessarily of this particular class. + + Warnings + -------- + This can import arbitrary modules (as determined by the ``cls`` key). + Make sure you haven't installed any modules that may execute malicious code + at import time. + """ + from .json import FilesystemJSONDecoder + + json_decoder = FilesystemJSONDecoder() + + dct = dict(dct) # Defensive copy + + cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct) + if cls is None: + raise ValueError("Not a serialized AbstractFileSystem") + + dct.pop("cls", None) + dct.pop("protocol", None) + + return cls( + *json_decoder.unmake_serializable(dct.pop("args", ())), + **json_decoder.unmake_serializable(dct), + ) + + def _get_pyarrow_filesystem(self): + """ + Make a version of the FS instance which will be acceptable to pyarrow + """ + # all instances already also derive from pyarrow + return self + + def get_mapper(self, root="", check=False, create=False, missing_exceptions=None): + """Create key/value store based on this file-system + + Makes a MutableMapping interface to the FS at the given root path. + See ``fsspec.mapping.FSMap`` for further details. + """ + from .mapping import FSMap + + return FSMap( + root, + self, + check=check, + create=create, + missing_exceptions=missing_exceptions, + ) + + @classmethod + def clear_instance_cache(cls): + """ + Clear the cache of filesystem instances. + + Notes + ----- + Unless overridden by setting the ``cachable`` class attribute to False, + the filesystem class stores a reference to newly created instances. This + prevents Python's normal rules around garbage collection from working, + since the instances refcount will not drop to zero until + ``clear_instance_cache`` is called. + """ + cls._cache.clear() + + def created(self, path): + """Return the created timestamp of a file as a datetime.datetime""" + raise NotImplementedError + + def modified(self, path): + """Return the modified timestamp of a file as a datetime.datetime""" + raise NotImplementedError + + def tree( + self, + path: str = "/", + recursion_limit: int = 2, + max_display: int = 25, + display_size: bool = False, + prefix: str = "", + is_last: bool = True, + first: bool = True, + indent_size: int = 4, + ) -> str: + """ + Return a tree-like structure of the filesystem starting from the given path as a string. + + Parameters + ---------- + path: Root path to start traversal from + recursion_limit: Maximum depth of directory traversal + max_display: Maximum number of items to display per directory + display_size: Whether to display file sizes + prefix: Current line prefix for visual tree structure + is_last: Whether current item is last in its level + first: Whether this is the first call (displays root path) + indent_size: Number of spaces by indent + + Returns + ------- + str: A string representing the tree structure. + + Example + ------- + >>> from fsspec import filesystem + + >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password') + >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10) + >>> print(tree) + """ + + def format_bytes(n: int) -> str: + """Format bytes as text.""" + for prefix, k in ( + ("P", 2**50), + ("T", 2**40), + ("G", 2**30), + ("M", 2**20), + ("k", 2**10), + ): + if n >= 0.9 * k: + return f"{n / k:.2f} {prefix}b" + return f"{n}B" + + result = [] + + if first: + result.append(path) + + if recursion_limit: + indent = " " * indent_size + contents = self.ls(path, detail=True) + contents.sort( + key=lambda x: (x.get("type") != "directory", x.get("name", "")) + ) + + if max_display is not None and len(contents) > max_display: + displayed_contents = contents[:max_display] + remaining_count = len(contents) - max_display + else: + displayed_contents = contents + remaining_count = 0 + + for i, item in enumerate(displayed_contents): + is_last_item = (i == len(displayed_contents) - 1) and ( + remaining_count == 0 + ) + + branch = ( + "β””" + ("─" * (indent_size - 2)) + if is_last_item + else "β”œ" + ("─" * (indent_size - 2)) + ) + branch += " " + new_prefix = prefix + ( + indent if is_last_item else "β”‚" + " " * (indent_size - 1) + ) + + name = os.path.basename(item.get("name", "")) + + if display_size and item.get("type") == "directory": + sub_contents = self.ls(item.get("name", ""), detail=True) + num_files = sum( + 1 for sub_item in sub_contents if sub_item.get("type") == "file" + ) + num_folders = sum( + 1 + for sub_item in sub_contents + if sub_item.get("type") == "directory" + ) + + if num_files == 0 and num_folders == 0: + size = " (empty folder)" + elif num_files == 0: + size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})" + elif num_folders == 0: + size = f" ({num_files} file{'s' if num_files > 1 else ''})" + else: + size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})" + elif display_size and item.get("type") == "file": + size = f" ({format_bytes(item.get('size', 0))})" + else: + size = "" + + result.append(f"{prefix}{branch}{name}{size}") + + if item.get("type") == "directory" and recursion_limit > 0: + result.append( + self.tree( + path=item.get("name", ""), + recursion_limit=recursion_limit - 1, + max_display=max_display, + display_size=display_size, + prefix=new_prefix, + is_last=is_last_item, + first=False, + indent_size=indent_size, + ) + ) + + if remaining_count > 0: + more_message = f"{remaining_count} more item(s) not displayed." + result.append( + f"{prefix}{'β””' + ('─' * (indent_size - 2))} {more_message}" + ) + + return "\n".join(_ for _ in result if _) + + # ------------------------------------------------------------------------ + # Aliases + + def read_bytes(self, path, start=None, end=None, **kwargs): + """Alias of `AbstractFileSystem.cat_file`.""" + return self.cat_file(path, start=start, end=end, **kwargs) + + def write_bytes(self, path, value, **kwargs): + """Alias of `AbstractFileSystem.pipe_file`.""" + self.pipe_file(path, value, **kwargs) + + def makedir(self, path, create_parents=True, **kwargs): + """Alias of `AbstractFileSystem.mkdir`.""" + return self.mkdir(path, create_parents=create_parents, **kwargs) + + def mkdirs(self, path, exist_ok=False): + """Alias of `AbstractFileSystem.makedirs`.""" + return self.makedirs(path, exist_ok=exist_ok) + + def listdir(self, path, detail=True, **kwargs): + """Alias of `AbstractFileSystem.ls`.""" + return self.ls(path, detail=detail, **kwargs) + + def cp(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.copy`.""" + return self.copy(path1, path2, **kwargs) + + def move(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.mv`.""" + return self.mv(path1, path2, **kwargs) + + def stat(self, path, **kwargs): + """Alias of `AbstractFileSystem.info`.""" + return self.info(path, **kwargs) + + def disk_usage(self, path, total=True, maxdepth=None, **kwargs): + """Alias of `AbstractFileSystem.du`.""" + return self.du(path, total=total, maxdepth=maxdepth, **kwargs) + + def rename(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.mv`.""" + return self.mv(path1, path2, **kwargs) + + def delete(self, path, recursive=False, maxdepth=None): + """Alias of `AbstractFileSystem.rm`.""" + return self.rm(path, recursive=recursive, maxdepth=maxdepth) + + def upload(self, lpath, rpath, recursive=False, **kwargs): + """Alias of `AbstractFileSystem.put`.""" + return self.put(lpath, rpath, recursive=recursive, **kwargs) + + def download(self, rpath, lpath, recursive=False, **kwargs): + """Alias of `AbstractFileSystem.get`.""" + return self.get(rpath, lpath, recursive=recursive, **kwargs) + + def sign(self, path, expiration=100, **kwargs): + """Create a signed URL representing the given path + + Some implementations allow temporary URLs to be generated, as a + way of delegating credentials. + + Parameters + ---------- + path : str + The path on the filesystem + expiration : int + Number of seconds to enable the URL for (if supported) + + Returns + ------- + URL : str + The signed URL + + Raises + ------ + NotImplementedError : if method is not implemented for a filesystem + """ + raise NotImplementedError("Sign is not implemented for this filesystem") + + def _isfilestore(self): + # Originally inherited from pyarrow DaskFileSystem. Keeping this + # here for backwards compatibility as long as pyarrow uses its + # legacy fsspec-compatible filesystems and thus accepts fsspec + # filesystems as well + return False + + +class AbstractBufferedFile(io.IOBase): + """Convenient class to derive from to provide buffering + + In the case that the backend does not provide a pythonic file-like object + already, this class contains much of the logic to build one. The only + methods that need to be overridden are ``_upload_chunk``, + ``_initiate_upload`` and ``_fetch_range``. + """ + + DEFAULT_BLOCK_SIZE = 5 * 2**20 + _details = None + + def __init__( + self, + fs, + path, + mode="rb", + block_size="default", + autocommit=True, + cache_type="readahead", + cache_options=None, + size=None, + **kwargs, + ): + """ + Template for files with buffered reading and writing + + Parameters + ---------- + fs: instance of FileSystem + path: str + location in file-system + mode: str + Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file + systems may be read-only, and some may not support append. + block_size: int + Buffer size for reading or writing, 'default' for class default + autocommit: bool + Whether to write to final destination; may only impact what + happens when file is being closed. + cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead" + Caching policy in read mode. See the definitions in ``core``. + cache_options : dict + Additional options passed to the constructor for the cache specified + by `cache_type`. + size: int + If given and in read mode, suppressed having to look up the file size + kwargs: + Gets stored as self.kwargs + """ + from .core import caches + + self.path = path + self.fs = fs + self.mode = mode + self.blocksize = ( + self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size + ) + self.loc = 0 + self.autocommit = autocommit + self.end = None + self.start = None + self.closed = False + + if cache_options is None: + cache_options = {} + + if "trim" in kwargs: + warnings.warn( + "Passing 'trim' to control the cache behavior has been deprecated. " + "Specify it within the 'cache_options' argument instead.", + FutureWarning, + ) + cache_options["trim"] = kwargs.pop("trim") + + self.kwargs = kwargs + + if mode not in {"ab", "rb", "wb", "xb"}: + raise NotImplementedError("File mode not supported") + if mode == "rb": + if size is not None: + self.size = size + else: + self.size = self.details["size"] + self.cache = caches[cache_type]( + self.blocksize, self._fetch_range, self.size, **cache_options + ) + else: + self.buffer = io.BytesIO() + self.offset = None + self.forced = False + self.location = None + + @property + def details(self): + if self._details is None: + self._details = self.fs.info(self.path) + return self._details + + @details.setter + def details(self, value): + self._details = value + self.size = value["size"] + + @property + def full_name(self): + return _unstrip_protocol(self.path, self.fs) + + @property + def closed(self): + # get around this attr being read-only in IOBase + # use getattr here, since this can be called during del + return getattr(self, "_closed", True) + + @closed.setter + def closed(self, c): + self._closed = c + + def __hash__(self): + if "w" in self.mode: + return id(self) + else: + return int(tokenize(self.details), 16) + + def __eq__(self, other): + """Files are equal if they have the same checksum, only in read mode""" + if self is other: + return True + return ( + isinstance(other, type(self)) + and self.mode == "rb" + and other.mode == "rb" + and hash(self) == hash(other) + ) + + def commit(self): + """Move from temp to final destination""" + + def discard(self): + """Throw away temporary file""" + + def info(self): + """File information about this path""" + if self.readable(): + return self.details + else: + raise ValueError("Info not available while writing") + + def tell(self): + """Current file location""" + return self.loc + + def seek(self, loc, whence=0): + """Set current file location + + Parameters + ---------- + loc: int + byte location + whence: {0, 1, 2} + from start of file, current location or end of file, resp. + """ + loc = int(loc) + if not self.mode == "rb": + raise OSError(ESPIPE, "Seek only available in read mode") + if whence == 0: + nloc = loc + elif whence == 1: + nloc = self.loc + loc + elif whence == 2: + nloc = self.size + loc + else: + raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)") + if nloc < 0: + raise ValueError("Seek before start of file") + self.loc = nloc + return self.loc + + def write(self, data): + """ + Write data to buffer. + + Buffer only sent on flush() or if buffer is greater than + or equal to blocksize. + + Parameters + ---------- + data: bytes + Set of bytes to be written. + """ + if not self.writable(): + raise ValueError("File not in write mode") + if self.closed: + raise ValueError("I/O operation on closed file.") + if self.forced: + raise ValueError("This file has been force-flushed, can only close") + out = self.buffer.write(data) + self.loc += out + if self.buffer.tell() >= self.blocksize: + self.flush() + return out + + def flush(self, force=False): + """ + Write buffered data to backend store. + + Writes the current buffer, if it is larger than the block-size, or if + the file is being closed. + + Parameters + ---------- + force: bool + When closing, write the last block even if it is smaller than + blocks are allowed to be. Disallows further writing to this file. + """ + + if self.closed: + raise ValueError("Flush on closed file") + if force and self.forced: + raise ValueError("Force flush cannot be called more than once") + if force: + self.forced = True + + if self.readable(): + # no-op to flush on read-mode + return + + if not force and self.buffer.tell() < self.blocksize: + # Defer write on small block + return + + if self.offset is None: + # Initialize a multipart upload + self.offset = 0 + try: + self._initiate_upload() + except: + self.closed = True + raise + + if self._upload_chunk(final=force) is not False: + self.offset += self.buffer.seek(0, 2) + self.buffer = io.BytesIO() + + def _upload_chunk(self, final=False): + """Write one part of a multi-block file upload + + Parameters + ========== + final: bool + This is the last block, so should complete file, if + self.autocommit is True. + """ + # may not yet have been initialized, may need to call _initialize_upload + + def _initiate_upload(self): + """Create remote file/upload""" + pass + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + return self.fs.cat_file(self.path, start=start, end=end) + + def read(self, length=-1): + """ + Return data from cache, or fetch pieces as necessary + + Parameters + ---------- + length: int (-1) + Number of bytes to read; if <0, all remaining bytes. + """ + length = -1 if length is None else int(length) + if self.mode != "rb": + raise ValueError("File not in read mode") + if length < 0: + length = self.size - self.loc + if self.closed: + raise ValueError("I/O operation on closed file.") + if length == 0: + # don't even bother calling fetch + return b"" + out = self.cache._fetch(self.loc, self.loc + length) + + logger.debug( + "%s read: %i - %i %s", + self, + self.loc, + self.loc + length, + self.cache._log_stats(), + ) + self.loc += len(out) + return out + + def readinto(self, b): + """mirrors builtin file's readinto method + + https://docs.python.org/3/library/io.html#io.RawIOBase.readinto + """ + out = memoryview(b).cast("B") + data = self.read(out.nbytes) + out[: len(data)] = data + return len(data) + + def readuntil(self, char=b"\n", blocks=None): + """Return data between current position and first occurrence of char + + char is included in the output, except if the end of the tile is + encountered first. + + Parameters + ---------- + char: bytes + Thing to find + blocks: None or int + How much to read in each go. Defaults to file blocksize - which may + mean a new read on every call. + """ + out = [] + while True: + start = self.tell() + part = self.read(blocks or self.blocksize) + if len(part) == 0: + break + found = part.find(char) + if found > -1: + out.append(part[: found + len(char)]) + self.seek(start + found + len(char)) + break + out.append(part) + return b"".join(out) + + def readline(self): + """Read until and including the first occurrence of newline character + + Note that, because of character encoding, this is not necessarily a + true line ending. + """ + return self.readuntil(b"\n") + + def __next__(self): + out = self.readline() + if out: + return out + raise StopIteration + + def __iter__(self): + return self + + def readlines(self): + """Return all data, split by the newline character, including the newline character""" + data = self.read() + lines = data.split(b"\n") + out = [l + b"\n" for l in lines[:-1]] + if data.endswith(b"\n"): + return out + else: + return out + [lines[-1]] + # return list(self) ??? + + def readinto1(self, b): + return self.readinto(b) + + def close(self): + """Close file + + Finalizes writes, discards cache + """ + if getattr(self, "_unclosable", False): + return + if self.closed: + return + try: + if self.mode == "rb": + self.cache = None + else: + if not self.forced: + self.flush(force=True) + + if self.fs is not None: + self.fs.invalidate_cache(self.path) + self.fs.invalidate_cache(self.fs._parent(self.path)) + finally: + self.closed = True + + def readable(self): + """Whether opened for reading""" + return "r" in self.mode and not self.closed + + def seekable(self): + """Whether is seekable (only in read mode)""" + return self.readable() + + def writable(self): + """Whether opened for writing""" + return self.mode in {"wb", "ab", "xb"} and not self.closed + + def __reduce__(self): + if self.mode != "rb": + raise RuntimeError("Pickling a writeable file is not supported") + + return reopen, ( + self.fs, + self.path, + self.mode, + self.blocksize, + self.loc, + self.size, + self.autocommit, + self.cache.name if self.cache else "none", + self.kwargs, + ) + + def __del__(self): + if not self.closed: + self.close() + + def __str__(self): + return f"" + + __repr__ = __str__ + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs): + file = fs.open( + path, + mode=mode, + block_size=blocksize, + autocommit=autocommit, + cache_type=cache_type, + size=size, + **kwargs, + ) + if loc > 0: + file.seek(loc) + return file diff --git a/venv/lib/python3.13/site-packages/fsspec/transaction.py b/venv/lib/python3.13/site-packages/fsspec/transaction.py new file mode 100644 index 0000000000000000000000000000000000000000..77293f63ecc5f611e19d849ef236d53e9c258efc --- /dev/null +++ b/venv/lib/python3.13/site-packages/fsspec/transaction.py @@ -0,0 +1,90 @@ +from collections import deque + + +class Transaction: + """Filesystem transaction write context + + Gathers files for deferred commit or discard, so that several write + operations can be finalized semi-atomically. This works by having this + instance as the ``.transaction`` attribute of the given filesystem + """ + + def __init__(self, fs, **kwargs): + """ + Parameters + ---------- + fs: FileSystem instance + """ + self.fs = fs + self.files = deque() + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """End transaction and commit, if exit is not due to exception""" + # only commit if there was no exception + self.complete(commit=exc_type is None) + if self.fs: + self.fs._intrans = False + self.fs._transaction = None + self.fs = None + + def start(self): + """Start a transaction on this FileSystem""" + self.files = deque() # clean up after previous failed completions + self.fs._intrans = True + + def complete(self, commit=True): + """Finish transaction: commit or discard all deferred files""" + while self.files: + f = self.files.popleft() + if commit: + f.commit() + else: + f.discard() + self.fs._intrans = False + self.fs._transaction = None + self.fs = None + + +class FileActor: + def __init__(self): + self.files = [] + + def commit(self): + for f in self.files: + f.commit() + self.files.clear() + + def discard(self): + for f in self.files: + f.discard() + self.files.clear() + + def append(self, f): + self.files.append(f) + + +class DaskTransaction(Transaction): + def __init__(self, fs): + """ + Parameters + ---------- + fs: FileSystem instance + """ + import distributed + + super().__init__(fs) + client = distributed.default_client() + self.files = client.submit(FileActor, actor=True).result() + + def complete(self, commit=True): + """Finish transaction: commit or discard all deferred files""" + if commit: + self.files.commit().result() + else: + self.files.discard().result() + self.fs._intrans = False + self.fs = None diff --git a/venv/lib/python3.13/site-packages/hf_xet/__init__.py b/venv/lib/python3.13/site-packages/hf_xet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..96ed54a8a066d681e4973e5841a0f5577b619698 --- /dev/null +++ b/venv/lib/python3.13/site-packages/hf_xet/__init__.py @@ -0,0 +1,5 @@ +from .hf_xet import * + +__doc__ = hf_xet.__doc__ +if hasattr(hf_xet, "__all__"): + __all__ = hf_xet.__all__ \ No newline at end of file diff --git a/venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..7a4a4b7a7e9adbe4542d98f6b2f40e65ee64b182 --- /dev/null +++ b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA @@ -0,0 +1,209 @@ +Metadata-Version: 2.4 +Name: idna +Version: 3.11 +Summary: Internationalized Domain Names in Applications (IDNA) +Author-email: Kim Davies +Requires-Python: >=3.8 +Description-Content-Type: text/x-rst +License-Expression: BSD-3-Clause +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: System Administrators +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: Name Service (DNS) +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Utilities +License-File: LICENSE.md +Requires-Dist: ruff >= 0.6.2 ; extra == "all" +Requires-Dist: mypy >= 1.11.2 ; extra == "all" +Requires-Dist: pytest >= 8.3.2 ; extra == "all" +Requires-Dist: flake8 >= 7.1.1 ; extra == "all" +Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst +Project-URL: Issue tracker, https://github.com/kjd/idna/issues +Project-URL: Source, https://github.com/kjd/idna +Provides-Extra: all + +Internationalized Domain Names in Applications (IDNA) +===================================================== + +Support for `Internationalized Domain Names in +Applications (IDNA) `_ +and `Unicode IDNA Compatibility Processing +`_. + +The latest versions of these standards supplied here provide +more comprehensive language coverage and reduce the potential of +allowing domains with known security vulnerabilities. This library +is a suitable replacement for the β€œencodings.idna” +module that comes with the Python standard library, but which +only supports an older superseded IDNA specification from 2003. + +Basic functions are simply executed: + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('ドパむン.γƒ†γ‚Ήγƒˆ') + b'xn--eckwd4c7c.xn--zckzah' + >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) + ドパむン.γƒ†γ‚Ήγƒˆ + + +Installation +------------ + +This package is available for installation from PyPI via the +typical mechanisms, such as: + +.. code-block:: bash + + $ python3 -m pip install idna + + +Usage +----- + +For typical usage, the ``encode`` and ``decode`` functions will take a +domain name argument and perform a conversion to ASCII compatible encoding +(known as A-labels), or to Unicode strings (known as U-labels) +respectively. + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('ドパむン.γƒ†γ‚Ήγƒˆ') + b'xn--eckwd4c7c.xn--zckzah' + >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) + ドパむン.γƒ†γ‚Ήγƒˆ + +Conversions can be applied at a per-label basis using the ``ulabel`` or +``alabel`` functions if necessary: + +.. code-block:: pycon + + >>> idna.alabel('ζ΅‹θ―•') + b'xn--0zwm56d' + + +Compatibility Mapping (UTS #46) ++++++++++++++++++++++++++++++++ + +This library provides support for `Unicode IDNA Compatibility +Processing `_ which normalizes input from +different potential ways a user may input a domain prior to performing the IDNA +conversion operations. This functionality, known as a +`mapping `_, is considered by the +specification to be a local user-interface issue distinct from IDNA +conversion functionality. + +For example, β€œKΓΆnigsgÀßchen” is not a permissible label as *LATIN +CAPITAL LETTER K* is not allowed (nor are capital letters in general). +UTS 46 will convert this into lower case prior to applying the IDNA +conversion. + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('KΓΆnigsgÀßchen') + ... + idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'KΓΆnigsgÀßchen' not allowed + >>> idna.encode('KΓΆnigsgÀßchen', uts46=True) + b'xn--knigsgchen-b4a3dun' + >>> print(idna.decode('xn--knigsgchen-b4a3dun')) + kΓΆnigsgÀßchen + + +Exceptions +---------- + +All errors raised during the conversion following the specification +should raise an exception derived from the ``idna.IDNAError`` base +class. + +More specific exceptions that may be generated as ``idna.IDNABidiError`` +when the error reflects an illegal combination of left-to-right and +right-to-left characters in a label; ``idna.InvalidCodepoint`` when +a specific codepoint is an illegal character in an IDN label (i.e. +INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is +illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ +but the contextual requirements are not satisfied.) + +Building and Diagnostics +------------------------ + +The IDNA and UTS 46 functionality relies upon pre-calculated lookup +tables for performance. These tables are derived from computing against +eligibility criteria in the respective standards using the command-line +script ``tools/idna-data``. + +This tool will fetch relevant codepoint data from the Unicode repository +and perform the required calculations to identify eligibility. There are +three main modes: + +* ``idna-data make-libdata``. Generates ``idnadata.py`` and + ``uts46data.py``, the pre-calculated lookup tables used for IDNA and + UTS 46 conversions. Implementers who wish to track this library against + a different Unicode version may use this tool to manually generate a + different version of the ``idnadata.py`` and ``uts46data.py`` files. + +* ``idna-data make-table``. Generate a table of the IDNA disposition + (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix + B.1 of RFC 5892 and the pre-computed tables published by `IANA + `_. + +* ``idna-data U+0061``. Prints debugging output on the various + properties associated with an individual Unicode codepoint (in this + case, U+0061), that are used to assess the IDNA and UTS 46 status of a + codepoint. This is helpful in debugging or analysis. + +The tool accepts a number of arguments, described using ``idna-data +-h``. Most notably, the ``--version`` argument allows the specification +of the version of Unicode to be used in computing the table data. For +example, ``idna-data --version 9.0.0 make-libdata`` will generate +library data against Unicode 9.0.0. + + +Additional Notes +---------------- + +* **Packages**. The latest tagged release version is published in the + `Python Package Index `_. + +* **Version support**. This library supports Python 3.8 and higher. + As this library serves as a low-level toolkit for a variety of + applications, many of which strive for broad compatibility with older + Python versions, there is no rush to remove older interpreter support. + Support for older versions are likely to be removed from new releases + as automated tests can no longer easily be run, i.e. once the Python + version is officially end-of-life. + +* **Testing**. The library has a test suite based on each rule of the + IDNA specification, as well as tests that are provided as part of the + Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing + `_. + +* **Emoji**. It is an occasional request to support emoji domains in + this library. Encoding of symbols like emoji is expressly prohibited by + the technical standard IDNA 2008 and emoji domains are broadly phased + out across the domain industry due to associated security risks. For + now, applications that need to support these non-compliant labels + may wish to consider trying the encode/decode operation in this library + first, and then falling back to using `encodings.idna`. See `the Github + project `_ for more discussion. + +* **Transitional processing**. Unicode 16.0.0 removed transitional + processing so the `transitional` argument for the encode() method + no longer has any effect and will be removed at a later date. + diff --git a/venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..9215b054883f8fd946d6c9c5fb351bfac854d985 --- /dev/null +++ b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD @@ -0,0 +1,22 @@ +idna-3.11.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +idna-3.11.dist-info/METADATA,sha256=fCwSww9SuiN8TIHllFSASUQCW55hAs8dzKnr9RaEEbA,8378 +idna-3.11.dist-info/RECORD,, +idna-3.11.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82 +idna-3.11.dist-info/licenses/LICENSE.md,sha256=t6M2q_OwThgOwGXN0W5wXQeeHMehT5EKpukYfza5zYc,1541 +idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868 +idna/__pycache__/__init__.cpython-313.pyc,, +idna/__pycache__/codec.cpython-313.pyc,, +idna/__pycache__/compat.cpython-313.pyc,, +idna/__pycache__/core.cpython-313.pyc,, +idna/__pycache__/idnadata.cpython-313.pyc,, +idna/__pycache__/intranges.cpython-313.pyc,, +idna/__pycache__/package_data.cpython-313.pyc,, +idna/__pycache__/uts46data.cpython-313.pyc,, +idna/codec.py,sha256=M2SGWN7cs_6B32QmKTyTN6xQGZeYQgQ2wiX3_DR6loE,3438 +idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316 +idna/core.py,sha256=P26_XVycuMTZ1R2mNK1ZREVzM5mvTzdabBXfyZVU1Lc,13246 +idna/idnadata.py,sha256=SG8jhaGE53iiD6B49pt2pwTv_UvClciWE-N54oR2p4U,79623 +idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898 +idna/package_data.py,sha256=_CUavOxobnbyNG2FLyHoN8QHP3QM9W1tKuw7eq9QwBk,21 +idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +idna/uts46data.py,sha256=H9J35VkD0F9L9mKOqjeNGd2A-Va6FlPoz6Jz4K7h-ps,243725 diff --git a/venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..d8b9936dad9ab2513fa6979f411560d3b6b57e37 --- /dev/null +++ b/venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: flit 3.12.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/venv/lib/python3.13/site-packages/packaging/__init__.py b/venv/lib/python3.13/site-packages/packaging/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d45c22cfd88e2b88976b46d4171a502b602658ec --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/__init__.py @@ -0,0 +1,15 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "25.0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = f"2014 {__author__}" diff --git a/venv/lib/python3.13/site-packages/packaging/_elffile.py b/venv/lib/python3.13/site-packages/packaging/_elffile.py new file mode 100644 index 0000000000000000000000000000000000000000..7a5afc33b0a2401cf509bb6a8d1143d5230324d9 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_elffile.py @@ -0,0 +1,109 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +from __future__ import annotations + +import enum +import os +import struct +from typing import IO + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error as e: + raise ELFInvalid("unable to parse identification") from e + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError as e: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})" + ) from e + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> str | None: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/venv/lib/python3.13/site-packages/packaging/_manylinux.py b/venv/lib/python3.13/site-packages/packaging/_manylinux.py new file mode 100644 index 0000000000000000000000000000000000000000..95f55762e86f08af76aaa95f5cd147d4517c6d66 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_manylinux.py @@ -0,0 +1,262 @@ +from __future__ import annotations + +import collections +import contextlib +import functools +import os +import re +import sys +import warnings +from typing import Generator, Iterator, NamedTuple, Sequence + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]: + try: + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None + + +def _is_linux_armhf(executable: str) -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) + + +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = { + "x86_64", + "aarch64", + "ppc64", + "ppc64le", + "s390x", + "loongarch64", + "riscv64", + } + return any(arch in allowed_archs for arch in archs) + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> str | None: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # Should be a string like "glibc 2.17". + version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.rsplit() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> str | None: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> str | None: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + f"Expected glibc version with 2 components major.minor, got: {version_str}", + RuntimeWarning, + stacklevel=2, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache +def _get_glibc_version() -> tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if set(archs) & {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/venv/lib/python3.13/site-packages/packaging/_musllinux.py b/venv/lib/python3.13/site-packages/packaging/_musllinux.py new file mode 100644 index 0000000000000000000000000000000000000000..d2bf30b56319ba862c5c9a1a39a87c6d1cb68718 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_musllinux.py @@ -0,0 +1,85 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +from __future__ import annotations + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Sequence + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> _MuslVersion | None: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache +def _get_musl_version(executable: str) -> _MuslVersion | None: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/venv/lib/python3.13/site-packages/packaging/_parser.py b/venv/lib/python3.13/site-packages/packaging/_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..0007c0aa64aeae24c7e0ca90790a25ba51dbf7d6 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_parser.py @@ -0,0 +1,353 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains EBNF-inspired grammar representing +the implementation. +""" + +from __future__ import annotations + +import ast +from typing import NamedTuple, Sequence, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]] +MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: list[str] + specifier: str + marker: MarkerList | None + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> tuple[str, str, MarkerList | None]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> list[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> list[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: list[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if env_var in ("platform_python_implementation", "python_implementation"): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/venv/lib/python3.13/site-packages/packaging/_structures.py b/venv/lib/python3.13/site-packages/packaging/_structures.py new file mode 100644 index 0000000000000000000000000000000000000000..90a6465f9682c886363eea5327dac64bf623a6ff --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_structures.py @@ -0,0 +1,61 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> "NegativeInfinityType": + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/venv/lib/python3.13/site-packages/packaging/_tokenizer.py b/venv/lib/python3.13/site-packages/packaging/_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..d28a9b6cf5daca9fa3ce5a8c8ae583a57ee84f12 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/_tokenizer.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import contextlib +import re +from dataclasses import dataclass +from typing import Iterator, NoReturn + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: dict[str, str | re.Pattern[str]] = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extras? + |dependency_groups + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: dict[str, str | re.Pattern[str]], + ) -> None: + self.source = source + self.rules: dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Token | None = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert self.next_token is None, ( + f"Cannot check for {name!r}, already have {self.next_token!r}" + ) + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: int | None = None, + span_end: int | None = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/venv/lib/python3.13/site-packages/packaging/markers.py b/venv/lib/python3.13/site-packages/packaging/markers.py new file mode 100644 index 0000000000000000000000000000000000000000..e7cea57297a23809548fcd0d344740796bae945b --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/markers.py @@ -0,0 +1,362 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from __future__ import annotations + +import operator +import os +import platform +import sys +from typing import AbstractSet, Any, Callable, Literal, TypedDict, Union, cast + +from ._parser import MarkerAtom, MarkerList, Op, Value, Variable +from ._parser import parse_marker as _parse_marker +from ._tokenizer import ParserSyntaxError +from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name + +__all__ = [ + "EvaluateContext", + "InvalidMarker", + "Marker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "default_environment", +] + +Operator = Callable[[str, Union[str, AbstractSet[str]]], bool] +EvaluateContext = Literal["metadata", "lock_file", "requirement"] +MARKERS_ALLOWING_SET = {"extras", "dependency_groups"} + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +class Environment(TypedDict): + implementation_name: str + """The implementation's identifier, e.g. ``'cpython'``.""" + + implementation_version: str + """ + The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or + ``'7.3.13'`` for PyPy3.10 v7.3.13. + """ + + os_name: str + """ + The value of :py:data:`os.name`. The name of the operating system dependent module + imported, e.g. ``'posix'``. + """ + + platform_machine: str + """ + Returns the machine type, e.g. ``'i386'``. + + An empty string if the value cannot be determined. + """ + + platform_release: str + """ + The system's release, e.g. ``'2.2.0'`` or ``'NT'``. + + An empty string if the value cannot be determined. + """ + + platform_system: str + """ + The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``. + + An empty string if the value cannot be determined. + """ + + platform_version: str + """ + The system's release version, e.g. ``'#3 on degas'``. + + An empty string if the value cannot be determined. + """ + + python_full_version: str + """ + The Python version as string ``'major.minor.patchlevel'``. + + Note that unlike the Python :py:data:`sys.version`, this value will always include + the patchlevel (it defaults to 0). + """ + + platform_python_implementation: str + """ + A string identifying the Python implementation, e.g. ``'CPython'``. + """ + + python_version: str + """The Python version as string ``'major.minor'``.""" + + sys_platform: str + """ + This string contains a platform identifier that can be used to append + platform-specific components to :py:data:`sys.path`, for instance. + + For Unix systems, except on Linux and AIX, this is the lowercased OS name as + returned by ``uname -s`` with the first part of the version as returned by + ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python + was built. + """ + + +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results + + +def _format_marker( + marker: list[str] | MarkerAtom | str, first: bool | None = True +) -> str: + assert isinstance(marker, (list, tuple, str)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators: dict[str, Operator] = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _eval_op(lhs: str, op: Op, rhs: str | AbstractSet[str]) -> bool: + if isinstance(rhs, str): + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs, prereleases=True) + + oper: Operator | None = _operators.get(op.serialize()) + if oper is None: + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") + + return oper(lhs, rhs) + + +def _normalize( + lhs: str, rhs: str | AbstractSet[str], key: str +) -> tuple[str, str | AbstractSet[str]]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + assert isinstance(rhs, str), "extra value must be a string" + return (canonicalize_name(lhs), canonicalize_name(rhs)) + if key in MARKERS_ALLOWING_SET: + if isinstance(rhs, str): # pragma: no cover + return (canonicalize_name(lhs), canonicalize_name(rhs)) + else: + return (canonicalize_name(lhs), {canonicalize_name(v) for v in rhs}) + + # other environment markers don't have such standards + return lhs, rhs + + +def _evaluate_markers( + markers: MarkerList, environment: dict[str, str | AbstractSet[str]] +) -> bool: + groups: list[list[bool]] = [[]] + + for marker in markers: + assert isinstance(marker, (list, tuple, str)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + environment_key = lhs.value + lhs_value = environment[environment_key] + rhs_value = rhs.value + else: + lhs_value = lhs.value + environment_key = rhs.value + rhs_value = environment[environment_key] + assert isinstance(lhs_value, str), "lhs must be a string" + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info: sys._version_info) -> str: + version = f"{info.major}.{info.minor}.{info.micro}" + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment() -> Environment: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker: + def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. + try: + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e + + def __str__(self) -> str: + return _format_marker(self._markers) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + + def evaluate( + self, + environment: dict[str, str] | None = None, + context: EvaluateContext = "metadata", + ) -> bool: + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. The *context* parameter specifies what + context the markers are being evaluated for, which influences what markers + are considered valid. Acceptable values are "metadata" (for core metadata; + default), "lock_file", and "requirement" (i.e. all other situations). + + The environment is determined from the current Python process. + """ + current_environment = cast( + "dict[str, str | AbstractSet[str]]", default_environment() + ) + if context == "lock_file": + current_environment.update( + extras=frozenset(), dependency_groups=frozenset() + ) + elif context == "metadata": + current_environment["extra"] = "" + if environment is not None: + current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if "extra" in current_environment and current_environment["extra"] is None: + current_environment["extra"] = "" + + return _evaluate_markers( + self._markers, _repair_python_full_version(current_environment) + ) + + +def _repair_python_full_version( + env: dict[str, str | AbstractSet[str]], +) -> dict[str, str | AbstractSet[str]]: + """ + Work around platform.python_version() returning something that is not PEP 440 + compliant for non-tagged Python builds. + """ + python_full_version = cast(str, env["python_full_version"]) + if python_full_version.endswith("+"): + env["python_full_version"] = f"{python_full_version}local" + return env diff --git a/venv/lib/python3.13/site-packages/packaging/metadata.py b/venv/lib/python3.13/site-packages/packaging/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..3bd8602d36c53374f1f10a0716d28fdc64e2d3ac --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/metadata.py @@ -0,0 +1,862 @@ +from __future__ import annotations + +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import pathlib +import sys +import typing +from typing import ( + Any, + Callable, + Generic, + Literal, + TypedDict, + cast, +) + +from . import licenses, requirements, specifiers, utils +from . import version as version_module +from .licenses import NormalizedLicenseExpression + +T = typing.TypeVar("T") + + +if sys.version_info >= (3, 11): # pragma: no cover + ExceptionGroup = ExceptionGroup +else: # pragma: no cover + + class ExceptionGroup(Exception): + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: list[Exception] + + def __init__(self, message: str, exceptions: list[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: list[str] + summary: str + description: str + keywords: list[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: list[str] + download_url: str + classifiers: list[str] + requires: list[str] + provides: list[str] + obsoletes: list[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: list[str] + provides_dist: list[str] + obsoletes_dist: list[str] + requires_python: str + requires_external: list[str] + project_urls: dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: list[str] + + # Metadata 2.2 - PEP 643 + dynamic: list[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + # Metadata 2.4 - PEP 639 + license_expression: str + license_files: list[str] + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "license_expression", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "license_files", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> list[str]: + """Split a string of comma-separated keywords into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: list[str]) -> dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: bytes | str) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload = msg.get_payload() + assert isinstance(payload, str) + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload = msg.get_payload(decode=True) + assert isinstance(bpayload, bytes) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError as exc: + raise ValueError("payload in an invalid encoding") from exc + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "license-expression": "license_expression", + "license-file": "license_files", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: dict[str, str | list[str] | dict[str, str]] = {} + unparsed: dict[str, list[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: list[tuple[bytes, str | None]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) # type: ignore[call-overload] + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: Metadata, name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + value = instance._raw.get(self.name) + + # To make the _process_* methods easier, we'll check if the value is None + # and if this field is NOT a required attribute, and if both of those + # things are true, we'll skip the the converter. This will mean that the + # converters never have to deal with the None union. + if self.name in _REQUIRED_ATTRS or value is not None: + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Exception | None = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) from exc + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) from exc + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: list[str]) -> list[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{dynamic_field!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata( + f"{dynamic_field!r} is not a valid dynamic field" + ) + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: list[str], + ) -> list[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) from exc + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) from exc + + def _process_requires_dist( + self, + value: list[str], + ) -> list[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata( + f"{req!r} is invalid for {{field}}", cause=exc + ) from exc + else: + return reqs + + def _process_license_expression( + self, value: str + ) -> NormalizedLicenseExpression | None: + try: + return licenses.canonicalize_license_expression(value) + except ValueError as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) from exc + + def _process_license_files(self, value: list[str]) -> list[str]: + paths = [] + for path in value: + if ".." in path: + raise self._invalid_metadata( + f"{path!r} is invalid for {{field}}, " + "parent directory indicators are not allowed" + ) + if "*" in path: + raise self._invalid_metadata( + f"{path!r} is invalid for {{field}}, paths must be resolved" + ) + if ( + pathlib.PurePosixPath(path).is_absolute() + or pathlib.PureWindowsPath(path).is_absolute() + ): + raise self._invalid_metadata( + f"{path!r} is invalid for {{field}}, paths must be relative" + ) + if pathlib.PureWindowsPath(path).as_posix() != path: + raise self._invalid_metadata( + f"{path!r} is invalid for {{field}}, paths must use '/' delimiter" + ) + paths.append(path) + return paths + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata: + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: list[Exception] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + f"{field} introduced in metadata version " + f"{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata: + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + raw, unparsed = parse_email(data) + + if validate: + exceptions: list[Exception] = [] + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + raise ExceptionGroup( + "invalid or unparsed metadata", exc_group.exceptions + ) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + # `name` is not normalized/typed to NormalizedName so as to provide access to + # the original/raw name. + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[list[str] | None] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[list[str] | None] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[str | None] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[str | None] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[list[str] | None] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[str | None] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[str | None] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[str | None] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[str | None] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[str | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[str | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[str | None] = _Validator() + """:external:ref:`core-metadata-license`""" + license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator( + added="2.4" + ) + """:external:ref:`core-metadata-license-expression`""" + license_files: _Validator[list[str] | None] = _Validator(added="2.4") + """:external:ref:`core-metadata-license-file`""" + classifiers: _Validator[list[str] | None] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[list[str] | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[list[str] | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[list[str] | None] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[list[str] | None] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[list[str] | None] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/venv/lib/python3.13/site-packages/packaging/py.typed b/venv/lib/python3.13/site-packages/packaging/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/venv/lib/python3.13/site-packages/packaging/requirements.py b/venv/lib/python3.13/site-packages/packaging/requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..4e068c9567def3564f238a76fe7ab46b569f33e5 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/requirements.py @@ -0,0 +1,91 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import annotations + +from typing import Any, Iterator + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +class Requirement: + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string: str) -> None: + try: + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: str | None = parsed.url or None + self.extras: set[str] = set(parsed.extras or []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Marker | None = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + yield f"[{formatted_extras}]" + + if self.specifier: + yield str(self.specifier) + + if self.url: + yield f"@ {self.url}" + if self.marker: + yield " " + + if self.marker: + yield f"; {self.marker}" + + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/venv/lib/python3.13/site-packages/packaging/specifiers.py b/venv/lib/python3.13/site-packages/packaging/specifiers.py new file mode 100644 index 0000000000000000000000000000000000000000..c8448043006f1c60ee92df69da47986d10229b1d --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/specifiers.py @@ -0,0 +1,1019 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" + +from __future__ import annotations + +import abc +import itertools +import re +from typing import Callable, Iterable, Iterator, TypeVar, Union + +from .utils import canonicalize_version +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version + + +class InvalidSpecifier(ValueError): + """ + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' + """ + + +class BaseSpecifier(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __str__(self) -> str: + """ + Returns the str representation of this Specifier-like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self) -> int: + """ + Returns a hash value for this Specifier-like object. + """ + + @abc.abstractmethod + def __eq__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier-like + objects are equal. + + :param other: The other object to check against. + """ + + @property + @abc.abstractmethod + def prereleases(self) -> bool | None: + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. + """ + + @prereleases.setter + def prereleases(self, value: bool) -> None: + """Setter for :attr:`prereleases`. + + :param value: The value to set. + """ + + @abc.abstractmethod + def contains(self, item: str, prereleases: bool | None = None) -> bool: + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None + ) -> Iterator[UnparsedVersionVar]: + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. + + .. tip:: + + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ + + _operator_regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. + (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + def __init__(self, spec: str = "", prereleases: bool | None = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: {spec!r}") + + self._spec: tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "===", ">", "<"]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore suffix segments. + prefix = _version_join( + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + def _compare_equal(self, prospective: Version, spec: str) -> bool: + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) + # Split the spec out by bangs and dots, and pretend that there is + # an implicit dot in between a release segment and a pre-release segment. + split_spec = _version_split(normalized_spec) + + # Split the prospective version out by bangs and dots, and pretend + # that there is an implicit dot in between a release segment and + # a pre-release segment. + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = padded_prospective[: len(split_spec)] + + return shortened_prospective == split_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: + return not self._compare_equal(prospective, spec) + + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: + return str(prospective).lower() == str(spec).lower() + + def __contains__(self, item: str | Version) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) + + def contains(self, item: UnparsedVersion, prereleases: bool | None = None) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version: str) -> list[str]: + """Split version into components. + + The split components are intended for version comparison. The logic does + not attempt to retain the original version string, so joining the + components back with :func:`_version_join` may not produce the original + version string. + """ + result: list[str] = [] + + epoch, _, rest = version.rpartition("!") + result.append(epoch or "0") + + for item in rest.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _version_join(components: list[str]) -> str: + """Join split version components into a version string. + + This function assumes the input came from :func:`_version_split`, where the + first component must be the epoch (either empty or numeric), and all other + components numeric. + """ + epoch, *rest = components + return f"{epoch}!{'.'.join(rest)}" + + +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: list[str], right: list[str]) -> tuple[list[str], list[str]]: + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return ( + list(itertools.chain.from_iterable(left_split)), + list(itertools.chain.from_iterable(right_split)), + ) + + +class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + + def __init__( + self, + specifiers: str | Iterable[Specifier] = "", + prereleases: bool | None = None, + ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + May also be an iterable of ``Specifier`` instances, which will be used + as is. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + if isinstance(specifiers, str): + # Split on `,` to break each individual specifier into its own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Make each individual specifier a Specifier and save in a frozen set + # for later. + self._specs = frozenset(map(Specifier, split_specifiers)) + else: + # Save the supplied specifiers in a frozen set. + self._specs = frozenset(specifiers) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + @property + def prereleases(self) -> bool | None: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"" + + def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self) -> int: + return hash(self._specs) + + def __and__(self, other: SpecifierSet | str) -> SpecifierSet: + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ + if isinstance(other, str): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease overrides." + ) + + return specifier + + def __eq__(self, other: object) -> bool: + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" + return len(self._specs) + + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. + + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) + + def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, + item: UnparsedVersion, + prereleases: bool | None = None, + installed: bool | None = None, + ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + if installed and item.is_prerelease: + item = Version(item.base_version) + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iter(iterable) + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases. + else: + filtered: list[UnparsedVersionVar] = [] + found_prereleases: list[UnparsedVersionVar] = [] + + for item in iterable: + parsed_version = _coerce_version(item) + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return iter(found_prereleases) + + return iter(filtered) diff --git a/venv/lib/python3.13/site-packages/packaging/tags.py b/venv/lib/python3.13/site-packages/packaging/tags.py new file mode 100644 index 0000000000000000000000000000000000000000..8522f59c4f2d0039c1a02ea9aef66fe017dbdb20 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/tags.py @@ -0,0 +1,656 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from __future__ import annotations + +import logging +import platform +import re +import struct +import subprocess +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Iterable, + Iterator, + Sequence, + Tuple, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +AppleVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_abi", "_hash", "_interpreter", "_platform"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" + + +def parse_tag(tag: str) -> frozenset[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> int | str | None: + value: int | str | None = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_").replace(" ", "_") + + +def _is_threaded_cpython(abis: list[str]) -> bool: + """ + Determine if the ABI corresponds to a threaded (`--disable-gil`) build. + + The threaded builds are indicated by a "t" in the abiflags. + """ + if len(abis) == 0: + return False + # expect e.g., cp313 + m = re.match(r"cp\d+(.*)", abis[0]) + if not m: + return False + abiflags = m.group(1) + return "t" in abiflags + + +def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`) + builds do not support abi3. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> list[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + threading = debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn): + threading = "t" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}{threading}") + abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}") + return abis + + +def cpython_tags( + python_version: PythonVersion | None = None, + abis: Iterable[str] | None = None, + platforms: Iterable[str] | None = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + threading = _is_threaded_cpython(abis) + use_abi3 = _abi3_applies(python_version, threading) + if use_abi3: + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if use_abi3: + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + version = _version_nodot((python_version[0], minor_version)) + interpreter = f"cp{version}" + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> list[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] + + +def generic_tags( + interpreter: str | None = None, + abis: Iterable[str] | None = None, + platforms: Iterable[str] | None = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" + + +def compatible_tags( + python_version: PythonVersion | None = None, + interpreter: str | None = None, + platforms: Iterable[str] | None = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: AppleVersion, cpu_arch: str) -> list[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: AppleVersion | None = None, arch: str | None = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("AppleVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("AppleVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + major_version = 10 + for minor_version in range(version[1], -1, -1): + compat_version = major_version, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield f"macosx_{major_version}_{minor_version}_{binary_format}" + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + minor_version = 0 + for major_version in range(version[0], 10, -1): + compat_version = major_version, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield f"macosx_{major_version}_{minor_version}_{binary_format}" + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + major_version = 10 + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = major_version, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield f"macosx_{major_version}_{minor_version}_{binary_format}" + else: + for minor_version in range(16, 3, -1): + compat_version = major_version, minor_version + binary_format = "universal2" + yield f"macosx_{major_version}_{minor_version}_{binary_format}" + + +def ios_platforms( + version: AppleVersion | None = None, multiarch: str | None = None +) -> Iterator[str]: + """ + Yields the platform tags for an iOS system. + + :param version: A two-item tuple specifying the iOS version to generate + platform tags for. Defaults to the current iOS version. + :param multiarch: The CPU architecture+ABI to generate platform tags for - + (the value used by `sys.implementation._multiarch` e.g., + `arm64_iphoneos` or `x84_64_iphonesimulator`). Defaults to the current + multiarch value. + """ + if version is None: + # if iOS is the current platform, ios_ver *must* be defined. However, + # it won't exist for CPython versions before 3.13, which causes a mypy + # error. + _, release, _, _ = platform.ios_ver() # type: ignore[attr-defined, unused-ignore] + version = cast("AppleVersion", tuple(map(int, release.split(".")[:2]))) + + if multiarch is None: + multiarch = sys.implementation._multiarch + multiarch = multiarch.replace("-", "_") + + ios_platform_template = "ios_{major}_{minor}_{multiarch}" + + # Consider any iOS major.minor version from the version requested, down to + # 12.0. 12.0 is the first iOS version that is known to have enough features + # to support CPython. Consider every possible minor release up to X.9. There + # highest the minor has ever gone is 8 (14.8 and 15.8) but having some extra + # candidates that won't ever match doesn't really hurt, and it saves us from + # having to keep an explicit list of known iOS versions in the code. Return + # the results descending order of version number. + + # If the requested major version is less than 12, there won't be any matches. + if version[0] < 12: + return + + # Consider the actual X.Y version that was requested. + yield ios_platform_template.format( + major=version[0], minor=version[1], multiarch=multiarch + ) + + # Consider every minor version from X.0 to the minor version prior to the + # version requested by the platform. + for minor in range(version[1] - 1, -1, -1): + yield ios_platform_template.format( + major=version[0], minor=minor, multiarch=multiarch + ) + + for major in range(version[0] - 1, 11, -1): + for minor in range(9, -1, -1): + yield ios_platform_template.format( + major=major, minor=minor, multiarch=multiarch + ) + + +def android_platforms( + api_level: int | None = None, abi: str | None = None +) -> Iterator[str]: + """ + Yields the :attr:`~Tag.platform` tags for Android. If this function is invoked on + non-Android platforms, the ``api_level`` and ``abi`` arguments are required. + + :param int api_level: The maximum `API level + `__ to return. Defaults + to the current system's version, as returned by ``platform.android_ver``. + :param str abi: The `Android ABI `__, + e.g. ``arm64_v8a``. Defaults to the current system's ABI , as returned by + ``sysconfig.get_platform``. Hyphens and periods will be replaced with + underscores. + """ + if platform.system() != "Android" and (api_level is None or abi is None): + raise TypeError( + "on non-Android platforms, the api_level and abi arguments are required" + ) + + if api_level is None: + # Python 3.13 was the first version to return platform.system() == "Android", + # and also the first version to define platform.android_ver(). + api_level = platform.android_ver().api_level # type: ignore[attr-defined] + + if abi is None: + abi = sysconfig.get_platform().split("-")[-1] + abi = _normalize_string(abi) + + # 16 is the minimum API level known to have enough features to support CPython + # without major patching. Yield every API level from the maximum down to the + # minimum, inclusive. + min_api_level = 16 + for ver in range(api_level, min_api_level - 1, -1): + yield f"android_{ver}_{abi}" + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv8l" + _, arch = linux.split("_", 1) + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "iOS": + return ios_platforms() + elif platform.system() == "Android": + return android_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/venv/lib/python3.13/site-packages/packaging/utils.py b/venv/lib/python3.13/site-packages/packaging/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..23450953df74eccd9c13cd2a955ce09d1f968565 --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/utils.py @@ -0,0 +1,163 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from __future__ import annotations + +import functools +import re +from typing import NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version, _TrimmedRelease + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) +_canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +@functools.singledispatch +def canonicalize_version( + version: Version | str, *, strip_trailing_zero: bool = True +) -> str: + """ + Return a canonical form of a version as a string. + + >>> canonicalize_version('1.0.1') + '1.0.1' + + Per PEP 625, versions may have multiple canonical forms, differing + only by trailing zeros. + + >>> canonicalize_version('1.0.0') + '1' + >>> canonicalize_version('1.0.0', strip_trailing_zero=False) + '1.0.0' + + Invalid versions are returned unaltered. + + >>> canonicalize_version('foo bar baz') + 'foo bar baz' + """ + return str(_TrimmedRelease(str(version)) if strip_trailing_zero else version) + + +@canonicalize_version.register +def _(version: str, *, strip_trailing_zero: bool = True) -> str: + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + return canonicalize_version(parsed, strip_trailing_zero=strip_trailing_zero) + + +def parse_wheel_filename( + filename: str, +) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename!r}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename!r}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name. + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename!r}") + name = canonicalize_name(name_part) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename!r}" + ) from e + + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in {filename!r}" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename!r}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename!r}") + + name = canonicalize_name(name_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename!r}" + ) from e + + return (name, version) diff --git a/venv/lib/python3.13/site-packages/packaging/version.py b/venv/lib/python3.13/site-packages/packaging/version.py new file mode 100644 index 0000000000000000000000000000000000000000..c9bbda20e463b8d9389ecd65f74af33810a02bdd --- /dev/null +++ b/venv/lib/python3.13/site-packages/packaging/version.py @@ -0,0 +1,582 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" + +from __future__ import annotations + +import itertools +import re +from typing import Any, Callable, NamedTuple, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["VERSION_PATTERN", "InvalidVersion", "Version", "parse"] + +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ + NegativeInfinityType, + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], +] +CmpKey = Tuple[ + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, +] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] + + +class _Version(NamedTuple): + epoch: int + release: tuple[int, ...] + dev: tuple[str, int] | None + pre: tuple[str, int] | None + post: tuple[str, int] | None + local: LocalType | None + + +def parse(version: str) -> Version: + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. + """ + return Version(version) + + +class InvalidVersion(ValueError): + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' + """ + + +class _BaseVersion: + _key: tuple[Any, ...] + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +_VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?Palpha|a|beta|b|preview|pre|c|rc)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
+
+class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
+
+    def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: {version!r}")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
+        return f""
+
+    def __str__(self) -> str:
+        """A string representation of the version that can be round-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
+
+    @property
+    def release(self) -> tuple[int, ...]:
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
+
+    @property
+    def pre(self) -> tuple[str, int] | None:
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
+
+    @property
+    def post(self) -> int | None:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> int | None:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> str | None:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1!1.2.3dev1+abc").public
+        '1!1.2.3.dev1'
+        """
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3dev1+abc").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+class _TrimmedRelease(Version):
+    @property
+    def release(self) -> tuple[int, ...]:
+        """
+        Release segment without any trailing zeros.
+
+        >>> _TrimmedRelease('1.0.0').release
+        (1,)
+        >>> _TrimmedRelease('0.0').release
+        (0,)
+        """
+        rel = super().release
+        nonzeros = (index for index, val in enumerate(rel) if val)
+        last_nonzero = max(nonzeros, default=0)
+        return rel[: last_nonzero + 1]
+
+
+def _parse_letter_version(
+    letter: str | None, number: str | bytes | SupportsInt | None
+) -> tuple[str, int] | None:
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+
+    assert not letter
+    if number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: str | None) -> LocalType | None:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch: int,
+    release: tuple[int, ...],
+    pre: tuple[str, int] | None,
+    post: tuple[str, int] | None,
+    dev: tuple[str, int] | None,
+    local: LocalType | None,
+) -> CmpKey:
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: CmpPrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: CmpPrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: CmpPrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: CmpLocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/venv/lib/python3.13/site-packages/tqdm/__init__.py b/venv/lib/python3.13/site-packages/tqdm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8081f77b8812f3b42d7949daa4195d2c35dc70ac
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/__init__.py
@@ -0,0 +1,38 @@
+from ._monitor import TMonitor, TqdmSynchronisationWarning
+from ._tqdm_pandas import tqdm_pandas
+from .cli import main  # TODO: remove in v5.0.0
+from .gui import tqdm as tqdm_gui  # TODO: remove in v5.0.0
+from .gui import trange as tgrange  # TODO: remove in v5.0.0
+from .std import (
+    TqdmDeprecationWarning, TqdmExperimentalWarning, TqdmKeyError, TqdmMonitorWarning,
+    TqdmTypeError, TqdmWarning, tqdm, trange)
+from .version import __version__
+
+__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'tqdm_pandas',
+           'tqdm_notebook', 'tnrange', 'main', 'TMonitor',
+           'TqdmTypeError', 'TqdmKeyError',
+           'TqdmWarning', 'TqdmDeprecationWarning',
+           'TqdmExperimentalWarning',
+           'TqdmMonitorWarning', 'TqdmSynchronisationWarning',
+           '__version__']
+
+
+def tqdm_notebook(*args, **kwargs):  # pragma: no cover
+    """See tqdm.notebook.tqdm for full documentation"""
+    from warnings import warn
+
+    from .notebook import tqdm as _tqdm_notebook
+    warn("This function will be removed in tqdm==5.0.0\n"
+         "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`",
+         TqdmDeprecationWarning, stacklevel=2)
+    return _tqdm_notebook(*args, **kwargs)
+
+
+def tnrange(*args, **kwargs):  # pragma: no cover
+    """Shortcut for `tqdm.notebook.tqdm(range(*args), **kwargs)`."""
+    from warnings import warn
+
+    from .notebook import trange as _tnrange
+    warn("Please use `tqdm.notebook.trange` instead of `tqdm.tnrange`",
+         TqdmDeprecationWarning, stacklevel=2)
+    return _tnrange(*args, **kwargs)
diff --git a/venv/lib/python3.13/site-packages/tqdm/__main__.py b/venv/lib/python3.13/site-packages/tqdm/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e28416e104515e90fca4b69cc60d0c61fd15d61
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/__main__.py
@@ -0,0 +1,3 @@
+from .cli import main
+
+main()
diff --git a/venv/lib/python3.13/site-packages/tqdm/_dist_ver.py b/venv/lib/python3.13/site-packages/tqdm/_dist_ver.py
new file mode 100644
index 0000000000000000000000000000000000000000..61af7d5bb0b25d8dc934b45b18ea35bd32dbb465
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_dist_ver.py
@@ -0,0 +1 @@
+__version__ = '4.67.1'
diff --git a/venv/lib/python3.13/site-packages/tqdm/_main.py b/venv/lib/python3.13/site-packages/tqdm/_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..04fdeeff17b5cc84b210f445b54b87d5b99e3748
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_main.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .cli import *  # NOQA
+from .cli import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.cli.*` instead of `tqdm._main.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/venv/lib/python3.13/site-packages/tqdm/_monitor.py b/venv/lib/python3.13/site-packages/tqdm/_monitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f71aa56817ca77eba5df4a2dd11cb0c4a9a7ea1c
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_monitor.py
@@ -0,0 +1,95 @@
+import atexit
+from threading import Event, Thread, current_thread
+from time import time
+from warnings import warn
+
+__all__ = ["TMonitor", "TqdmSynchronisationWarning"]
+
+
+class TqdmSynchronisationWarning(RuntimeWarning):
+    """tqdm multi-thread/-process errors which may cause incorrect nesting
+    but otherwise no adverse effects"""
+    pass
+
+
+class TMonitor(Thread):
+    """
+    Monitoring thread for tqdm bars.
+    Monitors if tqdm bars are taking too much time to display
+    and readjusts miniters automatically if necessary.
+
+    Parameters
+    ----------
+    tqdm_cls  : class
+        tqdm class to use (can be core tqdm or a submodule).
+    sleep_interval  : float
+        Time to sleep between monitoring checks.
+    """
+    _test = {}  # internal vars for unit testing
+
+    def __init__(self, tqdm_cls, sleep_interval):
+        Thread.__init__(self)
+        self.daemon = True  # kill thread when main killed (KeyboardInterrupt)
+        self.woken = 0  # last time woken up, to sync with monitor
+        self.tqdm_cls = tqdm_cls
+        self.sleep_interval = sleep_interval
+        self._time = self._test.get("time", time)
+        self.was_killed = self._test.get("Event", Event)()
+        atexit.register(self.exit)
+        self.start()
+
+    def exit(self):
+        self.was_killed.set()
+        if self is not current_thread():
+            self.join()
+        return self.report()
+
+    def get_instances(self):
+        # returns a copy of started `tqdm_cls` instances
+        return [i for i in self.tqdm_cls._instances.copy()
+                # Avoid race by checking that the instance started
+                if hasattr(i, 'start_t')]
+
+    def run(self):
+        cur_t = self._time()
+        while True:
+            # After processing and before sleeping, notify that we woke
+            # Need to be done just before sleeping
+            self.woken = cur_t
+            # Sleep some time...
+            self.was_killed.wait(self.sleep_interval)
+            # Quit if killed
+            if self.was_killed.is_set():
+                return
+            # Then monitor!
+            # Acquire lock (to access _instances)
+            with self.tqdm_cls.get_lock():
+                cur_t = self._time()
+                # Check tqdm instances are waiting too long to print
+                instances = self.get_instances()
+                for instance in instances:
+                    # Check event in loop to reduce blocking time on exit
+                    if self.was_killed.is_set():
+                        return
+                    # Only if mininterval > 1 (else iterations are just slow)
+                    # and last refresh exceeded maxinterval
+                    if (
+                        instance.miniters > 1
+                        and (cur_t - instance.last_print_t) >= instance.maxinterval
+                    ):
+                        # force bypassing miniters on next iteration
+                        # (dynamic_miniters adjusts mininterval automatically)
+                        instance.miniters = 1
+                        # Refresh now! (works only for manual tqdm)
+                        instance.refresh(nolock=True)
+                    # Remove accidental long-lived strong reference
+                    del instance
+                if instances != self.get_instances():  # pragma: nocover
+                    warn("Set changed size during iteration" +
+                         " (see https://github.com/tqdm/tqdm/issues/481)",
+                         TqdmSynchronisationWarning, stacklevel=2)
+                # Remove accidental long-lived strong references
+                del instances
+
+    def report(self):
+        return not self.was_killed.is_set()
diff --git a/venv/lib/python3.13/site-packages/tqdm/_tqdm.py b/venv/lib/python3.13/site-packages/tqdm/_tqdm.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fc4962774a4651db7a739a3f143633b6215a9bd
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_tqdm.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .std import *  # NOQA
+from .std import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.std.*` instead of `tqdm._tqdm.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/venv/lib/python3.13/site-packages/tqdm/_tqdm_gui.py b/venv/lib/python3.13/site-packages/tqdm/_tqdm_gui.py
new file mode 100644
index 0000000000000000000000000000000000000000..f32aa894f54b3a5b47a0fbf4263c2fd20df56c9d
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_tqdm_gui.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .gui import *  # NOQA
+from .gui import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.gui.*` instead of `tqdm._tqdm_gui.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/venv/lib/python3.13/site-packages/tqdm/_tqdm_notebook.py b/venv/lib/python3.13/site-packages/tqdm/_tqdm_notebook.py
new file mode 100644
index 0000000000000000000000000000000000000000..f225fbf5b52d04987ccf68f4d5ee4b735e3158b0
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_tqdm_notebook.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .notebook import *  # NOQA
+from .notebook import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/venv/lib/python3.13/site-packages/tqdm/_tqdm_pandas.py b/venv/lib/python3.13/site-packages/tqdm/_tqdm_pandas.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4fe6efdc603579e7f8acfa27ac10dccdf3e94ce
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_tqdm_pandas.py
@@ -0,0 +1,24 @@
+import sys
+
+__author__ = "github.com/casperdcl"
+__all__ = ['tqdm_pandas']
+
+
+def tqdm_pandas(tclass, **tqdm_kwargs):
+    """
+    Registers the given `tqdm` instance with
+    `pandas.core.groupby.DataFrameGroupBy.progress_apply`.
+    """
+    from tqdm import TqdmDeprecationWarning
+
+    if isinstance(tclass, type) or (getattr(tclass, '__name__', '').startswith(
+            'tqdm_')):  # delayed adapter case
+        TqdmDeprecationWarning(
+            "Please use `tqdm.pandas(...)` instead of `tqdm_pandas(tqdm, ...)`.",
+            fp_write=getattr(tqdm_kwargs.get('file', None), 'write', sys.stderr.write))
+        tclass.pandas(**tqdm_kwargs)
+    else:
+        TqdmDeprecationWarning(
+            "Please use `tqdm.pandas(...)` instead of `tqdm_pandas(tqdm(...))`.",
+            fp_write=getattr(tclass.fp, 'write', sys.stderr.write))
+        type(tclass).pandas(deprecated_t=tclass)
diff --git a/venv/lib/python3.13/site-packages/tqdm/_utils.py b/venv/lib/python3.13/site-packages/tqdm/_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..385e849e106d1319fe21045f14eb0aa6552fb153
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/_utils.py
@@ -0,0 +1,11 @@
+from warnings import warn
+
+from .std import TqdmDeprecationWarning
+from .utils import (  # NOQA, pylint: disable=unused-import
+    CUR_OS, IS_NIX, IS_WIN, RE_ANSI, Comparable, FormatReplace, SimpleTextIOWrapper,
+    _environ_cols_wrapper, _is_ascii, _is_utf, _screen_shape_linux, _screen_shape_tput,
+    _screen_shape_windows, _screen_shape_wrapper, _supports_unicode, _term_move_up, colorama)
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.utils.*` instead of `tqdm._utils.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/venv/lib/python3.13/site-packages/tqdm/asyncio.py b/venv/lib/python3.13/site-packages/tqdm/asyncio.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d00a0a2e755f36068d079ccc12ca84d86ff42be
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/asyncio.py
@@ -0,0 +1,93 @@
+"""
+Asynchronous progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm.asyncio import trange, tqdm
+>>> async for i in trange(10):
+...     ...
+"""
+import asyncio
+from sys import version_info
+
+from .std import tqdm as std_tqdm
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['tqdm_asyncio', 'tarange', 'tqdm', 'trange']
+
+
+class tqdm_asyncio(std_tqdm):
+    """
+    Asynchronous-friendly version of tqdm.
+    """
+    def __init__(self, iterable=None, *args, **kwargs):
+        super().__init__(iterable, *args, **kwargs)
+        self.iterable_awaitable = False
+        if iterable is not None:
+            if hasattr(iterable, "__anext__"):
+                self.iterable_next = iterable.__anext__
+                self.iterable_awaitable = True
+            elif hasattr(iterable, "__next__"):
+                self.iterable_next = iterable.__next__
+            else:
+                self.iterable_iterator = iter(iterable)
+                self.iterable_next = self.iterable_iterator.__next__
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        try:
+            if self.iterable_awaitable:
+                res = await self.iterable_next()
+            else:
+                res = self.iterable_next()
+            self.update()
+            return res
+        except StopIteration:
+            self.close()
+            raise StopAsyncIteration
+        except BaseException:
+            self.close()
+            raise
+
+    def send(self, *args, **kwargs):
+        return self.iterable.send(*args, **kwargs)
+
+    @classmethod
+    def as_completed(cls, fs, *, loop=None, timeout=None, total=None, **tqdm_kwargs):
+        """
+        Wrapper for `asyncio.as_completed`.
+        """
+        if total is None:
+            total = len(fs)
+        kwargs = {}
+        if version_info[:2] < (3, 10):
+            kwargs['loop'] = loop
+        yield from cls(asyncio.as_completed(fs, timeout=timeout, **kwargs),
+                       total=total, **tqdm_kwargs)
+
+    @classmethod
+    async def gather(cls, *fs, loop=None, timeout=None, total=None, **tqdm_kwargs):
+        """
+        Wrapper for `asyncio.gather`.
+        """
+        async def wrap_awaitable(i, f):
+            return i, await f
+
+        ifs = [wrap_awaitable(i, f) for i, f in enumerate(fs)]
+        res = [await f for f in cls.as_completed(ifs, loop=loop, timeout=timeout,
+                                                 total=total, **tqdm_kwargs)]
+        return [i for _, i in sorted(res)]
+
+
+def tarange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.asyncio.tqdm(range(*args), **kwargs)`.
+    """
+    return tqdm_asyncio(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_asyncio
+trange = tarange
diff --git a/venv/lib/python3.13/site-packages/tqdm/auto.py b/venv/lib/python3.13/site-packages/tqdm/auto.py
new file mode 100644
index 0000000000000000000000000000000000000000..206c4409d5269594bdbab3a092ef6e09e7c01947
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/auto.py
@@ -0,0 +1,40 @@
+"""
+Enables multiple commonly used features.
+
+Method resolution order:
+
+- `tqdm.autonotebook` without import warnings
+- `tqdm.asyncio`
+- `tqdm.std` base class
+
+Usage:
+>>> from tqdm.auto import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import warnings
+
+from .std import TqdmExperimentalWarning
+
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore", category=TqdmExperimentalWarning)
+    from .autonotebook import tqdm as notebook_tqdm
+
+from .asyncio import tqdm as asyncio_tqdm
+from .std import tqdm as std_tqdm
+
+if notebook_tqdm != std_tqdm:
+    class tqdm(notebook_tqdm, asyncio_tqdm):  # pylint: disable=inconsistent-mro
+        pass
+else:
+    tqdm = asyncio_tqdm
+
+
+def trange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.auto.tqdm(range(*args), **kwargs)`.
+    """
+    return tqdm(range(*args), **kwargs)
+
+
+__all__ = ["tqdm", "trange"]
diff --git a/venv/lib/python3.13/site-packages/tqdm/autonotebook.py b/venv/lib/python3.13/site-packages/tqdm/autonotebook.py
new file mode 100644
index 0000000000000000000000000000000000000000..a09f2ec4b8c95f12b8c7b7774f84d5ec55826334
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/autonotebook.py
@@ -0,0 +1,29 @@
+"""
+Automatically choose between `tqdm.notebook` and `tqdm.std`.
+
+Usage:
+>>> from tqdm.autonotebook import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import sys
+from warnings import warn
+
+try:
+    get_ipython = sys.modules['IPython'].get_ipython
+    if 'IPKernelApp' not in get_ipython().config:  # pragma: no cover
+        raise ImportError("console")
+    from .notebook import WARN_NOIPYW, IProgress
+    if IProgress is None:
+        from .std import TqdmWarning
+        warn(WARN_NOIPYW, TqdmWarning, stacklevel=2)
+        raise ImportError('ipywidgets')
+except Exception:
+    from .std import tqdm, trange
+else:  # pragma: no cover
+    from .notebook import tqdm, trange
+    from .std import TqdmExperimentalWarning
+    warn("Using `tqdm.autonotebook.tqdm` in notebook mode."
+         " Use `tqdm.tqdm` instead to force console mode"
+         " (e.g. in jupyter console)", TqdmExperimentalWarning, stacklevel=2)
+__all__ = ["tqdm", "trange"]
diff --git a/venv/lib/python3.13/site-packages/tqdm/cli.py b/venv/lib/python3.13/site-packages/tqdm/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..e54a7fc8599fe0dfef12cd53b76b27ae51b68b4b
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/cli.py
@@ -0,0 +1,324 @@
+"""
+Module version for monitoring CLI pipes (`... | python -m tqdm | ...`).
+"""
+import logging
+import re
+import sys
+from ast import literal_eval as numeric
+from textwrap import indent
+
+from .std import TqdmKeyError, TqdmTypeError, tqdm
+from .version import __version__
+
+__all__ = ["main"]
+log = logging.getLogger(__name__)
+
+
+def cast(val, typ):
+    log.debug((val, typ))
+    if " or " in typ:
+        for t in typ.split(" or "):
+            try:
+                return cast(val, t)
+            except TqdmTypeError:
+                pass
+        raise TqdmTypeError(f"{val} : {typ}")
+
+    # sys.stderr.write('\ndebug | `val:type`: `' + val + ':' + typ + '`.\n')
+    if typ == 'bool':
+        if (val == 'True') or (val == ''):
+            return True
+        if val == 'False':
+            return False
+        raise TqdmTypeError(val + ' : ' + typ)
+    if typ == 'chr':
+        if len(val) == 1:
+            return val.encode()
+        if re.match(r"^\\\w+$", val):
+            return eval(f'"{val}"').encode()
+        raise TqdmTypeError(f"{val} : {typ}")
+    if typ == 'str':
+        return val
+    if typ == 'int':
+        try:
+            return int(val)
+        except ValueError as exc:
+            raise TqdmTypeError(f"{val} : {typ}") from exc
+    if typ == 'float':
+        try:
+            return float(val)
+        except ValueError as exc:
+            raise TqdmTypeError(f"{val} : {typ}") from exc
+    raise TqdmTypeError(f"{val} : {typ}")
+
+
+def posix_pipe(fin, fout, delim=b'\\n', buf_size=256,
+               callback=lambda float: None, callback_len=True):
+    """
+    Params
+    ------
+    fin  : binary file with `read(buf_size : int)` method
+    fout  : binary file with `write` (and optionally `flush`) methods.
+    callback  : function(float), e.g.: `tqdm.update`
+    callback_len  : If (default: True) do `callback(len(buffer))`.
+      Otherwise, do `callback(data) for data in buffer.split(delim)`.
+    """
+    fp_write = fout.write
+
+    if not delim:
+        while True:
+            tmp = fin.read(buf_size)
+
+            # flush at EOF
+            if not tmp:
+                getattr(fout, 'flush', lambda: None)()
+                return
+
+            fp_write(tmp)
+            callback(len(tmp))
+        # return
+
+    buf = b''
+    len_delim = len(delim)
+    # n = 0
+    while True:
+        tmp = fin.read(buf_size)
+
+        # flush at EOF
+        if not tmp:
+            if buf:
+                fp_write(buf)
+                if callback_len:
+                    # n += 1 + buf.count(delim)
+                    callback(1 + buf.count(delim))
+                else:
+                    for i in buf.split(delim):
+                        callback(i)
+            getattr(fout, 'flush', lambda: None)()
+            return  # n
+
+        while True:
+            i = tmp.find(delim)
+            if i < 0:
+                buf += tmp
+                break
+            fp_write(buf + tmp[:i + len(delim)])
+            # n += 1
+            callback(1 if callback_len else (buf + tmp[:i]))
+            buf = b''
+            tmp = tmp[i + len_delim:]
+
+
+# ((opt, type), ... )
+RE_OPTS = re.compile(r'\n {4}(\S+)\s{2,}:\s*([^,]+)')
+# better split method assuming no positional args
+RE_SHLEX = re.compile(r'\s*(?  : \2', d)
+    split = RE_OPTS.split(d)
+    opt_types_desc = zip(split[1::3], split[2::3], split[3::3])
+    d = ''.join(('\n  --{0}  : {2}{3}' if otd[1] == 'bool' else
+                 '\n  --{0}=<{1}>  : {2}{3}').format(
+                     otd[0].replace('_', '-'), otd[0], *otd[1:])
+                for otd in opt_types_desc if otd[0] not in UNSUPPORTED_OPTS)
+
+    help_short = "Usage:\n  tqdm [--help | options]\n"
+    d = help_short + """
+Options:
+  -h, --help     Print this help and exit.
+  -v, --version  Print version and exit.
+""" + d.strip('\n') + '\n'
+
+    # opts = docopt(d, version=__version__)
+    if any(v in argv for v in ('-v', '--version')):
+        sys.stdout.write(__version__ + '\n')
+        sys.exit(0)
+    elif any(v in argv for v in ('-h', '--help')):
+        sys.stdout.write(d + '\n')
+        sys.exit(0)
+    elif argv and argv[0][:2] != '--':
+        sys.stderr.write(f"Error:Unknown argument:{argv[0]}\n{help_short}")
+
+    argv = RE_SHLEX.split(' '.join(["tqdm"] + argv))
+    opts = dict(zip(argv[1::3], argv[3::3]))
+
+    log.debug(opts)
+    opts.pop('log', True)
+
+    tqdm_args = {'file': fp}
+    try:
+        for (o, v) in opts.items():
+            o = o.replace('-', '_')
+            try:
+                tqdm_args[o] = cast(v, opt_types[o])
+            except KeyError as e:
+                raise TqdmKeyError(str(e))
+        log.debug('args:' + str(tqdm_args))
+
+        delim_per_char = tqdm_args.pop('bytes', False)
+        update = tqdm_args.pop('update', False)
+        update_to = tqdm_args.pop('update_to', False)
+        if sum((delim_per_char, update, update_to)) > 1:
+            raise TqdmKeyError("Can only have one of --bytes --update --update_to")
+    except Exception:
+        fp.write("\nError:\n" + help_short)
+        stdin, stdout_write = sys.stdin, sys.stdout.write
+        for i in stdin:
+            stdout_write(i)
+        raise
+    else:
+        buf_size = tqdm_args.pop('buf_size', 256)
+        delim = tqdm_args.pop('delim', b'\\n')
+        tee = tqdm_args.pop('tee', False)
+        manpath = tqdm_args.pop('manpath', None)
+        comppath = tqdm_args.pop('comppath', None)
+        if tqdm_args.pop('null', False):
+            class stdout(object):
+                @staticmethod
+                def write(_):
+                    pass
+        else:
+            stdout = sys.stdout
+            stdout = getattr(stdout, 'buffer', stdout)
+        stdin = getattr(sys.stdin, 'buffer', sys.stdin)
+        if manpath or comppath:
+            try:  # py<3.9
+                import importlib_resources as resources
+            except ImportError:
+                from importlib import resources
+            from pathlib import Path
+
+            def cp(name, dst):
+                """copy resource `name` to `dst`"""
+                fi = resources.files('tqdm') / name
+                dst.write_bytes(fi.read_bytes())
+                log.info("written:%s", dst)
+            if manpath is not None:
+                cp('tqdm.1', Path(manpath) / 'tqdm.1')
+            if comppath is not None:
+                cp('completion.sh', Path(comppath) / 'tqdm_completion.sh')
+            sys.exit(0)
+        if tee:
+            stdout_write = stdout.write
+            fp_write = getattr(fp, 'buffer', fp).write
+
+            class stdout(object):  # pylint: disable=function-redefined
+                @staticmethod
+                def write(x):
+                    with tqdm.external_write_mode(file=fp):
+                        fp_write(x)
+                    stdout_write(x)
+        if delim_per_char:
+            tqdm_args.setdefault('unit', 'B')
+            tqdm_args.setdefault('unit_scale', True)
+            tqdm_args.setdefault('unit_divisor', 1024)
+            log.debug(tqdm_args)
+            with tqdm(**tqdm_args) as t:
+                posix_pipe(stdin, stdout, '', buf_size, t.update)
+        elif delim == b'\\n':
+            log.debug(tqdm_args)
+            write = stdout.write
+            if update or update_to:
+                with tqdm(**tqdm_args) as t:
+                    if update:
+                        def callback(i):
+                            t.update(numeric(i.decode()))
+                    else:  # update_to
+                        def callback(i):
+                            t.update(numeric(i.decode()) - t.n)
+                    for i in stdin:
+                        write(i)
+                        callback(i)
+            else:
+                for i in tqdm(stdin, **tqdm_args):
+                    write(i)
+        else:
+            log.debug(tqdm_args)
+            with tqdm(**tqdm_args) as t:
+                callback_len = False
+                if update:
+                    def callback(i):
+                        t.update(numeric(i.decode()))
+                elif update_to:
+                    def callback(i):
+                        t.update(numeric(i.decode()) - t.n)
+                else:
+                    callback = t.update
+                    callback_len = True
+                posix_pipe(stdin, stdout, delim, buf_size, callback, callback_len)
diff --git a/venv/lib/python3.13/site-packages/tqdm/completion.sh b/venv/lib/python3.13/site-packages/tqdm/completion.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9f61c7f14bb8c1f6099b9eb75dce28ece6a7ae96
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/completion.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+_tqdm(){
+  local cur prv
+  cur="${COMP_WORDS[COMP_CWORD]}"
+  prv="${COMP_WORDS[COMP_CWORD - 1]}"
+
+  case ${prv} in
+  --bar_format|--buf_size|--colour|--comppath|--delay|--delim|--desc|--initial|--lock_args|--manpath|--maxinterval|--mininterval|--miniters|--ncols|--nrows|--position|--postfix|--smoothing|--total|--unit|--unit_divisor)
+    # await user input
+    ;;
+  "--log")
+    COMPREPLY=($(compgen -W       'CRITICAL FATAL ERROR WARN WARNING INFO DEBUG NOTSET' -- ${cur}))
+    ;;
+  *)
+    COMPREPLY=($(compgen -W '--ascii --bar_format --buf_size --bytes --colour --comppath --delay --delim --desc --disable --dynamic_ncols --help --initial --leave --lock_args --log --manpath --maxinterval --mininterval --miniters --ncols --nrows --null --position --postfix --smoothing --tee --total --unit --unit_divisor --unit_scale --update --update_to --version --write_bytes -h -v' -- ${cur}))
+    ;;
+  esac
+}
+complete -F _tqdm tqdm
diff --git a/venv/lib/python3.13/site-packages/tqdm/dask.py b/venv/lib/python3.13/site-packages/tqdm/dask.py
new file mode 100644
index 0000000000000000000000000000000000000000..57f1b668f59dc5991019eee34c7df3232a2c2cd7
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/dask.py
@@ -0,0 +1,44 @@
+from functools import partial
+
+from dask.callbacks import Callback
+
+from .auto import tqdm as tqdm_auto
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['TqdmCallback']
+
+
+class TqdmCallback(Callback):
+    """Dask callback for task progress."""
+    def __init__(self, start=None, pretask=None, tqdm_class=tqdm_auto,
+                 **tqdm_kwargs):
+        """
+        Parameters
+        ----------
+        tqdm_class  : optional
+            `tqdm` class to use for bars [default: `tqdm.auto.tqdm`].
+        tqdm_kwargs  : optional
+            Any other arguments used for all bars.
+        """
+        super().__init__(start=start, pretask=pretask)
+        if tqdm_kwargs:
+            tqdm_class = partial(tqdm_class, **tqdm_kwargs)
+        self.tqdm_class = tqdm_class
+
+    def _start_state(self, _, state):
+        self.pbar = self.tqdm_class(total=sum(
+            len(state[k]) for k in ['ready', 'waiting', 'running', 'finished']))
+
+    def _posttask(self, *_, **__):
+        self.pbar.update()
+
+    def _finish(self, *_, **__):
+        self.pbar.close()
+
+    def display(self):
+        """Displays in the current cell in Notebooks."""
+        container = getattr(self.bar, 'container', None)
+        if container is None:
+            return
+        from .notebook import display
+        display(container)
diff --git a/venv/lib/python3.13/site-packages/tqdm/gui.py b/venv/lib/python3.13/site-packages/tqdm/gui.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb52fb91a8661f4c73edd352bbc6f21b877dcfee
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/gui.py
@@ -0,0 +1,179 @@
+"""
+Matplotlib GUI progressbar decorator for iterators.
+
+Usage:
+>>> from tqdm.gui import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+# future division is important to divide integers and get as
+# a result precise floating numbers (instead of truncated int)
+import re
+from warnings import warn
+
+# to inherit from the tqdm class
+from .std import TqdmExperimentalWarning
+from .std import tqdm as std_tqdm
+
+# import compatibility functions and utilities
+
+__author__ = {"github.com/": ["casperdcl", "lrq3000"]}
+__all__ = ['tqdm_gui', 'tgrange', 'tqdm', 'trange']
+
+
+class tqdm_gui(std_tqdm):  # pragma: no cover
+    """Experimental Matplotlib GUI version of tqdm!"""
+    # TODO: @classmethod: write() on GUI?
+    def __init__(self, *args, **kwargs):
+        from collections import deque
+
+        import matplotlib as mpl
+        import matplotlib.pyplot as plt
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        colour = kwargs.pop('colour', 'g')
+        super().__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        warn("GUI is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        self.mpl = mpl
+        self.plt = plt
+
+        # Remember if external environment uses toolbars
+        self.toolbar = self.mpl.rcParams['toolbar']
+        self.mpl.rcParams['toolbar'] = 'None'
+
+        self.mininterval = max(self.mininterval, 0.5)
+        self.fig, ax = plt.subplots(figsize=(9, 2.2))
+        # self.fig.subplots_adjust(bottom=0.2)
+        total = self.__len__()  # avoids TypeError on None #971
+        if total is not None:
+            self.xdata = []
+            self.ydata = []
+            self.zdata = []
+        else:
+            self.xdata = deque([])
+            self.ydata = deque([])
+            self.zdata = deque([])
+        self.line1, = ax.plot(self.xdata, self.ydata, color='b')
+        self.line2, = ax.plot(self.xdata, self.zdata, color='k')
+        ax.set_ylim(0, 0.001)
+        if total is not None:
+            ax.set_xlim(0, 100)
+            ax.set_xlabel("percent")
+            self.fig.legend((self.line1, self.line2), ("cur", "est"),
+                            loc='center right')
+            # progressbar
+            self.hspan = plt.axhspan(0, 0.001, xmin=0, xmax=0, color=colour)
+        else:
+            # ax.set_xlim(-60, 0)
+            ax.set_xlim(0, 60)
+            ax.invert_xaxis()
+            ax.set_xlabel("seconds")
+            ax.legend(("cur", "est"), loc='lower left')
+        ax.grid()
+        # ax.set_xlabel('seconds')
+        ax.set_ylabel((self.unit if self.unit else "it") + "/s")
+        if self.unit_scale:
+            plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+            ax.yaxis.get_offset_text().set_x(-0.15)
+
+        # Remember if external environment is interactive
+        self.wasion = plt.isinteractive()
+        plt.ion()
+        self.ax = ax
+
+    def close(self):
+        if self.disable:
+            return
+
+        self.disable = True
+
+        with self.get_lock():
+            self._instances.remove(self)
+
+        # Restore toolbars
+        self.mpl.rcParams['toolbar'] = self.toolbar
+        # Return to non-interactive mode
+        if not self.wasion:
+            self.plt.ioff()
+        if self.leave:
+            self.display()
+        else:
+            self.plt.close(self.fig)
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        n = self.n
+        cur_t = self._time()
+        elapsed = cur_t - self.start_t
+        delta_it = n - self.last_print_n
+        delta_t = cur_t - self.last_print_t
+
+        # Inline due to multiple calls
+        total = self.total
+        xdata = self.xdata
+        ydata = self.ydata
+        zdata = self.zdata
+        ax = self.ax
+        line1 = self.line1
+        line2 = self.line2
+        hspan = getattr(self, 'hspan', None)
+        # instantaneous rate
+        y = delta_it / delta_t
+        # overall rate
+        z = n / elapsed
+        # update line data
+        xdata.append(n * 100.0 / total if total else cur_t)
+        ydata.append(y)
+        zdata.append(z)
+
+        # Discard old values
+        # xmin, xmax = ax.get_xlim()
+        # if (not total) and elapsed > xmin * 1.1:
+        if (not total) and elapsed > 66:
+            xdata.popleft()
+            ydata.popleft()
+            zdata.popleft()
+
+        ymin, ymax = ax.get_ylim()
+        if y > ymax or z > ymax:
+            ymax = 1.1 * y
+            ax.set_ylim(ymin, ymax)
+            ax.figure.canvas.draw()
+
+        if total:
+            line1.set_data(xdata, ydata)
+            line2.set_data(xdata, zdata)
+            if hspan:
+                hspan.set_xy((0, ymin))
+                hspan.set_height(ymax - ymin)
+                hspan.set_width(n / total)
+        else:
+            t_ago = [cur_t - i for i in xdata]
+            line1.set_data(t_ago, ydata)
+            line2.set_data(t_ago, zdata)
+
+        d = self.format_dict
+        # remove {bar}
+        d['bar_format'] = (d['bar_format'] or "{l_bar}{r_bar}").replace(
+            "{bar}", "")
+        msg = self.format_meter(**d)
+        if '' in msg:
+            msg = "".join(re.split(r'\|?\|?', msg, maxsplit=1))
+        ax.set_title(msg, fontname="DejaVu Sans Mono", fontsize=11)
+        self.plt.pause(1e-9)
+
+
+def tgrange(*args, **kwargs):
+    """Shortcut for `tqdm.gui.tqdm(range(*args), **kwargs)`."""
+    return tqdm_gui(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_gui
+trange = tgrange
diff --git a/venv/lib/python3.13/site-packages/tqdm/keras.py b/venv/lib/python3.13/site-packages/tqdm/keras.py
new file mode 100644
index 0000000000000000000000000000000000000000..cce9467c51a95388aaa502d1da9a42f3ebf0af24
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/keras.py
@@ -0,0 +1,122 @@
+from copy import copy
+from functools import partial
+
+from .auto import tqdm as tqdm_auto
+
+try:
+    import keras
+except (ImportError, AttributeError) as e:
+    try:
+        from tensorflow import keras
+    except ImportError:
+        raise e
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['TqdmCallback']
+
+
+class TqdmCallback(keras.callbacks.Callback):
+    """Keras callback for epoch and batch progress."""
+    @staticmethod
+    def bar2callback(bar, pop=None, delta=(lambda logs: 1)):
+        def callback(_, logs=None):
+            n = delta(logs)
+            if logs:
+                if pop:
+                    logs = copy(logs)
+                    [logs.pop(i, 0) for i in pop]
+                bar.set_postfix(logs, refresh=False)
+            bar.update(n)
+
+        return callback
+
+    def __init__(self, epochs=None, data_size=None, batch_size=None, verbose=1,
+                 tqdm_class=tqdm_auto, **tqdm_kwargs):
+        """
+        Parameters
+        ----------
+        epochs  : int, optional
+        data_size  : int, optional
+            Number of training pairs.
+        batch_size  : int, optional
+            Number of training pairs per batch.
+        verbose  : int
+            0: epoch, 1: batch (transient), 2: batch. [default: 1].
+            Will be set to `0` unless both `data_size` and `batch_size`
+            are given.
+        tqdm_class  : optional
+            `tqdm` class to use for bars [default: `tqdm.auto.tqdm`].
+        tqdm_kwargs  : optional
+            Any other arguments used for all bars.
+        """
+        if tqdm_kwargs:
+            tqdm_class = partial(tqdm_class, **tqdm_kwargs)
+        self.tqdm_class = tqdm_class
+        self.epoch_bar = tqdm_class(total=epochs, unit='epoch')
+        self.on_epoch_end = self.bar2callback(self.epoch_bar)
+        if data_size and batch_size:
+            self.batches = batches = (data_size + batch_size - 1) // batch_size
+        else:
+            self.batches = batches = None
+        self.verbose = verbose
+        if verbose == 1:
+            self.batch_bar = tqdm_class(total=batches, unit='batch', leave=False)
+            self.on_batch_end = self.bar2callback(
+                self.batch_bar, pop=['batch', 'size'],
+                delta=lambda logs: logs.get('size', 1))
+
+    def on_train_begin(self, *_, **__):
+        params = self.params.get
+        auto_total = params('epochs', params('nb_epoch', None))
+        if auto_total is not None and auto_total != self.epoch_bar.total:
+            self.epoch_bar.reset(total=auto_total)
+
+    def on_epoch_begin(self, epoch, *_, **__):
+        if self.epoch_bar.n < epoch:
+            ebar = self.epoch_bar
+            ebar.n = ebar.last_print_n = ebar.initial = epoch
+        if self.verbose:
+            params = self.params.get
+            total = params('samples', params(
+                'nb_sample', params('steps', None))) or self.batches
+            if self.verbose == 2:
+                if hasattr(self, 'batch_bar'):
+                    self.batch_bar.close()
+                self.batch_bar = self.tqdm_class(
+                    total=total, unit='batch', leave=True,
+                    unit_scale=1 / (params('batch_size', 1) or 1))
+                self.on_batch_end = self.bar2callback(
+                    self.batch_bar, pop=['batch', 'size'],
+                    delta=lambda logs: logs.get('size', 1))
+            elif self.verbose == 1:
+                self.batch_bar.unit_scale = 1 / (params('batch_size', 1) or 1)
+                self.batch_bar.reset(total=total)
+            else:
+                raise KeyError('Unknown verbosity')
+
+    def on_train_end(self, *_, **__):
+        if hasattr(self, 'batch_bar'):
+            self.batch_bar.close()
+        self.epoch_bar.close()
+
+    def display(self):
+        """Displays in the current cell in Notebooks."""
+        container = getattr(self.epoch_bar, 'container', None)
+        if container is None:
+            return
+        from .notebook import display
+        display(container)
+        batch_bar = getattr(self, 'batch_bar', None)
+        if batch_bar is not None:
+            display(batch_bar.container)
+
+    @staticmethod
+    def _implements_train_batch_hooks():
+        return True
+
+    @staticmethod
+    def _implements_test_batch_hooks():
+        return True
+
+    @staticmethod
+    def _implements_predict_batch_hooks():
+        return True
diff --git a/venv/lib/python3.13/site-packages/tqdm/notebook.py b/venv/lib/python3.13/site-packages/tqdm/notebook.py
new file mode 100644
index 0000000000000000000000000000000000000000..77b91bdd43183998fcb99e92dd4597ff7fc6c3fb
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/notebook.py
@@ -0,0 +1,317 @@
+"""
+IPython/Jupyter Notebook progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm.notebook import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+# import compatibility functions and utilities
+import re
+import sys
+from html import escape
+from weakref import proxy
+
+# to inherit from the tqdm class
+from .std import tqdm as std_tqdm
+
+if True:  # pragma: no cover
+    # import IPython/Jupyter base widget and display utilities
+    IPY = 0
+    try:  # IPython 4.x
+        import ipywidgets
+        IPY = 4
+    except ImportError:  # IPython 3.x / 2.x
+        IPY = 32
+        import warnings
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                'ignore', message=".*The `IPython.html` package has been deprecated.*")
+            try:
+                import IPython.html.widgets as ipywidgets  # NOQA: F401
+            except ImportError:
+                pass
+
+    try:  # IPython 4.x / 3.x
+        if IPY == 32:
+            from IPython.html.widgets import HTML
+            from IPython.html.widgets import FloatProgress as IProgress
+            from IPython.html.widgets import HBox
+            IPY = 3
+        else:
+            from ipywidgets import HTML
+            from ipywidgets import FloatProgress as IProgress
+            from ipywidgets import HBox
+    except ImportError:
+        try:  # IPython 2.x
+            from IPython.html.widgets import HTML
+            from IPython.html.widgets import ContainerWidget as HBox
+            from IPython.html.widgets import FloatProgressWidget as IProgress
+            IPY = 2
+        except ImportError:
+            IPY = 0
+            IProgress = None
+            HBox = object
+
+    try:
+        from IPython.display import display  # , clear_output
+    except ImportError:
+        pass
+
+__author__ = {"github.com/": ["lrq3000", "casperdcl", "alexanderkuk"]}
+__all__ = ['tqdm_notebook', 'tnrange', 'tqdm', 'trange']
+WARN_NOIPYW = ("IProgress not found. Please update jupyter and ipywidgets."
+               " See https://ipywidgets.readthedocs.io/en/stable"
+               "/user_install.html")
+
+
+class TqdmHBox(HBox):
+    """`ipywidgets.HBox` with a pretty representation"""
+    def _json_(self, pretty=None):
+        pbar = getattr(self, 'pbar', None)
+        if pbar is None:
+            return {}
+        d = pbar.format_dict
+        if pretty is not None:
+            d["ascii"] = not pretty
+        return d
+
+    def __repr__(self, pretty=False):
+        pbar = getattr(self, 'pbar', None)
+        if pbar is None:
+            return super().__repr__()
+        return pbar.format_meter(**self._json_(pretty))
+
+    def _repr_pretty_(self, pp, *_, **__):
+        pp.text(self.__repr__(True))
+
+
+class tqdm_notebook(std_tqdm):
+    """
+    Experimental IPython/Jupyter Notebook widget using tqdm!
+    """
+    @staticmethod
+    def status_printer(_, total=None, desc=None, ncols=None):
+        """
+        Manage the printing of an IPython/Jupyter Notebook progress bar widget.
+        """
+        # Fallback to text bar if there's no total
+        # DEPRECATED: replaced with an 'info' style bar
+        # if not total:
+        #    return super(tqdm_notebook, tqdm_notebook).status_printer(file)
+
+        # fp = file
+
+        # Prepare IPython progress bar
+        if IProgress is None:  # #187 #451 #558 #872
+            raise ImportError(WARN_NOIPYW)
+        if total:
+            pbar = IProgress(min=0, max=total)
+        else:  # No total? Show info style bar with no progress tqdm status
+            pbar = IProgress(min=0, max=1)
+            pbar.value = 1
+            pbar.bar_style = 'info'
+            if ncols is None:
+                pbar.layout.width = "20px"
+
+        ltext = HTML()
+        rtext = HTML()
+        if desc:
+            ltext.value = desc
+        container = TqdmHBox(children=[ltext, pbar, rtext])
+        # Prepare layout
+        if ncols is not None:  # use default style of ipywidgets
+            # ncols could be 100, "100px", "100%"
+            ncols = str(ncols)  # ipywidgets only accepts string
+            try:
+                if int(ncols) > 0:  # isnumeric and positive
+                    ncols += 'px'
+            except ValueError:
+                pass
+            pbar.layout.flex = '2'
+            container.layout.width = ncols
+            container.layout.display = 'inline-flex'
+            container.layout.flex_flow = 'row wrap'
+
+        return container
+
+    def display(self, msg=None, pos=None,
+                # additional signals
+                close=False, bar_style=None, check_delay=True):
+        # Note: contrary to native tqdm, msg='' does NOT clear bar
+        # goal is to keep all infos if error happens so user knows
+        # at which iteration the loop failed.
+
+        # Clear previous output (really necessary?)
+        # clear_output(wait=1)
+
+        if not msg and not close:
+            d = self.format_dict
+            # remove {bar}
+            d['bar_format'] = (d['bar_format'] or "{l_bar}{r_bar}").replace(
+                "{bar}", "")
+            msg = self.format_meter(**d)
+
+        ltext, pbar, rtext = self.container.children
+        pbar.value = self.n
+
+        if msg:
+            msg = msg.replace(' ', u'\u2007')  # fix html space padding
+            # html escape special characters (like '&')
+            if '' in msg:
+                left, right = map(escape, re.split(r'\|?\|?', msg, maxsplit=1))
+            else:
+                left, right = '', escape(msg)
+
+            # Update description
+            ltext.value = left
+            # never clear the bar (signal: msg='')
+            if right:
+                rtext.value = right
+
+        # Change bar style
+        if bar_style:
+            # Hack-ish way to avoid the danger bar_style being overridden by
+            # success because the bar gets closed after the error...
+            if pbar.bar_style != 'danger' or bar_style != 'success':
+                pbar.bar_style = bar_style
+
+        # Special signal to close the bar
+        if close and pbar.bar_style != 'danger':  # hide only if no error
+            try:
+                self.container.close()
+            except AttributeError:
+                self.container.visible = False
+            self.container.layout.visibility = 'hidden'  # IPYW>=8
+
+        if check_delay and self.delay > 0 and not self.displayed:
+            display(self.container)
+            self.displayed = True
+
+    @property
+    def colour(self):
+        if hasattr(self, 'container'):
+            return self.container.children[-2].style.bar_color
+
+    @colour.setter
+    def colour(self, bar_color):
+        if hasattr(self, 'container'):
+            self.container.children[-2].style.bar_color = bar_color
+
+    def __init__(self, *args, **kwargs):
+        """
+        Supports the usual `tqdm.tqdm` parameters as well as those listed below.
+
+        Parameters
+        ----------
+        display  : Whether to call `display(self.container)` immediately
+            [default: True].
+        """
+        kwargs = kwargs.copy()
+        # Setup default output
+        file_kwarg = kwargs.get('file', sys.stderr)
+        if file_kwarg is sys.stderr or file_kwarg is None:
+            kwargs['file'] = sys.stdout  # avoid the red block in IPython
+
+        # Initialize parent class + avoid printing by using gui=True
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        colour = kwargs.pop('colour', None)
+        display_here = kwargs.pop('display', True)
+        super().__init__(*args, **kwargs)
+        if self.disable or not kwargs['gui']:
+            self.disp = lambda *_, **__: None
+            return
+
+        # Get bar width
+        self.ncols = '100%' if self.dynamic_ncols else kwargs.get("ncols", None)
+
+        # Replace with IPython progress bar display (with correct total)
+        unit_scale = 1 if self.unit_scale is True else self.unit_scale or 1
+        total = self.total * unit_scale if self.total else self.total
+        self.container = self.status_printer(self.fp, total, self.desc, self.ncols)
+        self.container.pbar = proxy(self)
+        self.displayed = False
+        if display_here and self.delay <= 0:
+            display(self.container)
+            self.displayed = True
+        self.disp = self.display
+        self.colour = colour
+
+        # Print initial bar state
+        if not self.disable:
+            self.display(check_delay=False)
+
+    def __iter__(self):
+        try:
+            it = super().__iter__()
+            for obj in it:
+                # return super(tqdm...) will not catch exception
+                yield obj
+        # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt
+        except:  # NOQA
+            self.disp(bar_style='danger')
+            raise
+        # NB: don't `finally: close()`
+        # since this could be a shared bar which the user will `reset()`
+
+    def update(self, n=1):
+        try:
+            return super().update(n=n)
+        # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt
+        except:  # NOQA
+            # cannot catch KeyboardInterrupt when using manual tqdm
+            # as the interrupt will most likely happen on another statement
+            self.disp(bar_style='danger')
+            raise
+        # NB: don't `finally: close()`
+        # since this could be a shared bar which the user will `reset()`
+
+    def close(self):
+        if self.disable:
+            return
+        super().close()
+        # Try to detect if there was an error or KeyboardInterrupt
+        # in manual mode: if n < total, things probably got wrong
+        if self.total and self.n < self.total:
+            self.disp(bar_style='danger', check_delay=False)
+        else:
+            if self.leave:
+                self.disp(bar_style='success', check_delay=False)
+            else:
+                self.disp(close=True, check_delay=False)
+
+    def clear(self, *_, **__):
+        pass
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Consider combining with `leave=True`.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if self.disable:
+            return super().reset(total=total)
+        _, pbar, _ = self.container.children
+        pbar.bar_style = ''
+        if total is not None:
+            pbar.max = total
+            if not self.total and self.ncols is None:  # no longer unknown total
+                pbar.layout.width = None  # reset width
+        return super().reset(total=total)
+
+
+def tnrange(*args, **kwargs):
+    """Shortcut for `tqdm.notebook.tqdm(range(*args), **kwargs)`."""
+    return tqdm_notebook(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_notebook
+trange = tnrange
diff --git a/venv/lib/python3.13/site-packages/tqdm/rich.py b/venv/lib/python3.13/site-packages/tqdm/rich.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d392edaf115a93f7c145de52cbe8978dcf1ede8
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/rich.py
@@ -0,0 +1,151 @@
+"""
+`rich.progress` decorator for iterators.
+
+Usage:
+>>> from tqdm.rich import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+from warnings import warn
+
+from rich.progress import (
+    BarColumn, Progress, ProgressColumn, Text, TimeElapsedColumn, TimeRemainingColumn, filesize)
+
+from .std import TqdmExperimentalWarning
+from .std import tqdm as std_tqdm
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['tqdm_rich', 'trrange', 'tqdm', 'trange']
+
+
+class FractionColumn(ProgressColumn):
+    """Renders completed/total, e.g. '0.5/2.3 G'."""
+    def __init__(self, unit_scale=False, unit_divisor=1000):
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        super().__init__()
+
+    def render(self, task):
+        """Calculate common unit for completed and total."""
+        completed = int(task.completed)
+        total = int(task.total)
+        if self.unit_scale:
+            unit, suffix = filesize.pick_unit_and_suffix(
+                total,
+                ["", "K", "M", "G", "T", "P", "E", "Z", "Y"],
+                self.unit_divisor,
+            )
+        else:
+            unit, suffix = filesize.pick_unit_and_suffix(total, [""], 1)
+        precision = 0 if unit == 1 else 1
+        return Text(
+            f"{completed/unit:,.{precision}f}/{total/unit:,.{precision}f} {suffix}",
+            style="progress.download")
+
+
+class RateColumn(ProgressColumn):
+    """Renders human readable transfer speed."""
+    def __init__(self, unit="", unit_scale=False, unit_divisor=1000):
+        self.unit = unit
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        super().__init__()
+
+    def render(self, task):
+        """Show data transfer speed."""
+        speed = task.speed
+        if speed is None:
+            return Text(f"? {self.unit}/s", style="progress.data.speed")
+        if self.unit_scale:
+            unit, suffix = filesize.pick_unit_and_suffix(
+                speed,
+                ["", "K", "M", "G", "T", "P", "E", "Z", "Y"],
+                self.unit_divisor,
+            )
+        else:
+            unit, suffix = filesize.pick_unit_and_suffix(speed, [""], 1)
+        precision = 0 if unit == 1 else 1
+        return Text(f"{speed/unit:,.{precision}f} {suffix}{self.unit}/s",
+                    style="progress.data.speed")
+
+
+class tqdm_rich(std_tqdm):  # pragma: no cover
+    """Experimental rich.progress GUI version of tqdm!"""
+    # TODO: @classmethod: write()?
+    def __init__(self, *args, **kwargs):
+        """
+        This class accepts the following parameters *in addition* to
+        the parameters accepted by `tqdm`.
+
+        Parameters
+        ----------
+        progress  : tuple, optional
+            arguments for `rich.progress.Progress()`.
+        options  : dict, optional
+            keyword arguments for `rich.progress.Progress()`.
+        """
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        progress = kwargs.pop('progress', None)
+        options = kwargs.pop('options', {}).copy()
+        super().__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        warn("rich is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        d = self.format_dict
+        if progress is None:
+            progress = (
+                "[progress.description]{task.description}"
+                "[progress.percentage]{task.percentage:>4.0f}%",
+                BarColumn(bar_width=None),
+                FractionColumn(
+                    unit_scale=d['unit_scale'], unit_divisor=d['unit_divisor']),
+                "[", TimeElapsedColumn(), "<", TimeRemainingColumn(),
+                ",", RateColumn(unit=d['unit'], unit_scale=d['unit_scale'],
+                                unit_divisor=d['unit_divisor']), "]"
+            )
+        options.setdefault('transient', not self.leave)
+        self._prog = Progress(*progress, **options)
+        self._prog.__enter__()
+        self._task_id = self._prog.add_task(self.desc or "", **d)
+
+    def close(self):
+        if self.disable:
+            return
+        self.display()  # print 100%, vis #1306
+        super().close()
+        self._prog.__exit__(None, None, None)
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        if not hasattr(self, '_prog'):
+            return
+        self._prog.update(self._task_id, completed=self.n, description=self.desc)
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if hasattr(self, '_prog'):
+            self._prog.reset(total=total)
+        super().reset(total=total)
+
+
+def trrange(*args, **kwargs):
+    """Shortcut for `tqdm.rich.tqdm(range(*args), **kwargs)`."""
+    return tqdm_rich(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_rich
+trange = trrange
diff --git a/venv/lib/python3.13/site-packages/tqdm/std.py b/venv/lib/python3.13/site-packages/tqdm/std.py
new file mode 100644
index 0000000000000000000000000000000000000000..e91ad3090392916fc2bc1e34bc471e43212fe699
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/std.py
@@ -0,0 +1,1524 @@
+"""
+Customisable progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import sys
+from collections import OrderedDict, defaultdict
+from contextlib import contextmanager
+from datetime import datetime, timedelta, timezone
+from numbers import Number
+from time import time
+from warnings import warn
+from weakref import WeakSet
+
+from ._monitor import TMonitor
+from .utils import (
+    CallbackIOWrapper, Comparable, DisableOnWriteError, FormatReplace, SimpleTextIOWrapper,
+    _is_ascii, _screen_shape_wrapper, _supports_unicode, _term_move_up, disp_len, disp_trim,
+    envwrap)
+
+__author__ = "https://github.com/tqdm/tqdm#contributions"
+__all__ = ['tqdm', 'trange',
+           'TqdmTypeError', 'TqdmKeyError', 'TqdmWarning',
+           'TqdmExperimentalWarning', 'TqdmDeprecationWarning',
+           'TqdmMonitorWarning']
+
+
+class TqdmTypeError(TypeError):
+    pass
+
+
+class TqdmKeyError(KeyError):
+    pass
+
+
+class TqdmWarning(Warning):
+    """base class for all tqdm warnings.
+
+    Used for non-external-code-breaking errors, such as garbled printing.
+    """
+    def __init__(self, msg, fp_write=None, *a, **k):
+        if fp_write is not None:
+            fp_write("\n" + self.__class__.__name__ + ": " + str(msg).rstrip() + '\n')
+        else:
+            super().__init__(msg, *a, **k)
+
+
+class TqdmExperimentalWarning(TqdmWarning, FutureWarning):
+    """beta feature, unstable API and behaviour"""
+    pass
+
+
+class TqdmDeprecationWarning(TqdmWarning, DeprecationWarning):
+    # not suppressed if raised
+    pass
+
+
+class TqdmMonitorWarning(TqdmWarning, RuntimeWarning):
+    """tqdm monitor errors which do not affect external functionality"""
+    pass
+
+
+def TRLock(*args, **kwargs):
+    """threading RLock"""
+    try:
+        from threading import RLock
+        return RLock(*args, **kwargs)
+    except (ImportError, OSError):  # pragma: no cover
+        pass
+
+
+class TqdmDefaultWriteLock(object):
+    """
+    Provide a default write lock for thread and multiprocessing safety.
+    Works only on platforms supporting `fork` (so Windows is excluded).
+    You must initialise a `tqdm` or `TqdmDefaultWriteLock` instance
+    before forking in order for the write lock to work.
+    On Windows, you need to supply the lock from the parent to the children as
+    an argument to joblib or the parallelism lib you use.
+    """
+    # global thread lock so no setup required for multithreading.
+    # NB: Do not create multiprocessing lock as it sets the multiprocessing
+    # context, disallowing `spawn()`/`forkserver()`
+    th_lock = TRLock()
+
+    def __init__(self):
+        # Create global parallelism locks to avoid racing issues with parallel
+        # bars works only if fork available (Linux/MacOSX, but not Windows)
+        cls = type(self)
+        root_lock = cls.th_lock
+        if root_lock is not None:
+            root_lock.acquire()
+        cls.create_mp_lock()
+        self.locks = [lk for lk in [cls.mp_lock, cls.th_lock] if lk is not None]
+        if root_lock is not None:
+            root_lock.release()
+
+    def acquire(self, *a, **k):
+        for lock in self.locks:
+            lock.acquire(*a, **k)
+
+    def release(self):
+        for lock in self.locks[::-1]:  # Release in inverse order of acquisition
+            lock.release()
+
+    def __enter__(self):
+        self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    @classmethod
+    def create_mp_lock(cls):
+        if not hasattr(cls, 'mp_lock'):
+            try:
+                from multiprocessing import RLock
+                cls.mp_lock = RLock()
+            except (ImportError, OSError):  # pragma: no cover
+                cls.mp_lock = None
+
+    @classmethod
+    def create_th_lock(cls):
+        assert hasattr(cls, 'th_lock')
+        warn("create_th_lock not needed anymore", TqdmDeprecationWarning, stacklevel=2)
+
+
+class Bar(object):
+    """
+    `str.format`-able bar with format specifiers: `[width][type]`
+
+    - `width`
+      + unspecified (default): use `self.default_len`
+      + `int >= 0`: overrides `self.default_len`
+      + `int < 0`: subtract from `self.default_len`
+    - `type`
+      + `a`: ascii (`charset=self.ASCII` override)
+      + `u`: unicode (`charset=self.UTF` override)
+      + `b`: blank (`charset="  "` override)
+    """
+    ASCII = " 123456789#"
+    UTF = u" " + u''.join(map(chr, range(0x258F, 0x2587, -1)))
+    BLANK = "  "
+    COLOUR_RESET = '\x1b[0m'
+    COLOUR_RGB = '\x1b[38;2;%d;%d;%dm'
+    COLOURS = {'BLACK': '\x1b[30m', 'RED': '\x1b[31m', 'GREEN': '\x1b[32m',
+               'YELLOW': '\x1b[33m', 'BLUE': '\x1b[34m', 'MAGENTA': '\x1b[35m',
+               'CYAN': '\x1b[36m', 'WHITE': '\x1b[37m'}
+
+    def __init__(self, frac, default_len=10, charset=UTF, colour=None):
+        if not 0 <= frac <= 1:
+            warn("clamping frac to range [0, 1]", TqdmWarning, stacklevel=2)
+            frac = max(0, min(1, frac))
+        assert default_len > 0
+        self.frac = frac
+        self.default_len = default_len
+        self.charset = charset
+        self.colour = colour
+
+    @property
+    def colour(self):
+        return self._colour
+
+    @colour.setter
+    def colour(self, value):
+        if not value:
+            self._colour = None
+            return
+        try:
+            if value.upper() in self.COLOURS:
+                self._colour = self.COLOURS[value.upper()]
+            elif value[0] == '#' and len(value) == 7:
+                self._colour = self.COLOUR_RGB % tuple(
+                    int(i, 16) for i in (value[1:3], value[3:5], value[5:7]))
+            else:
+                raise KeyError
+        except (KeyError, AttributeError):
+            warn("Unknown colour (%s); valid choices: [hex (#00ff00), %s]" % (
+                 value, ", ".join(self.COLOURS)),
+                 TqdmWarning, stacklevel=2)
+            self._colour = None
+
+    def __format__(self, format_spec):
+        if format_spec:
+            _type = format_spec[-1].lower()
+            try:
+                charset = {'a': self.ASCII, 'u': self.UTF, 'b': self.BLANK}[_type]
+            except KeyError:
+                charset = self.charset
+            else:
+                format_spec = format_spec[:-1]
+            if format_spec:
+                N_BARS = int(format_spec)
+                if N_BARS < 0:
+                    N_BARS += self.default_len
+            else:
+                N_BARS = self.default_len
+        else:
+            charset = self.charset
+            N_BARS = self.default_len
+
+        nsyms = len(charset) - 1
+        bar_length, frac_bar_length = divmod(int(self.frac * N_BARS * nsyms), nsyms)
+
+        res = charset[-1] * bar_length
+        if bar_length < N_BARS:  # whitespace padding
+            res = res + charset[frac_bar_length] + charset[0] * (N_BARS - bar_length - 1)
+        return self.colour + res + self.COLOUR_RESET if self.colour else res
+
+
+class EMA(object):
+    """
+    Exponential moving average: smoothing to give progressively lower
+    weights to older values.
+
+    Parameters
+    ----------
+    smoothing  : float, optional
+        Smoothing factor in range [0, 1], [default: 0.3].
+        Increase to give more weight to recent values.
+        Ranges from 0 (yields old value) to 1 (yields new value).
+    """
+    def __init__(self, smoothing=0.3):
+        self.alpha = smoothing
+        self.last = 0
+        self.calls = 0
+
+    def __call__(self, x=None):
+        """
+        Parameters
+        ----------
+        x  : float
+            New value to include in EMA.
+        """
+        beta = 1 - self.alpha
+        if x is not None:
+            self.last = self.alpha * x + beta * self.last
+            self.calls += 1
+        return self.last / (1 - beta ** self.calls) if self.calls else self.last
+
+
+class tqdm(Comparable):
+    """
+    Decorate an iterable object, returning an iterator which acts exactly
+    like the original iterable, but prints a dynamically updating
+    progressbar every time a value is requested.
+
+    Parameters
+    ----------
+    iterable  : iterable, optional
+        Iterable to decorate with a progressbar.
+        Leave blank to manually manage the updates.
+    desc  : str, optional
+        Prefix for the progressbar.
+    total  : int or float, optional
+        The number of expected iterations. If unspecified,
+        len(iterable) is used if possible. If float("inf") or as a last
+        resort, only basic progress statistics are displayed
+        (no ETA, no progressbar).
+        If `gui` is True and this parameter needs subsequent updating,
+        specify an initial arbitrary large positive number,
+        e.g. 9e9.
+    leave  : bool, optional
+        If [default: True], keeps all traces of the progressbar
+        upon termination of iteration.
+        If `None`, will leave only if `position` is `0`.
+    file  : `io.TextIOWrapper` or `io.StringIO`, optional
+        Specifies where to output the progress messages
+        (default: sys.stderr). Uses `file.write(str)` and `file.flush()`
+        methods.  For encoding, see `write_bytes`.
+    ncols  : int, optional
+        The width of the entire output message. If specified,
+        dynamically resizes the progressbar to stay within this bound.
+        If unspecified, attempts to use environment width. The
+        fallback is a meter width of 10 and no limit for the counter and
+        statistics. If 0, will not print any meter (only stats).
+    mininterval  : float, optional
+        Minimum progress display update interval [default: 0.1] seconds.
+    maxinterval  : float, optional
+        Maximum progress display update interval [default: 10] seconds.
+        Automatically adjusts `miniters` to correspond to `mininterval`
+        after long display update lag. Only works if `dynamic_miniters`
+        or monitor thread is enabled.
+    miniters  : int or float, optional
+        Minimum progress display update interval, in iterations.
+        If 0 and `dynamic_miniters`, will automatically adjust to equal
+        `mininterval` (more CPU efficient, good for tight loops).
+        If > 0, will skip display of specified number of iterations.
+        Tweak this and `mininterval` to get very efficient loops.
+        If your progress is erratic with both fast and slow iterations
+        (network, skipping items, etc) you should set miniters=1.
+    ascii  : bool or str, optional
+        If unspecified or False, use unicode (smooth blocks) to fill
+        the meter. The fallback is to use ASCII characters " 123456789#".
+    disable  : bool, optional
+        Whether to disable the entire progressbar wrapper
+        [default: False]. If set to None, disable on non-TTY.
+    unit  : str, optional
+        String that will be used to define the unit of each iteration
+        [default: it].
+    unit_scale  : bool or int or float, optional
+        If 1 or True, the number of iterations will be reduced/scaled
+        automatically and a metric prefix following the
+        International System of Units standard will be added
+        (kilo, mega, etc.) [default: False]. If any other non-zero
+        number, will scale `total` and `n`.
+    dynamic_ncols  : bool, optional
+        If set, constantly alters `ncols` and `nrows` to the
+        environment (allowing for window resizes) [default: False].
+    smoothing  : float, optional
+        Exponential moving average smoothing factor for speed estimates
+        (ignored in GUI mode). Ranges from 0 (average speed) to 1
+        (current/instantaneous speed) [default: 0.3].
+    bar_format  : str, optional
+        Specify a custom bar string formatting. May impact performance.
+        [default: '{l_bar}{bar}{r_bar}'], where
+        l_bar='{desc}: {percentage:3.0f}%|' and
+        r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, '
+            '{rate_fmt}{postfix}]'
+        Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,
+            percentage, elapsed, elapsed_s, ncols, nrows, desc, unit,
+            rate, rate_fmt, rate_noinv, rate_noinv_fmt,
+            rate_inv, rate_inv_fmt, postfix, unit_divisor,
+            remaining, remaining_s, eta.
+        Note that a trailing ": " is automatically removed after {desc}
+        if the latter is empty.
+    initial  : int or float, optional
+        The initial counter value. Useful when restarting a progress
+        bar [default: 0]. If using float, consider specifying `{n:.3f}`
+        or similar in `bar_format`, or specifying `unit_scale`.
+    position  : int, optional
+        Specify the line offset to print this bar (starting from 0)
+        Automatic if unspecified.
+        Useful to manage multiple bars at once (eg, from threads).
+    postfix  : dict or *, optional
+        Specify additional stats to display at the end of the bar.
+        Calls `set_postfix(**postfix)` if possible (dict).
+    unit_divisor  : float, optional
+        [default: 1000], ignored unless `unit_scale` is True.
+    write_bytes  : bool, optional
+        Whether to write bytes. If (default: False) will write unicode.
+    lock_args  : tuple, optional
+        Passed to `refresh` for intermediate output
+        (initialisation, iterating, and updating).
+    nrows  : int, optional
+        The screen height. If specified, hides nested bars outside this
+        bound. If unspecified, attempts to use environment height.
+        The fallback is 20.
+    colour  : str, optional
+        Bar colour (e.g. 'green', '#00ff00').
+    delay  : float, optional
+        Don't display until [default: 0] seconds have elapsed.
+    gui  : bool, optional
+        WARNING: internal parameter - do not use.
+        Use tqdm.gui.tqdm(...) instead. If set, will attempt to use
+        matplotlib animations for a graphical output [default: False].
+
+    Returns
+    -------
+    out  : decorated iterator.
+    """
+
+    monitor_interval = 10  # set to 0 to disable the thread
+    monitor = None
+    _instances = WeakSet()
+
+    @staticmethod
+    def format_sizeof(num, suffix='', divisor=1000):
+        """
+        Formats a number (greater than unity) with SI Order of Magnitude
+        prefixes.
+
+        Parameters
+        ----------
+        num  : float
+            Number ( >= 1) to format.
+        suffix  : str, optional
+            Post-postfix [default: ''].
+        divisor  : float, optional
+            Divisor between prefixes [default: 1000].
+
+        Returns
+        -------
+        out  : str
+            Number with Order of Magnitude SI unit postfix.
+        """
+        for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']:
+            if abs(num) < 999.5:
+                if abs(num) < 99.95:
+                    if abs(num) < 9.995:
+                        return f'{num:1.2f}{unit}{suffix}'
+                    return f'{num:2.1f}{unit}{suffix}'
+                return f'{num:3.0f}{unit}{suffix}'
+            num /= divisor
+        return f'{num:3.1f}Y{suffix}'
+
+    @staticmethod
+    def format_interval(t):
+        """
+        Formats a number of seconds as a clock time, [H:]MM:SS
+
+        Parameters
+        ----------
+        t  : int
+            Number of seconds.
+
+        Returns
+        -------
+        out  : str
+            [H:]MM:SS
+        """
+        mins, s = divmod(int(t), 60)
+        h, m = divmod(mins, 60)
+        return f'{h:d}:{m:02d}:{s:02d}' if h else f'{m:02d}:{s:02d}'
+
+    @staticmethod
+    def format_num(n):
+        """
+        Intelligent scientific notation (.3g).
+
+        Parameters
+        ----------
+        n  : int or float or Numeric
+            A Number.
+
+        Returns
+        -------
+        out  : str
+            Formatted number.
+        """
+        f = f'{n:.3g}'.replace('e+0', 'e+').replace('e-0', 'e-')
+        n = str(n)
+        return f if len(f) < len(n) else n
+
+    @staticmethod
+    def status_printer(file):
+        """
+        Manage the printing and in-place updating of a line of characters.
+        Note that if the string is longer than a line, then in-place
+        updating may not work (it will print a new line at each refresh).
+        """
+        fp = file
+        fp_flush = getattr(fp, 'flush', lambda: None)  # pragma: no cover
+        if fp in (sys.stderr, sys.stdout):
+            getattr(sys.stderr, 'flush', lambda: None)()
+            getattr(sys.stdout, 'flush', lambda: None)()
+
+        def fp_write(s):
+            fp.write(str(s))
+            fp_flush()
+
+        last_len = [0]
+
+        def print_status(s):
+            len_s = disp_len(s)
+            fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0)))
+            last_len[0] = len_s
+
+        return print_status
+
+    @staticmethod
+    def format_meter(n, total, elapsed, ncols=None, prefix='', ascii=False, unit='it',
+                     unit_scale=False, rate=None, bar_format=None, postfix=None,
+                     unit_divisor=1000, initial=0, colour=None, **extra_kwargs):
+        """
+        Return a string-based progress bar given some parameters
+
+        Parameters
+        ----------
+        n  : int or float
+            Number of finished iterations.
+        total  : int or float
+            The expected total number of iterations. If meaningless (None),
+            only basic progress statistics are displayed (no ETA).
+        elapsed  : float
+            Number of seconds passed since start.
+        ncols  : int, optional
+            The width of the entire output message. If specified,
+            dynamically resizes `{bar}` to stay within this bound
+            [default: None]. If `0`, will not print any bar (only stats).
+            The fallback is `{bar:10}`.
+        prefix  : str, optional
+            Prefix message (included in total width) [default: ''].
+            Use as {desc} in bar_format string.
+        ascii  : bool, optional or str, optional
+            If not set, use unicode (smooth blocks) to fill the meter
+            [default: False]. The fallback is to use ASCII characters
+            " 123456789#".
+        unit  : str, optional
+            The iteration unit [default: 'it'].
+        unit_scale  : bool or int or float, optional
+            If 1 or True, the number of iterations will be printed with an
+            appropriate SI metric prefix (k = 10^3, M = 10^6, etc.)
+            [default: False]. If any other non-zero number, will scale
+            `total` and `n`.
+        rate  : float, optional
+            Manual override for iteration rate.
+            If [default: None], uses n/elapsed.
+        bar_format  : str, optional
+            Specify a custom bar string formatting. May impact performance.
+            [default: '{l_bar}{bar}{r_bar}'], where
+            l_bar='{desc}: {percentage:3.0f}%|' and
+            r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, '
+              '{rate_fmt}{postfix}]'
+            Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,
+              percentage, elapsed, elapsed_s, ncols, nrows, desc, unit,
+              rate, rate_fmt, rate_noinv, rate_noinv_fmt,
+              rate_inv, rate_inv_fmt, postfix, unit_divisor,
+              remaining, remaining_s, eta.
+            Note that a trailing ": " is automatically removed after {desc}
+            if the latter is empty.
+        postfix  : *, optional
+            Similar to `prefix`, but placed at the end
+            (e.g. for additional stats).
+            Note: postfix is usually a string (not a dict) for this method,
+            and will if possible be set to postfix = ', ' + postfix.
+            However other types are supported (#382).
+        unit_divisor  : float, optional
+            [default: 1000], ignored unless `unit_scale` is True.
+        initial  : int or float, optional
+            The initial counter value [default: 0].
+        colour  : str, optional
+            Bar colour (e.g. 'green', '#00ff00').
+
+        Returns
+        -------
+        out  : Formatted meter and stats, ready to display.
+        """
+
+        # sanity check: total
+        if total and n >= (total + 0.5):  # allow float imprecision (#849)
+            total = None
+
+        # apply custom scale if necessary
+        if unit_scale and unit_scale not in (True, 1):
+            if total:
+                total *= unit_scale
+            n *= unit_scale
+            if rate:
+                rate *= unit_scale  # by default rate = self.avg_dn / self.avg_dt
+            unit_scale = False
+
+        elapsed_str = tqdm.format_interval(elapsed)
+
+        # if unspecified, attempt to use rate = average speed
+        # (we allow manual override since predicting time is an arcane art)
+        if rate is None and elapsed:
+            rate = (n - initial) / elapsed
+        inv_rate = 1 / rate if rate else None
+        format_sizeof = tqdm.format_sizeof
+        rate_noinv_fmt = ((format_sizeof(rate) if unit_scale else f'{rate:5.2f}')
+                          if rate else '?') + unit + '/s'
+        rate_inv_fmt = (
+            (format_sizeof(inv_rate) if unit_scale else f'{inv_rate:5.2f}')
+            if inv_rate else '?') + 's/' + unit
+        rate_fmt = rate_inv_fmt if inv_rate and inv_rate > 1 else rate_noinv_fmt
+
+        if unit_scale:
+            n_fmt = format_sizeof(n, divisor=unit_divisor)
+            total_fmt = format_sizeof(total, divisor=unit_divisor) if total is not None else '?'
+        else:
+            n_fmt = str(n)
+            total_fmt = str(total) if total is not None else '?'
+
+        try:
+            postfix = ', ' + postfix if postfix else ''
+        except TypeError:
+            pass
+
+        remaining = (total - n) / rate if rate and total else 0
+        remaining_str = tqdm.format_interval(remaining) if rate else '?'
+        try:
+            eta_dt = (datetime.now() + timedelta(seconds=remaining)
+                      if rate and total else datetime.fromtimestamp(0, timezone.utc))
+        except OverflowError:
+            eta_dt = datetime.max
+
+        # format the stats displayed to the left and right sides of the bar
+        if prefix:
+            # old prefix setup work around
+            bool_prefix_colon_already = (prefix[-2:] == ": ")
+            l_bar = prefix if bool_prefix_colon_already else prefix + ": "
+        else:
+            l_bar = ''
+
+        r_bar = f'| {n_fmt}/{total_fmt} [{elapsed_str}<{remaining_str}, {rate_fmt}{postfix}]'
+
+        # Custom bar formatting
+        # Populate a dict with all available progress indicators
+        format_dict = {
+            # slight extension of self.format_dict
+            'n': n, 'n_fmt': n_fmt, 'total': total, 'total_fmt': total_fmt,
+            'elapsed': elapsed_str, 'elapsed_s': elapsed,
+            'ncols': ncols, 'desc': prefix or '', 'unit': unit,
+            'rate': inv_rate if inv_rate and inv_rate > 1 else rate,
+            'rate_fmt': rate_fmt, 'rate_noinv': rate,
+            'rate_noinv_fmt': rate_noinv_fmt, 'rate_inv': inv_rate,
+            'rate_inv_fmt': rate_inv_fmt,
+            'postfix': postfix, 'unit_divisor': unit_divisor,
+            'colour': colour,
+            # plus more useful definitions
+            'remaining': remaining_str, 'remaining_s': remaining,
+            'l_bar': l_bar, 'r_bar': r_bar, 'eta': eta_dt,
+            **extra_kwargs}
+
+        # total is known: we can predict some stats
+        if total:
+            # fractional and percentage progress
+            frac = n / total
+            percentage = frac * 100
+
+            l_bar += f'{percentage:3.0f}%|'
+
+            if ncols == 0:
+                return l_bar[:-1] + r_bar[1:]
+
+            format_dict.update(l_bar=l_bar)
+            if bar_format:
+                format_dict.update(percentage=percentage)
+
+                # auto-remove colon for empty `{desc}`
+                if not prefix:
+                    bar_format = bar_format.replace("{desc}: ", '')
+            else:
+                bar_format = "{l_bar}{bar}{r_bar}"
+
+            full_bar = FormatReplace()
+            nobar = bar_format.format(bar=full_bar, **format_dict)
+            if not full_bar.format_called:
+                return nobar  # no `{bar}`; nothing else to do
+
+            # Formatting progress bar space available for bar's display
+            full_bar = Bar(frac,
+                           max(1, ncols - disp_len(nobar)) if ncols else 10,
+                           charset=Bar.ASCII if ascii is True else ascii or Bar.UTF,
+                           colour=colour)
+            if not _is_ascii(full_bar.charset) and _is_ascii(bar_format):
+                bar_format = str(bar_format)
+            res = bar_format.format(bar=full_bar, **format_dict)
+            return disp_trim(res, ncols) if ncols else res
+
+        elif bar_format:
+            # user-specified bar_format but no total
+            l_bar += '|'
+            format_dict.update(l_bar=l_bar, percentage=0)
+            full_bar = FormatReplace()
+            nobar = bar_format.format(bar=full_bar, **format_dict)
+            if not full_bar.format_called:
+                return nobar
+            full_bar = Bar(0,
+                           max(1, ncols - disp_len(nobar)) if ncols else 10,
+                           charset=Bar.BLANK, colour=colour)
+            res = bar_format.format(bar=full_bar, **format_dict)
+            return disp_trim(res, ncols) if ncols else res
+        else:
+            # no total: no progressbar, ETA, just progress stats
+            return (f'{(prefix + ": ") if prefix else ""}'
+                    f'{n_fmt}{unit} [{elapsed_str}, {rate_fmt}{postfix}]')
+
+    def __new__(cls, *_, **__):
+        instance = object.__new__(cls)
+        with cls.get_lock():  # also constructs lock if non-existent
+            cls._instances.add(instance)
+            # create monitoring thread
+            if cls.monitor_interval and (cls.monitor is None
+                                         or not cls.monitor.report()):
+                try:
+                    cls.monitor = TMonitor(cls, cls.monitor_interval)
+                except Exception as e:  # pragma: nocover
+                    warn("tqdm:disabling monitor support"
+                         " (monitor_interval = 0) due to:\n" + str(e),
+                         TqdmMonitorWarning, stacklevel=2)
+                    cls.monitor_interval = 0
+        return instance
+
+    @classmethod
+    def _get_free_pos(cls, instance=None):
+        """Skips specified instance."""
+        positions = {abs(inst.pos) for inst in cls._instances
+                     if inst is not instance and hasattr(inst, "pos")}
+        return min(set(range(len(positions) + 1)).difference(positions))
+
+    @classmethod
+    def _decr_instances(cls, instance):
+        """
+        Remove from list and reposition another unfixed bar
+        to fill the new gap.
+
+        This means that by default (where all nested bars are unfixed),
+        order is not maintained but screen flicker/blank space is minimised.
+        (tqdm<=4.44.1 moved ALL subsequent unfixed bars up.)
+        """
+        with cls._lock:
+            try:
+                cls._instances.remove(instance)
+            except KeyError:
+                # if not instance.gui:  # pragma: no cover
+                #     raise
+                pass  # py2: maybe magically removed already
+            # else:
+            if not instance.gui:
+                last = (instance.nrows or 20) - 1
+                # find unfixed (`pos >= 0`) overflow (`pos >= nrows - 1`)
+                instances = list(filter(
+                    lambda i: hasattr(i, "pos") and last <= i.pos,
+                    cls._instances))
+                # set first found to current `pos`
+                if instances:
+                    inst = min(instances, key=lambda i: i.pos)
+                    inst.clear(nolock=True)
+                    inst.pos = abs(instance.pos)
+
+    @classmethod
+    def write(cls, s, file=None, end="\n", nolock=False):
+        """Print a message via tqdm (without overlap with bars)."""
+        fp = file if file is not None else sys.stdout
+        with cls.external_write_mode(file=file, nolock=nolock):
+            # Write the message
+            fp.write(s)
+            fp.write(end)
+
+    @classmethod
+    @contextmanager
+    def external_write_mode(cls, file=None, nolock=False):
+        """
+        Disable tqdm within context and refresh tqdm when exits.
+        Useful when writing to standard output stream
+        """
+        fp = file if file is not None else sys.stdout
+
+        try:
+            if not nolock:
+                cls.get_lock().acquire()
+            # Clear all bars
+            inst_cleared = []
+            for inst in getattr(cls, '_instances', []):
+                # Clear instance if in the target output file
+                # or if write output + tqdm output are both either
+                # sys.stdout or sys.stderr (because both are mixed in terminal)
+                if hasattr(inst, "start_t") and (inst.fp == fp or all(
+                        f in (sys.stdout, sys.stderr) for f in (fp, inst.fp))):
+                    inst.clear(nolock=True)
+                    inst_cleared.append(inst)
+            yield
+            # Force refresh display of bars we cleared
+            for inst in inst_cleared:
+                inst.refresh(nolock=True)
+        finally:
+            if not nolock:
+                cls._lock.release()
+
+    @classmethod
+    def set_lock(cls, lock):
+        """Set the global lock."""
+        cls._lock = lock
+
+    @classmethod
+    def get_lock(cls):
+        """Get the global lock. Construct it if it does not exist."""
+        if not hasattr(cls, '_lock'):
+            cls._lock = TqdmDefaultWriteLock()
+        return cls._lock
+
+    @classmethod
+    def pandas(cls, **tqdm_kwargs):
+        """
+        Registers the current `tqdm` class with
+            pandas.core.
+            ( frame.DataFrame
+            | series.Series
+            | groupby.(generic.)DataFrameGroupBy
+            | groupby.(generic.)SeriesGroupBy
+            ).progress_apply
+
+        A new instance will be created every time `progress_apply` is called,
+        and each instance will automatically `close()` upon completion.
+
+        Parameters
+        ----------
+        tqdm_kwargs  : arguments for the tqdm instance
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from tqdm import tqdm
+        >>> from tqdm.gui import tqdm as tqdm_gui
+        >>>
+        >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
+        >>> tqdm.pandas(ncols=50)  # can use tqdm_gui, optional kwargs, etc
+        >>> # Now you can use `progress_apply` instead of `apply`
+        >>> df.groupby(0).progress_apply(lambda x: x**2)
+
+        References
+        ----------
+        
+        """
+        from warnings import catch_warnings, simplefilter
+
+        from pandas.core.frame import DataFrame
+        from pandas.core.series import Series
+        try:
+            with catch_warnings():
+                simplefilter("ignore", category=FutureWarning)
+                from pandas import Panel
+        except ImportError:  # pandas>=1.2.0
+            Panel = None
+        Rolling, Expanding = None, None
+        try:  # pandas>=1.0.0
+            from pandas.core.window.rolling import _Rolling_and_Expanding
+        except ImportError:
+            try:  # pandas>=0.18.0
+                from pandas.core.window import _Rolling_and_Expanding
+            except ImportError:  # pandas>=1.2.0
+                try:  # pandas>=1.2.0
+                    from pandas.core.window.expanding import Expanding
+                    from pandas.core.window.rolling import Rolling
+                    _Rolling_and_Expanding = Rolling, Expanding
+                except ImportError:  # pragma: no cover
+                    _Rolling_and_Expanding = None
+        try:  # pandas>=0.25.0
+            from pandas.core.groupby.generic import SeriesGroupBy  # , NDFrameGroupBy
+            from pandas.core.groupby.generic import DataFrameGroupBy
+        except ImportError:  # pragma: no cover
+            try:  # pandas>=0.23.0
+                from pandas.core.groupby.groupby import DataFrameGroupBy, SeriesGroupBy
+            except ImportError:
+                from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy
+        try:  # pandas>=0.23.0
+            from pandas.core.groupby.groupby import GroupBy
+        except ImportError:  # pragma: no cover
+            from pandas.core.groupby import GroupBy
+
+        try:  # pandas>=0.23.0
+            from pandas.core.groupby.groupby import PanelGroupBy
+        except ImportError:
+            try:
+                from pandas.core.groupby import PanelGroupBy
+            except ImportError:  # pandas>=0.25.0
+                PanelGroupBy = None
+
+        tqdm_kwargs = tqdm_kwargs.copy()
+        deprecated_t = [tqdm_kwargs.pop('deprecated_t', None)]
+
+        def inner_generator(df_function='apply'):
+            def inner(df, func, *args, **kwargs):
+                """
+                Parameters
+                ----------
+                df  : (DataFrame|Series)[GroupBy]
+                    Data (may be grouped).
+                func  : function
+                    To be applied on the (grouped) data.
+                **kwargs  : optional
+                    Transmitted to `df.apply()`.
+                """
+
+                # Precompute total iterations
+                total = tqdm_kwargs.pop("total", getattr(df, 'ngroups', None))
+                if total is None:  # not grouped
+                    if df_function == 'applymap':
+                        total = df.size
+                    elif isinstance(df, Series):
+                        total = len(df)
+                    elif (_Rolling_and_Expanding is None or
+                          not isinstance(df, _Rolling_and_Expanding)):
+                        # DataFrame or Panel
+                        axis = kwargs.get('axis', 0)
+                        if axis == 'index':
+                            axis = 0
+                        elif axis == 'columns':
+                            axis = 1
+                        # when axis=0, total is shape[axis1]
+                        total = df.size // df.shape[axis]
+
+                # Init bar
+                if deprecated_t[0] is not None:
+                    t = deprecated_t[0]
+                    deprecated_t[0] = None
+                else:
+                    t = cls(total=total, **tqdm_kwargs)
+
+                if len(args) > 0:
+                    # *args intentionally not supported (see #244, #299)
+                    TqdmDeprecationWarning(
+                        "Except func, normal arguments are intentionally" +
+                        " not supported by" +
+                        " `(DataFrame|Series|GroupBy).progress_apply`." +
+                        " Use keyword arguments instead.",
+                        fp_write=getattr(t.fp, 'write', sys.stderr.write))
+
+                try:  # pandas>=1.3.0
+                    from pandas.core.common import is_builtin_func
+                except ImportError:
+                    is_builtin_func = df._is_builtin_func
+                try:
+                    func = is_builtin_func(func)
+                except TypeError:
+                    pass
+
+                # Define bar updating wrapper
+                def wrapper(*args, **kwargs):
+                    # update tbar correctly
+                    # it seems `pandas apply` calls `func` twice
+                    # on the first column/row to decide whether it can
+                    # take a fast or slow code path; so stop when t.total==t.n
+                    t.update(n=1 if not t.total or t.n < t.total else 0)
+                    return func(*args, **kwargs)
+
+                # Apply the provided function (in **kwargs)
+                # on the df using our wrapper (which provides bar updating)
+                try:
+                    return getattr(df, df_function)(wrapper, **kwargs)
+                finally:
+                    t.close()
+
+            return inner
+
+        # Monkeypatch pandas to provide easy methods
+        # Enable custom tqdm progress in pandas!
+        Series.progress_apply = inner_generator()
+        SeriesGroupBy.progress_apply = inner_generator()
+        Series.progress_map = inner_generator('map')
+        SeriesGroupBy.progress_map = inner_generator('map')
+
+        DataFrame.progress_apply = inner_generator()
+        DataFrameGroupBy.progress_apply = inner_generator()
+        DataFrame.progress_applymap = inner_generator('applymap')
+        DataFrame.progress_map = inner_generator('map')
+        DataFrameGroupBy.progress_map = inner_generator('map')
+
+        if Panel is not None:
+            Panel.progress_apply = inner_generator()
+        if PanelGroupBy is not None:
+            PanelGroupBy.progress_apply = inner_generator()
+
+        GroupBy.progress_apply = inner_generator()
+        GroupBy.progress_aggregate = inner_generator('aggregate')
+        GroupBy.progress_transform = inner_generator('transform')
+
+        if Rolling is not None and Expanding is not None:
+            Rolling.progress_apply = inner_generator()
+            Expanding.progress_apply = inner_generator()
+        elif _Rolling_and_Expanding is not None:
+            _Rolling_and_Expanding.progress_apply = inner_generator()
+
+    # override defaults via env vars
+    @envwrap("TQDM_", is_method=True, types={'total': float, 'ncols': int, 'miniters': float,
+                                             'position': int, 'nrows': int})
+    def __init__(self, iterable=None, desc=None, total=None, leave=True, file=None,
+                 ncols=None, mininterval=0.1, maxinterval=10.0, miniters=None,
+                 ascii=None, disable=False, unit='it', unit_scale=False,
+                 dynamic_ncols=False, smoothing=0.3, bar_format=None, initial=0,
+                 position=None, postfix=None, unit_divisor=1000, write_bytes=False,
+                 lock_args=None, nrows=None, colour=None, delay=0.0, gui=False,
+                 **kwargs):
+        """see tqdm.tqdm for arguments"""
+        if file is None:
+            file = sys.stderr
+
+        if write_bytes:
+            # Despite coercing unicode into bytes, py2 sys.std* streams
+            # should have bytes written to them.
+            file = SimpleTextIOWrapper(
+                file, encoding=getattr(file, 'encoding', None) or 'utf-8')
+
+        file = DisableOnWriteError(file, tqdm_instance=self)
+
+        if disable is None and hasattr(file, "isatty") and not file.isatty():
+            disable = True
+
+        if total is None and iterable is not None:
+            try:
+                total = len(iterable)
+            except (TypeError, AttributeError):
+                total = None
+        if total == float("inf"):
+            # Infinite iterations, behave same as unknown
+            total = None
+
+        if disable:
+            self.iterable = iterable
+            self.disable = disable
+            with self._lock:
+                self.pos = self._get_free_pos(self)
+                self._instances.remove(self)
+            self.n = initial
+            self.total = total
+            self.leave = leave
+            return
+
+        if kwargs:
+            self.disable = True
+            with self._lock:
+                self.pos = self._get_free_pos(self)
+                self._instances.remove(self)
+            raise (
+                TqdmDeprecationWarning(
+                    "`nested` is deprecated and automated.\n"
+                    "Use `position` instead for manual control.\n",
+                    fp_write=getattr(file, 'write', sys.stderr.write))
+                if "nested" in kwargs else
+                TqdmKeyError("Unknown argument(s): " + str(kwargs)))
+
+        # Preprocess the arguments
+        if (
+            (ncols is None or nrows is None) and (file in (sys.stderr, sys.stdout))
+        ) or dynamic_ncols:  # pragma: no cover
+            if dynamic_ncols:
+                dynamic_ncols = _screen_shape_wrapper()
+                if dynamic_ncols:
+                    ncols, nrows = dynamic_ncols(file)
+            else:
+                _dynamic_ncols = _screen_shape_wrapper()
+                if _dynamic_ncols:
+                    _ncols, _nrows = _dynamic_ncols(file)
+                    if ncols is None:
+                        ncols = _ncols
+                    if nrows is None:
+                        nrows = _nrows
+
+        if miniters is None:
+            miniters = 0
+            dynamic_miniters = True
+        else:
+            dynamic_miniters = False
+
+        if mininterval is None:
+            mininterval = 0
+
+        if maxinterval is None:
+            maxinterval = 0
+
+        if ascii is None:
+            ascii = not _supports_unicode(file)
+
+        if bar_format and ascii is not True and not _is_ascii(ascii):
+            # Convert bar format into unicode since terminal uses unicode
+            bar_format = str(bar_format)
+
+        if smoothing is None:
+            smoothing = 0
+
+        # Store the arguments
+        self.iterable = iterable
+        self.desc = desc or ''
+        self.total = total
+        self.leave = leave
+        self.fp = file
+        self.ncols = ncols
+        self.nrows = nrows
+        self.mininterval = mininterval
+        self.maxinterval = maxinterval
+        self.miniters = miniters
+        self.dynamic_miniters = dynamic_miniters
+        self.ascii = ascii
+        self.disable = disable
+        self.unit = unit
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        self.initial = initial
+        self.lock_args = lock_args
+        self.delay = delay
+        self.gui = gui
+        self.dynamic_ncols = dynamic_ncols
+        self.smoothing = smoothing
+        self._ema_dn = EMA(smoothing)
+        self._ema_dt = EMA(smoothing)
+        self._ema_miniters = EMA(smoothing)
+        self.bar_format = bar_format
+        self.postfix = None
+        self.colour = colour
+        self._time = time
+        if postfix:
+            try:
+                self.set_postfix(refresh=False, **postfix)
+            except TypeError:
+                self.postfix = postfix
+
+        # Init the iterations counters
+        self.last_print_n = initial
+        self.n = initial
+
+        # if nested, at initial sp() call we replace '\r' by '\n' to
+        # not overwrite the outer progress bar
+        with self._lock:
+            # mark fixed positions as negative
+            self.pos = self._get_free_pos(self) if position is None else -position
+
+        if not gui:
+            # Initialize the screen printer
+            self.sp = self.status_printer(self.fp)
+            if delay <= 0:
+                self.refresh(lock_args=self.lock_args)
+
+        # Init the time counter
+        self.last_print_t = self._time()
+        # NB: Avoid race conditions by setting start_t at the very end of init
+        self.start_t = self.last_print_t
+
+    def __bool__(self):
+        if self.total is not None:
+            return self.total > 0
+        if self.iterable is None:
+            raise TypeError('bool() undefined when iterable == total == None')
+        return bool(self.iterable)
+
+    def __len__(self):
+        return (
+            self.total if self.iterable is None
+            else self.iterable.shape[0] if hasattr(self.iterable, "shape")
+            else len(self.iterable) if hasattr(self.iterable, "__len__")
+            else self.iterable.__length_hint__() if hasattr(self.iterable, "__length_hint__")
+            else getattr(self, "total", None))
+
+    def __reversed__(self):
+        try:
+            orig = self.iterable
+        except AttributeError:
+            raise TypeError("'tqdm' object is not reversible")
+        else:
+            self.iterable = reversed(self.iterable)
+            return self.__iter__()
+        finally:
+            self.iterable = orig
+
+    def __contains__(self, item):
+        contains = getattr(self.iterable, '__contains__', None)
+        return contains(item) if contains is not None else item in self.__iter__()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        try:
+            self.close()
+        except AttributeError:
+            # maybe eager thread cleanup upon external error
+            if (exc_type, exc_value, traceback) == (None, None, None):
+                raise
+            warn("AttributeError ignored", TqdmWarning, stacklevel=2)
+
+    def __del__(self):
+        self.close()
+
+    def __str__(self):
+        return self.format_meter(**self.format_dict)
+
+    @property
+    def _comparable(self):
+        return abs(getattr(self, "pos", 1 << 31))
+
+    def __hash__(self):
+        return id(self)
+
+    def __iter__(self):
+        """Backward-compatibility to use: for x in tqdm(iterable)"""
+
+        # Inlining instance variables as locals (speed optimisation)
+        iterable = self.iterable
+
+        # If the bar is disabled, then just walk the iterable
+        # (note: keep this check outside the loop for performance)
+        if self.disable:
+            for obj in iterable:
+                yield obj
+            return
+
+        mininterval = self.mininterval
+        last_print_t = self.last_print_t
+        last_print_n = self.last_print_n
+        min_start_t = self.start_t + self.delay
+        n = self.n
+        time = self._time
+
+        try:
+            for obj in iterable:
+                yield obj
+                # Update and possibly print the progressbar.
+                # Note: does not call self.update(1) for speed optimisation.
+                n += 1
+
+                if n - last_print_n >= self.miniters:
+                    cur_t = time()
+                    dt = cur_t - last_print_t
+                    if dt >= mininterval and cur_t >= min_start_t:
+                        self.update(n - last_print_n)
+                        last_print_n = self.last_print_n
+                        last_print_t = self.last_print_t
+        finally:
+            self.n = n
+            self.close()
+
+    def update(self, n=1):
+        """
+        Manually update the progress bar, useful for streams
+        such as reading files.
+        E.g.:
+        >>> t = tqdm(total=filesize) # Initialise
+        >>> for current_buffer in stream:
+        ...    ...
+        ...    t.update(len(current_buffer))
+        >>> t.close()
+        The last line is highly recommended, but possibly not necessary if
+        `t.update()` will be called in such a way that `filesize` will be
+        exactly reached and printed.
+
+        Parameters
+        ----------
+        n  : int or float, optional
+            Increment to add to the internal counter of iterations
+            [default: 1]. If using float, consider specifying `{n:.3f}`
+            or similar in `bar_format`, or specifying `unit_scale`.
+
+        Returns
+        -------
+        out  : bool or None
+            True if a `display()` was triggered.
+        """
+        if self.disable:
+            return
+
+        if n < 0:
+            self.last_print_n += n  # for auto-refresh logic to work
+        self.n += n
+
+        # check counter first to reduce calls to time()
+        if self.n - self.last_print_n >= self.miniters:
+            cur_t = self._time()
+            dt = cur_t - self.last_print_t
+            if dt >= self.mininterval and cur_t >= self.start_t + self.delay:
+                cur_t = self._time()
+                dn = self.n - self.last_print_n  # >= n
+                if self.smoothing and dt and dn:
+                    # EMA (not just overall average)
+                    self._ema_dn(dn)
+                    self._ema_dt(dt)
+                self.refresh(lock_args=self.lock_args)
+                if self.dynamic_miniters:
+                    # If no `miniters` was specified, adjust automatically to the
+                    # maximum iteration rate seen so far between two prints.
+                    # e.g.: After running `tqdm.update(5)`, subsequent
+                    # calls to `tqdm.update()` will only cause an update after
+                    # at least 5 more iterations.
+                    if self.maxinterval and dt >= self.maxinterval:
+                        self.miniters = dn * (self.mininterval or self.maxinterval) / dt
+                    elif self.smoothing:
+                        # EMA miniters update
+                        self.miniters = self._ema_miniters(
+                            dn * (self.mininterval / dt if self.mininterval and dt
+                                  else 1))
+                    else:
+                        # max iters between two prints
+                        self.miniters = max(self.miniters, dn)
+
+                # Store old values for next call
+                self.last_print_n = self.n
+                self.last_print_t = cur_t
+                return True
+
+    def close(self):
+        """Cleanup and (if leave=False) close the progressbar."""
+        if self.disable:
+            return
+
+        # Prevent multiple closures
+        self.disable = True
+
+        # decrement instance pos and remove from internal set
+        pos = abs(self.pos)
+        self._decr_instances(self)
+
+        if self.last_print_t < self.start_t + self.delay:
+            # haven't ever displayed; nothing to clear
+            return
+
+        # GUI mode
+        if getattr(self, 'sp', None) is None:
+            return
+
+        # annoyingly, _supports_unicode isn't good enough
+        def fp_write(s):
+            self.fp.write(str(s))
+
+        try:
+            fp_write('')
+        except ValueError as e:
+            if 'closed' in str(e):
+                return
+            raise  # pragma: no cover
+
+        leave = pos == 0 if self.leave is None else self.leave
+
+        with self._lock:
+            if leave:
+                # stats for overall rate (no weighted average)
+                self._ema_dt = lambda: None
+                self.display(pos=0)
+                fp_write('\n')
+            else:
+                # clear previous display
+                if self.display(msg='', pos=pos) and not pos:
+                    fp_write('\r')
+
+    def clear(self, nolock=False):
+        """Clear current bar display."""
+        if self.disable:
+            return
+
+        if not nolock:
+            self._lock.acquire()
+        pos = abs(self.pos)
+        if pos < (self.nrows or 20):
+            self.moveto(pos)
+            self.sp('')
+            self.fp.write('\r')  # place cursor back at the beginning of line
+            self.moveto(-pos)
+        if not nolock:
+            self._lock.release()
+
+    def refresh(self, nolock=False, lock_args=None):
+        """
+        Force refresh the display of this bar.
+
+        Parameters
+        ----------
+        nolock  : bool, optional
+            If `True`, does not lock.
+            If [default: `False`]: calls `acquire()` on internal lock.
+        lock_args  : tuple, optional
+            Passed to internal lock's `acquire()`.
+            If specified, will only `display()` if `acquire()` returns `True`.
+        """
+        if self.disable:
+            return
+
+        if not nolock:
+            if lock_args:
+                if not self._lock.acquire(*lock_args):
+                    return False
+            else:
+                self._lock.acquire()
+        self.display()
+        if not nolock:
+            self._lock.release()
+        return True
+
+    def unpause(self):
+        """Restart tqdm timer from last print time."""
+        if self.disable:
+            return
+        cur_t = self._time()
+        self.start_t += cur_t - self.last_print_t
+        self.last_print_t = cur_t
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Consider combining with `leave=True`.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        self.n = 0
+        if total is not None:
+            self.total = total
+        if self.disable:
+            return
+        self.last_print_n = 0
+        self.last_print_t = self.start_t = self._time()
+        self._ema_dn = EMA(self.smoothing)
+        self._ema_dt = EMA(self.smoothing)
+        self._ema_miniters = EMA(self.smoothing)
+        self.refresh()
+
+    def set_description(self, desc=None, refresh=True):
+        """
+        Set/modify description of the progress bar.
+
+        Parameters
+        ----------
+        desc  : str, optional
+        refresh  : bool, optional
+            Forces refresh [default: True].
+        """
+        self.desc = desc + ': ' if desc else ''
+        if refresh:
+            self.refresh()
+
+    def set_description_str(self, desc=None, refresh=True):
+        """Set/modify description without ': ' appended."""
+        self.desc = desc or ''
+        if refresh:
+            self.refresh()
+
+    def set_postfix(self, ordered_dict=None, refresh=True, **kwargs):
+        """
+        Set/modify postfix (additional stats)
+        with automatic formatting based on datatype.
+
+        Parameters
+        ----------
+        ordered_dict  : dict or OrderedDict, optional
+        refresh  : bool, optional
+            Forces refresh [default: True].
+        kwargs  : dict, optional
+        """
+        # Sort in alphabetical order to be more deterministic
+        postfix = OrderedDict([] if ordered_dict is None else ordered_dict)
+        for key in sorted(kwargs.keys()):
+            postfix[key] = kwargs[key]
+        # Preprocess stats according to datatype
+        for key in postfix.keys():
+            # Number: limit the length of the string
+            if isinstance(postfix[key], Number):
+                postfix[key] = self.format_num(postfix[key])
+            # Else for any other type, try to get the string conversion
+            elif not isinstance(postfix[key], str):
+                postfix[key] = str(postfix[key])
+            # Else if it's a string, don't need to preprocess anything
+        # Stitch together to get the final postfix
+        self.postfix = ', '.join(key + '=' + postfix[key].strip()
+                                 for key in postfix.keys())
+        if refresh:
+            self.refresh()
+
+    def set_postfix_str(self, s='', refresh=True):
+        """
+        Postfix without dictionary expansion, similar to prefix handling.
+        """
+        self.postfix = str(s)
+        if refresh:
+            self.refresh()
+
+    def moveto(self, n):
+        # TODO: private method
+        self.fp.write('\n' * n + _term_move_up() * -n)
+        getattr(self.fp, 'flush', lambda: None)()
+
+    @property
+    def format_dict(self):
+        """Public API for read-only member access."""
+        if self.disable and not hasattr(self, 'unit'):
+            return defaultdict(lambda: None, {
+                'n': self.n, 'total': self.total, 'elapsed': 0, 'unit': 'it'})
+        if self.dynamic_ncols:
+            self.ncols, self.nrows = self.dynamic_ncols(self.fp)
+        return {
+            'n': self.n, 'total': self.total,
+            'elapsed': self._time() - self.start_t if hasattr(self, 'start_t') else 0,
+            'ncols': self.ncols, 'nrows': self.nrows, 'prefix': self.desc,
+            'ascii': self.ascii, 'unit': self.unit, 'unit_scale': self.unit_scale,
+            'rate': self._ema_dn() / self._ema_dt() if self._ema_dt() else None,
+            'bar_format': self.bar_format, 'postfix': self.postfix,
+            'unit_divisor': self.unit_divisor, 'initial': self.initial,
+            'colour': self.colour}
+
+    def display(self, msg=None, pos=None):
+        """
+        Use `self.sp` to display `msg` in the specified `pos`.
+
+        Consider overloading this function when inheriting to use e.g.:
+        `self.some_frontend(**self.format_dict)` instead of `self.sp`.
+
+        Parameters
+        ----------
+        msg  : str, optional. What to display (default: `repr(self)`).
+        pos  : int, optional. Position to `moveto`
+          (default: `abs(self.pos)`).
+        """
+        if pos is None:
+            pos = abs(self.pos)
+
+        nrows = self.nrows or 20
+        if pos >= nrows - 1:
+            if pos >= nrows:
+                return False
+            if msg or msg is None:  # override at `nrows - 1`
+                msg = " ... (more hidden) ..."
+
+        if not hasattr(self, "sp"):
+            raise TqdmDeprecationWarning(
+                "Please use `tqdm.gui.tqdm(...)`"
+                " instead of `tqdm(..., gui=True)`\n",
+                fp_write=getattr(self.fp, 'write', sys.stderr.write))
+
+        if pos:
+            self.moveto(pos)
+        self.sp(self.__str__() if msg is None else msg)
+        if pos:
+            self.moveto(-pos)
+        return True
+
+    @classmethod
+    @contextmanager
+    def wrapattr(cls, stream, method, total=None, bytes=True, **tqdm_kwargs):
+        """
+        stream  : file-like object.
+        method  : str, "read" or "write". The result of `read()` and
+            the first argument of `write()` should have a `len()`.
+
+        >>> with tqdm.wrapattr(file_obj, "read", total=file_obj.size) as fobj:
+        ...     while True:
+        ...         chunk = fobj.read(chunk_size)
+        ...         if not chunk:
+        ...             break
+        """
+        with cls(total=total, **tqdm_kwargs) as t:
+            if bytes:
+                t.unit = "B"
+                t.unit_scale = True
+                t.unit_divisor = 1024
+            yield CallbackIOWrapper(t.update, stream, method)
+
+
+def trange(*args, **kwargs):
+    """Shortcut for tqdm(range(*args), **kwargs)."""
+    return tqdm(range(*args), **kwargs)
diff --git a/venv/lib/python3.13/site-packages/tqdm/tk.py b/venv/lib/python3.13/site-packages/tqdm/tk.py
new file mode 100644
index 0000000000000000000000000000000000000000..788303c8687e007338ce816bf9afeec8581f0188
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/tk.py
@@ -0,0 +1,196 @@
+"""
+Tkinter GUI progressbar decorator for iterators.
+
+Usage:
+>>> from tqdm.tk import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import re
+import sys
+import tkinter
+import tkinter.ttk as ttk
+from warnings import warn
+
+from .std import TqdmExperimentalWarning, TqdmWarning
+from .std import tqdm as std_tqdm
+
+__author__ = {"github.com/": ["richardsheridan", "casperdcl"]}
+__all__ = ['tqdm_tk', 'ttkrange', 'tqdm', 'trange']
+
+
+class tqdm_tk(std_tqdm):  # pragma: no cover
+    """
+    Experimental Tkinter GUI version of tqdm!
+
+    Note: Window interactivity suffers if `tqdm_tk` is not running within
+    a Tkinter mainloop and values are generated infrequently. In this case,
+    consider calling `tqdm_tk.refresh()` frequently in the Tk thread.
+    """
+
+    # TODO: @classmethod: write()?
+
+    def __init__(self, *args, **kwargs):
+        """
+        This class accepts the following parameters *in addition* to
+        the parameters accepted by `tqdm`.
+
+        Parameters
+        ----------
+        grab  : bool, optional
+            Grab the input across all windows of the process.
+        tk_parent  : `tkinter.Wm`, optional
+            Parent Tk window.
+        cancel_callback  : Callable, optional
+            Create a cancel button and set `cancel_callback` to be called
+            when the cancel or window close button is clicked.
+        """
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        self._warn_leave = 'leave' in kwargs
+        grab = kwargs.pop('grab', False)
+        tk_parent = kwargs.pop('tk_parent', None)
+        self._cancel_callback = kwargs.pop('cancel_callback', None)
+        super().__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        if tk_parent is None:  # Discover parent widget
+            try:
+                tk_parent = tkinter._default_root
+            except AttributeError:
+                raise AttributeError(
+                    "`tk_parent` required when using `tkinter.NoDefaultRoot()`")
+            if tk_parent is None:  # use new default root window as display
+                self._tk_window = tkinter.Tk()
+            else:  # some other windows already exist
+                self._tk_window = tkinter.Toplevel()
+        else:
+            self._tk_window = tkinter.Toplevel(tk_parent)
+
+        warn("GUI is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        self._tk_dispatching = self._tk_dispatching_helper()
+
+        self._tk_window.protocol("WM_DELETE_WINDOW", self.cancel)
+        self._tk_window.wm_title(self.desc)
+        self._tk_window.wm_attributes("-topmost", 1)
+        self._tk_window.after(0, lambda: self._tk_window.wm_attributes("-topmost", 0))
+        self._tk_n_var = tkinter.DoubleVar(self._tk_window, value=0)
+        self._tk_text_var = tkinter.StringVar(self._tk_window)
+        pbar_frame = ttk.Frame(self._tk_window, padding=5)
+        pbar_frame.pack()
+        _tk_label = ttk.Label(pbar_frame, textvariable=self._tk_text_var,
+                              wraplength=600, anchor="center", justify="center")
+        _tk_label.pack()
+        self._tk_pbar = ttk.Progressbar(
+            pbar_frame, variable=self._tk_n_var, length=450)
+        if self.total is not None:
+            self._tk_pbar.configure(maximum=self.total)
+        else:
+            self._tk_pbar.configure(mode="indeterminate")
+        self._tk_pbar.pack()
+        if self._cancel_callback is not None:
+            _tk_button = ttk.Button(pbar_frame, text="Cancel", command=self.cancel)
+            _tk_button.pack()
+        if grab:
+            self._tk_window.grab_set()
+
+    def close(self):
+        if self.disable:
+            return
+
+        self.disable = True
+
+        with self.get_lock():
+            self._instances.remove(self)
+
+        def _close():
+            self._tk_window.after('idle', self._tk_window.destroy)
+            if not self._tk_dispatching:
+                self._tk_window.update()
+
+        self._tk_window.protocol("WM_DELETE_WINDOW", _close)
+
+        # if leave is set but we are self-dispatching, the left window is
+        # totally unresponsive unless the user manually dispatches
+        if not self.leave:
+            _close()
+        elif not self._tk_dispatching:
+            if self._warn_leave:
+                warn("leave flag ignored if not in tkinter mainloop",
+                     TqdmWarning, stacklevel=2)
+            _close()
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        self._tk_n_var.set(self.n)
+        d = self.format_dict
+        # remove {bar}
+        d['bar_format'] = (d['bar_format'] or "{l_bar}{r_bar}").replace(
+            "{bar}", "")
+        msg = self.format_meter(**d)
+        if '' in msg:
+            msg = "".join(re.split(r'\|?\|?', msg, maxsplit=1))
+        self._tk_text_var.set(msg)
+        if not self._tk_dispatching:
+            self._tk_window.update()
+
+    def set_description(self, desc=None, refresh=True):
+        self.set_description_str(desc, refresh)
+
+    def set_description_str(self, desc=None, refresh=True):
+        self.desc = desc
+        if not self.disable:
+            self._tk_window.wm_title(desc)
+            if refresh and not self._tk_dispatching:
+                self._tk_window.update()
+
+    def cancel(self):
+        """
+        `cancel_callback()` followed by `close()`
+        when close/cancel buttons clicked.
+        """
+        if self._cancel_callback is not None:
+            self._cancel_callback()
+        self.close()
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if hasattr(self, '_tk_pbar'):
+            if total is None:
+                self._tk_pbar.configure(maximum=100, mode="indeterminate")
+            else:
+                self._tk_pbar.configure(maximum=total, mode="determinate")
+        super().reset(total=total)
+
+    @staticmethod
+    def _tk_dispatching_helper():
+        """determine if Tkinter mainloop is dispatching events"""
+        codes = {tkinter.mainloop.__code__, tkinter.Misc.mainloop.__code__}
+        for frame in sys._current_frames().values():
+            while frame:
+                if frame.f_code in codes:
+                    return True
+                frame = frame.f_back
+        return False
+
+
+def ttkrange(*args, **kwargs):
+    """Shortcut for `tqdm.tk.tqdm(range(*args), **kwargs)`."""
+    return tqdm_tk(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_tk
+trange = ttkrange
diff --git a/venv/lib/python3.13/site-packages/tqdm/tqdm.1 b/venv/lib/python3.13/site-packages/tqdm/tqdm.1
new file mode 100644
index 0000000000000000000000000000000000000000..b90ab4b9ebdd183c98ee8ae0c7f0a65ac676e3b7
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/tqdm.1
@@ -0,0 +1,314 @@
+.\" Automatically generated by Pandoc 1.19.2
+.\"
+.TH "TQDM" "1" "2015\-2021" "tqdm User Manuals" ""
+.hy
+.SH NAME
+.PP
+tqdm \- fast, extensible progress bar for Python and CLI
+.SH SYNOPSIS
+.PP
+tqdm [\f[I]options\f[]]
+.SH DESCRIPTION
+.PP
+See .
+Can be used as a pipe:
+.IP
+.nf
+\f[C]
+$\ #\ count\ lines\ of\ code
+$\ cat\ *.py\ |\ tqdm\ |\ wc\ \-l
+327it\ [00:00,\ 981773.38it/s]
+327
+
+$\ #\ find\ all\ files
+$\ find\ .\ \-name\ "*.py"\ |\ tqdm\ |\ wc\ \-l
+432it\ [00:00,\ 833842.30it/s]
+432
+
+#\ ...\ and\ more\ info
+$\ find\ .\ \-name\ \[aq]*.py\[aq]\ \-exec\ wc\ \-l\ \\{}\ \\;\ \\
+\ \ |\ tqdm\ \-\-total\ 432\ \-\-unit\ files\ \-\-desc\ counting\ \\
+\ \ |\ awk\ \[aq]{\ sum\ +=\ $1\ };\ END\ {\ print\ sum\ }\[aq]
+counting:\ 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ|\ 432/432\ [00:00<00:00,\ 794361.83files/s]
+131998
+\f[]
+.fi
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Print this help and exit.
+.RS
+.RE
+.TP
+.B \-v, \-\-version
+Print version and exit.
+.RS
+.RE
+.TP
+.B \-\-desc=\f[I]desc\f[]
+str, optional.
+Prefix for the progressbar.
+.RS
+.RE
+.TP
+.B \-\-total=\f[I]total\f[]
+int or float, optional.
+The number of expected iterations.
+If unspecified, len(iterable) is used if possible.
+If float("inf") or as a last resort, only basic progress statistics are
+displayed (no ETA, no progressbar).
+If \f[C]gui\f[] is True and this parameter needs subsequent updating,
+specify an initial arbitrary large positive number, e.g.
+9e9.
+.RS
+.RE
+.TP
+.B \-\-leave
+bool, optional.
+If [default: True], keeps all traces of the progressbar upon termination
+of iteration.
+If \f[C]None\f[], will leave only if \f[C]position\f[] is \f[C]0\f[].
+.RS
+.RE
+.TP
+.B \-\-ncols=\f[I]ncols\f[]
+int, optional.
+The width of the entire output message.
+If specified, dynamically resizes the progressbar to stay within this
+bound.
+If unspecified, attempts to use environment width.
+The fallback is a meter width of 10 and no limit for the counter and
+statistics.
+If 0, will not print any meter (only stats).
+.RS
+.RE
+.TP
+.B \-\-mininterval=\f[I]mininterval\f[]
+float, optional.
+Minimum progress display update interval [default: 0.1] seconds.
+.RS
+.RE
+.TP
+.B \-\-maxinterval=\f[I]maxinterval\f[]
+float, optional.
+Maximum progress display update interval [default: 10] seconds.
+Automatically adjusts \f[C]miniters\f[] to correspond to
+\f[C]mininterval\f[] after long display update lag.
+Only works if \f[C]dynamic_miniters\f[] or monitor thread is enabled.
+.RS
+.RE
+.TP
+.B \-\-miniters=\f[I]miniters\f[]
+int or float, optional.
+Minimum progress display update interval, in iterations.
+If 0 and \f[C]dynamic_miniters\f[], will automatically adjust to equal
+\f[C]mininterval\f[] (more CPU efficient, good for tight loops).
+If > 0, will skip display of specified number of iterations.
+Tweak this and \f[C]mininterval\f[] to get very efficient loops.
+If your progress is erratic with both fast and slow iterations (network,
+skipping items, etc) you should set miniters=1.
+.RS
+.RE
+.TP
+.B \-\-ascii=\f[I]ascii\f[]
+bool or str, optional.
+If unspecified or False, use unicode (smooth blocks) to fill the meter.
+The fallback is to use ASCII characters " 123456789#".
+.RS
+.RE
+.TP
+.B \-\-disable
+bool, optional.
+Whether to disable the entire progressbar wrapper [default: False].
+If set to None, disable on non\-TTY.
+.RS
+.RE
+.TP
+.B \-\-unit=\f[I]unit\f[]
+str, optional.
+String that will be used to define the unit of each iteration [default:
+it].
+.RS
+.RE
+.TP
+.B \-\-unit\-scale=\f[I]unit_scale\f[]
+bool or int or float, optional.
+If 1 or True, the number of iterations will be reduced/scaled
+automatically and a metric prefix following the International System of
+Units standard will be added (kilo, mega, etc.) [default: False].
+If any other non\-zero number, will scale \f[C]total\f[] and \f[C]n\f[].
+.RS
+.RE
+.TP
+.B \-\-dynamic\-ncols
+bool, optional.
+If set, constantly alters \f[C]ncols\f[] and \f[C]nrows\f[] to the
+environment (allowing for window resizes) [default: False].
+.RS
+.RE
+.TP
+.B \-\-smoothing=\f[I]smoothing\f[]
+float, optional.
+Exponential moving average smoothing factor for speed estimates (ignored
+in GUI mode).
+Ranges from 0 (average speed) to 1 (current/instantaneous speed)
+[default: 0.3].
+.RS
+.RE
+.TP
+.B \-\-bar\-format=\f[I]bar_format\f[]
+str, optional.
+Specify a custom bar string formatting.
+May impact performance.
+[default: \[aq]{l_bar}{bar}{r_bar}\[aq]], where l_bar=\[aq]{desc}:
+{percentage:3.0f}%|\[aq] and r_bar=\[aq]| {n_fmt}/{total_fmt}
+[{elapsed}<{remaining}, \[aq] \[aq]{rate_fmt}{postfix}]\[aq] Possible
+vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt, percentage,
+elapsed, elapsed_s, ncols, nrows, desc, unit, rate, rate_fmt,
+rate_noinv, rate_noinv_fmt, rate_inv, rate_inv_fmt, postfix,
+unit_divisor, remaining, remaining_s, eta.
+Note that a trailing ": " is automatically removed after {desc} if the
+latter is empty.
+.RS
+.RE
+.TP
+.B \-\-initial=\f[I]initial\f[]
+int or float, optional.
+The initial counter value.
+Useful when restarting a progress bar [default: 0].
+If using float, consider specifying \f[C]{n:.3f}\f[] or similar in
+\f[C]bar_format\f[], or specifying \f[C]unit_scale\f[].
+.RS
+.RE
+.TP
+.B \-\-position=\f[I]position\f[]
+int, optional.
+Specify the line offset to print this bar (starting from 0) Automatic if
+unspecified.
+Useful to manage multiple bars at once (eg, from threads).
+.RS
+.RE
+.TP
+.B \-\-postfix=\f[I]postfix\f[]
+dict or *, optional.
+Specify additional stats to display at the end of the bar.
+Calls \f[C]set_postfix(**postfix)\f[] if possible (dict).
+.RS
+.RE
+.TP
+.B \-\-unit\-divisor=\f[I]unit_divisor\f[]
+float, optional.
+[default: 1000], ignored unless \f[C]unit_scale\f[] is True.
+.RS
+.RE
+.TP
+.B \-\-write\-bytes
+bool, optional.
+Whether to write bytes.
+If (default: False) will write unicode.
+.RS
+.RE
+.TP
+.B \-\-lock\-args=\f[I]lock_args\f[]
+tuple, optional.
+Passed to \f[C]refresh\f[] for intermediate output (initialisation,
+iterating, and updating).
+.RS
+.RE
+.TP
+.B \-\-nrows=\f[I]nrows\f[]
+int, optional.
+The screen height.
+If specified, hides nested bars outside this bound.
+If unspecified, attempts to use environment height.
+The fallback is 20.
+.RS
+.RE
+.TP
+.B \-\-colour=\f[I]colour\f[]
+str, optional.
+Bar colour (e.g.
+\[aq]green\[aq], \[aq]#00ff00\[aq]).
+.RS
+.RE
+.TP
+.B \-\-delay=\f[I]delay\f[]
+float, optional.
+Don\[aq]t display until [default: 0] seconds have elapsed.
+.RS
+.RE
+.TP
+.B \-\-delim=\f[I]delim\f[]
+chr, optional.
+Delimiting character [default: \[aq]\\n\[aq]].
+Use \[aq]\\0\[aq] for null.
+N.B.: on Windows systems, Python converts \[aq]\\n\[aq] to
+\[aq]\\r\\n\[aq].
+.RS
+.RE
+.TP
+.B \-\-buf\-size=\f[I]buf_size\f[]
+int, optional.
+String buffer size in bytes [default: 256] used when \f[C]delim\f[] is
+specified.
+.RS
+.RE
+.TP
+.B \-\-bytes
+bool, optional.
+If true, will count bytes, ignore \f[C]delim\f[], and default
+\f[C]unit_scale\f[] to True, \f[C]unit_divisor\f[] to 1024, and
+\f[C]unit\f[] to \[aq]B\[aq].
+.RS
+.RE
+.TP
+.B \-\-tee
+bool, optional.
+If true, passes \f[C]stdin\f[] to both \f[C]stderr\f[] and
+\f[C]stdout\f[].
+.RS
+.RE
+.TP
+.B \-\-update
+bool, optional.
+If true, will treat input as newly elapsed iterations, i.e.
+numbers to pass to \f[C]update()\f[].
+Note that this is slow (~2e5 it/s) since every input must be decoded as
+a number.
+.RS
+.RE
+.TP
+.B \-\-update\-to
+bool, optional.
+If true, will treat input as total elapsed iterations, i.e.
+numbers to assign to \f[C]self.n\f[].
+Note that this is slow (~2e5 it/s) since every input must be decoded as
+a number.
+.RS
+.RE
+.TP
+.B \-\-null
+bool, optional.
+If true, will discard input (no stdout).
+.RS
+.RE
+.TP
+.B \-\-manpath=\f[I]manpath\f[]
+str, optional.
+Directory in which to install tqdm man pages.
+.RS
+.RE
+.TP
+.B \-\-comppath=\f[I]comppath\f[]
+str, optional.
+Directory in which to place tqdm completion.
+.RS
+.RE
+.TP
+.B \-\-log=\f[I]log\f[]
+str, optional.
+CRITICAL|FATAL|ERROR|WARN(ING)|[default: \[aq]INFO\[aq]]|DEBUG|NOTSET.
+.RS
+.RE
+.SH AUTHORS
+tqdm developers .
diff --git a/venv/lib/python3.13/site-packages/tqdm/utils.py b/venv/lib/python3.13/site-packages/tqdm/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..af3ec7ded55daa98e1f268a3ee891e9a6bd72974
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/utils.py
@@ -0,0 +1,399 @@
+"""
+General helpers required for `tqdm.std`.
+"""
+import os
+import re
+import sys
+from functools import partial, partialmethod, wraps
+from inspect import signature
+# TODO consider using wcswidth third-party package for 0-width characters
+from unicodedata import east_asian_width
+from warnings import warn
+from weakref import proxy
+
+_range, _unich, _unicode, _basestring = range, chr, str, str
+CUR_OS = sys.platform
+IS_WIN = any(CUR_OS.startswith(i) for i in ['win32', 'cygwin'])
+IS_NIX = any(CUR_OS.startswith(i) for i in ['aix', 'linux', 'darwin', 'freebsd'])
+RE_ANSI = re.compile(r"\x1b\[[;\d]*[A-Za-z]")
+
+try:
+    if IS_WIN:
+        import colorama
+    else:
+        raise ImportError
+except ImportError:
+    colorama = None
+else:
+    try:
+        colorama.init(strip=False)
+    except TypeError:
+        colorama.init()
+
+
+def envwrap(prefix, types=None, is_method=False):
+    """
+    Override parameter defaults via `os.environ[prefix + param_name]`.
+    Maps UPPER_CASE env vars map to lower_case param names.
+    camelCase isn't supported (because Windows ignores case).
+
+    Precedence (highest first):
+
+    - call (`foo(a=3)`)
+    - environ (`FOO_A=2`)
+    - signature (`def foo(a=1)`)
+
+    Parameters
+    ----------
+    prefix  : str
+        Env var prefix, e.g. "FOO_"
+    types  : dict, optional
+        Fallback mappings `{'param_name': type, ...}` if types cannot be
+        inferred from function signature.
+        Consider using `types=collections.defaultdict(lambda: ast.literal_eval)`.
+    is_method  : bool, optional
+        Whether to use `functools.partialmethod`. If (default: False) use `functools.partial`.
+
+    Examples
+    --------
+    ```
+    $ cat foo.py
+    from tqdm.utils import envwrap
+    @envwrap("FOO_")
+    def test(a=1, b=2, c=3):
+        print(f"received: a={a}, b={b}, c={c}")
+
+    $ FOO_A=42 FOO_C=1337 python -c 'import foo; foo.test(c=99)'
+    received: a=42, b=2, c=99
+    ```
+    """
+    if types is None:
+        types = {}
+    i = len(prefix)
+    env_overrides = {k[i:].lower(): v for k, v in os.environ.items() if k.startswith(prefix)}
+    part = partialmethod if is_method else partial
+
+    def wrap(func):
+        params = signature(func).parameters
+        # ignore unknown env vars
+        overrides = {k: v for k, v in env_overrides.items() if k in params}
+        # infer overrides' `type`s
+        for k in overrides:
+            param = params[k]
+            if param.annotation is not param.empty:  # typehints
+                for typ in getattr(param.annotation, '__args__', (param.annotation,)):
+                    try:
+                        overrides[k] = typ(overrides[k])
+                    except Exception:
+                        pass
+                    else:
+                        break
+            elif param.default is not None:  # type of default value
+                overrides[k] = type(param.default)(overrides[k])
+            else:
+                try:  # `types` fallback
+                    overrides[k] = types[k](overrides[k])
+                except KeyError:  # keep unconverted (`str`)
+                    pass
+        return part(func, **overrides)
+    return wrap
+
+
+class FormatReplace(object):
+    """
+    >>> a = FormatReplace('something')
+    >>> f"{a:5d}"
+    'something'
+    """  # NOQA: P102
+    def __init__(self, replace=''):
+        self.replace = replace
+        self.format_called = 0
+
+    def __format__(self, _):
+        self.format_called += 1
+        return self.replace
+
+
+class Comparable(object):
+    """Assumes child has self._comparable attr/@property"""
+    def __lt__(self, other):
+        return self._comparable < other._comparable
+
+    def __le__(self, other):
+        return (self < other) or (self == other)
+
+    def __eq__(self, other):
+        return self._comparable == other._comparable
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __gt__(self, other):
+        return not self <= other
+
+    def __ge__(self, other):
+        return not self < other
+
+
+class ObjectWrapper(object):
+    def __getattr__(self, name):
+        return getattr(self._wrapped, name)
+
+    def __setattr__(self, name, value):
+        return setattr(self._wrapped, name, value)
+
+    def wrapper_getattr(self, name):
+        """Actual `self.getattr` rather than self._wrapped.getattr"""
+        try:
+            return object.__getattr__(self, name)
+        except AttributeError:  # py2
+            return getattr(self, name)
+
+    def wrapper_setattr(self, name, value):
+        """Actual `self.setattr` rather than self._wrapped.setattr"""
+        return object.__setattr__(self, name, value)
+
+    def __init__(self, wrapped):
+        """
+        Thin wrapper around a given object
+        """
+        self.wrapper_setattr('_wrapped', wrapped)
+
+
+class SimpleTextIOWrapper(ObjectWrapper):
+    """
+    Change only `.write()` of the wrapped object by encoding the passed
+    value and passing the result to the wrapped object's `.write()` method.
+    """
+    # pylint: disable=too-few-public-methods
+    def __init__(self, wrapped, encoding):
+        super().__init__(wrapped)
+        self.wrapper_setattr('encoding', encoding)
+
+    def write(self, s):
+        """
+        Encode `s` and pass to the wrapped object's `.write()` method.
+        """
+        return self._wrapped.write(s.encode(self.wrapper_getattr('encoding')))
+
+    def __eq__(self, other):
+        return self._wrapped == getattr(other, '_wrapped', other)
+
+
+class DisableOnWriteError(ObjectWrapper):
+    """
+    Disable the given `tqdm_instance` upon `write()` or `flush()` errors.
+    """
+    @staticmethod
+    def disable_on_exception(tqdm_instance, func):
+        """
+        Quietly set `tqdm_instance.miniters=inf` if `func` raises `errno=5`.
+        """
+        tqdm_instance = proxy(tqdm_instance)
+
+        def inner(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except OSError as e:
+                if e.errno != 5:
+                    raise
+                try:
+                    tqdm_instance.miniters = float('inf')
+                except ReferenceError:
+                    pass
+            except ValueError as e:
+                if 'closed' not in str(e):
+                    raise
+                try:
+                    tqdm_instance.miniters = float('inf')
+                except ReferenceError:
+                    pass
+        return inner
+
+    def __init__(self, wrapped, tqdm_instance):
+        super().__init__(wrapped)
+        if hasattr(wrapped, 'write'):
+            self.wrapper_setattr(
+                'write', self.disable_on_exception(tqdm_instance, wrapped.write))
+        if hasattr(wrapped, 'flush'):
+            self.wrapper_setattr(
+                'flush', self.disable_on_exception(tqdm_instance, wrapped.flush))
+
+    def __eq__(self, other):
+        return self._wrapped == getattr(other, '_wrapped', other)
+
+
+class CallbackIOWrapper(ObjectWrapper):
+    def __init__(self, callback, stream, method="read"):
+        """
+        Wrap a given `file`-like object's `read()` or `write()` to report
+        lengths to the given `callback`
+        """
+        super().__init__(stream)
+        func = getattr(stream, method)
+        if method == "write":
+            @wraps(func)
+            def write(data, *args, **kwargs):
+                res = func(data, *args, **kwargs)
+                callback(len(data))
+                return res
+            self.wrapper_setattr('write', write)
+        elif method == "read":
+            @wraps(func)
+            def read(*args, **kwargs):
+                data = func(*args, **kwargs)
+                callback(len(data))
+                return data
+            self.wrapper_setattr('read', read)
+        else:
+            raise KeyError("Can only wrap read/write methods")
+
+
+def _is_utf(encoding):
+    try:
+        u'\u2588\u2589'.encode(encoding)
+    except UnicodeEncodeError:
+        return False
+    except Exception:
+        try:
+            return encoding.lower().startswith('utf-') or ('U8' == encoding)
+        except Exception:
+            return False
+    else:
+        return True
+
+
+def _supports_unicode(fp):
+    try:
+        return _is_utf(fp.encoding)
+    except AttributeError:
+        return False
+
+
+def _is_ascii(s):
+    if isinstance(s, str):
+        for c in s:
+            if ord(c) > 255:
+                return False
+        return True
+    return _supports_unicode(s)
+
+
+def _screen_shape_wrapper():  # pragma: no cover
+    """
+    Return a function which returns console dimensions (width, height).
+    Supported: linux, osx, windows, cygwin.
+    """
+    _screen_shape = None
+    if IS_WIN:
+        _screen_shape = _screen_shape_windows
+        if _screen_shape is None:
+            _screen_shape = _screen_shape_tput
+    if IS_NIX:
+        _screen_shape = _screen_shape_linux
+    return _screen_shape
+
+
+def _screen_shape_windows(fp):  # pragma: no cover
+    try:
+        import struct
+        from ctypes import create_string_buffer, windll
+        from sys import stdin, stdout
+
+        io_handle = -12  # assume stderr
+        if fp == stdin:
+            io_handle = -10
+        elif fp == stdout:
+            io_handle = -11
+
+        h = windll.kernel32.GetStdHandle(io_handle)
+        csbi = create_string_buffer(22)
+        res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi)
+        if res:
+            (_bufx, _bufy, _curx, _cury, _wattr, left, top, right, bottom,
+             _maxx, _maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw)
+            return right - left, bottom - top  # +1
+    except Exception:  # nosec
+        pass
+    return None, None
+
+
+def _screen_shape_tput(*_):  # pragma: no cover
+    """cygwin xterm (windows)"""
+    try:
+        import shlex
+        from subprocess import check_call  # nosec
+        return [int(check_call(shlex.split('tput ' + i))) - 1
+                for i in ('cols', 'lines')]
+    except Exception:  # nosec
+        pass
+    return None, None
+
+
+def _screen_shape_linux(fp):  # pragma: no cover
+
+    try:
+        from array import array
+        from fcntl import ioctl
+        from termios import TIOCGWINSZ
+    except ImportError:
+        return None, None
+    else:
+        try:
+            rows, cols = array('h', ioctl(fp, TIOCGWINSZ, '\0' * 8))[:2]
+            return cols, rows
+        except Exception:
+            try:
+                return [int(os.environ[i]) - 1 for i in ("COLUMNS", "LINES")]
+            except (KeyError, ValueError):
+                return None, None
+
+
+def _environ_cols_wrapper():  # pragma: no cover
+    """
+    Return a function which returns console width.
+    Supported: linux, osx, windows, cygwin.
+    """
+    warn("Use `_screen_shape_wrapper()(file)[0]` instead of"
+         " `_environ_cols_wrapper()(file)`", DeprecationWarning, stacklevel=2)
+    shape = _screen_shape_wrapper()
+    if not shape:
+        return None
+
+    @wraps(shape)
+    def inner(fp):
+        return shape(fp)[0]
+
+    return inner
+
+
+def _term_move_up():  # pragma: no cover
+    return '' if (os.name == 'nt') and (colorama is None) else '\x1b[A'
+
+
+def _text_width(s):
+    return sum(2 if east_asian_width(ch) in 'FW' else 1 for ch in str(s))
+
+
+def disp_len(data):
+    """
+    Returns the real on-screen length of a string which may contain
+    ANSI control codes and wide chars.
+    """
+    return _text_width(RE_ANSI.sub('', data))
+
+
+def disp_trim(data, length):
+    """
+    Trim a string which may contain ANSI control characters.
+    """
+    if len(data) == disp_len(data):
+        return data[:length]
+
+    ansi_present = bool(RE_ANSI.search(data))
+    while disp_len(data) > length:  # carefully delete one char at a time
+        data = data[:-1]
+    if ansi_present and bool(RE_ANSI.search(data)):
+        # assume ANSI reset is required
+        return data if data.endswith("\033[0m") else data + "\033[0m"
+    return data
diff --git a/venv/lib/python3.13/site-packages/tqdm/version.py b/venv/lib/python3.13/site-packages/tqdm/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..11cbaea79d1f4f46f9ae4bea542d7c66ded96e34
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/tqdm/version.py
@@ -0,0 +1,9 @@
+"""`tqdm` version detector. Precedence: installed dist, git, 'UNKNOWN'."""
+try:
+    from ._dist_ver import __version__
+except ImportError:
+    try:
+        from setuptools_scm import get_version
+        __version__ = get_version(root='..', relative_to=__file__)
+    except (ImportError, LookupError):
+        __version__ = "UNKNOWN"
diff --git a/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/INSTALLER b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/METADATA b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/METADATA
new file mode 100644
index 0000000000000000000000000000000000000000..15116c784ed1f8f0f524513d606a26bf6765e923
--- /dev/null
+++ b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/METADATA
@@ -0,0 +1,154 @@
+Metadata-Version: 2.4
+Name: urllib3
+Version: 2.5.0
+Summary: HTTP library with thread-safe connection pooling, file post, and more.
+Project-URL: Changelog, https://github.com/urllib3/urllib3/blob/main/CHANGES.rst
+Project-URL: Documentation, https://urllib3.readthedocs.io
+Project-URL: Code, https://github.com/urllib3/urllib3
+Project-URL: Issue tracker, https://github.com/urllib3/urllib3/issues
+Author-email: Andrey Petrov 
+Maintainer-email: Seth Michael Larson , Quentin Pradet , Illia Volochii 
+License-Expression: MIT
+License-File: LICENSE.txt
+Keywords: filepost,http,httplib,https,pooling,ssl,threadsafe,urllib
+Classifier: Environment :: Web Environment
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.9
+Provides-Extra: brotli
+Requires-Dist: brotli>=1.0.9; (platform_python_implementation == 'CPython') and extra == 'brotli'
+Requires-Dist: brotlicffi>=0.8.0; (platform_python_implementation != 'CPython') and extra == 'brotli'
+Provides-Extra: h2
+Requires-Dist: h2<5,>=4; extra == 'h2'
+Provides-Extra: socks
+Requires-Dist: pysocks!=1.5.7,<2.0,>=1.5.6; extra == 'socks'
+Provides-Extra: zstd
+Requires-Dist: zstandard>=0.18.0; extra == 'zstd'
+Description-Content-Type: text/markdown
+
+

+ +![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg) + +

+ +

+ PyPI Version + Python Versions + Join our Discord + Coverage Status + Build Status on GitHub + Documentation Status
+ OpenSSF Scorecard + SLSA 3 + CII Best Practices +

+ +urllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the +Python ecosystem already uses urllib3 and you should too. +urllib3 brings many critical features that are missing from the Python +standard libraries: + +- Thread safety. +- Connection pooling. +- Client-side SSL/TLS verification. +- File uploads with multipart encoding. +- Helpers for retrying requests and dealing with HTTP redirects. +- Support for gzip, deflate, brotli, and zstd encoding. +- Proxy support for HTTP and SOCKS. +- 100% test coverage. + +urllib3 is powerful and easy to use: + +```python3 +>>> import urllib3 +>>> resp = urllib3.request("GET", "http://httpbin.org/robots.txt") +>>> resp.status +200 +>>> resp.data +b"User-agent: *\nDisallow: /deny\n" +``` + +## Installing + +urllib3 can be installed with [pip](https://pip.pypa.io): + +```bash +$ python -m pip install urllib3 +``` + +Alternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3): + +```bash +$ git clone https://github.com/urllib3/urllib3.git +$ cd urllib3 +$ pip install . +``` + + +## Documentation + +urllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io). + + +## Community + +urllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and +collaborating with other contributors. Drop by and say hello πŸ‘‹ + + +## Contributing + +urllib3 happily accepts contributions. Please see our +[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html) +for some tips on getting started. + + +## Security Disclosures + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure with maintainers. + + +## Maintainers + +- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson) +- [@pquentin](https://github.com/pquentin) (Quentin Pradet) +- [@illia-v](https://github.com/illia-v) (Illia Volochii) +- [@theacodes](https://github.com/theacodes) (Thea Flowers) +- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro) +- [@lukasa](https://github.com/lukasa) (Cory Benfield) +- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco) +- [@shazow](https://github.com/shazow) (Andrey Petrov) + +πŸ‘‹ + + +## Sponsorship + +If your company benefits from this library, please consider [sponsoring its +development](https://urllib3.readthedocs.io/en/latest/sponsors.html). + + +## For Enterprise + +Professional support for urllib3 is available as part of the [Tidelift +Subscription][1]. Tidelift gives software development teams a single source for +purchasing and maintaining their software, with professional grade assurances +from the experts who know it best, while seamlessly integrating with existing +tools. + +[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme diff --git a/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/RECORD b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..acc37391946449fd3ba1475f30763f8d9706d359 --- /dev/null +++ b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/RECORD @@ -0,0 +1,79 @@ +urllib3-2.5.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +urllib3-2.5.0.dist-info/METADATA,sha256=maYkTIZt0a-lkEC-hMZWbCBmcGZyJcYOeRk4_nuTrNc,6461 +urllib3-2.5.0.dist-info/RECORD,, +urllib3-2.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87 +urllib3-2.5.0.dist-info/licenses/LICENSE.txt,sha256=Ew46ZNX91dCWp1JpRjSn2d8oRGnehuVzIQAmgEHj1oY,1093 +urllib3/__init__.py,sha256=JMo1tg1nIV1AeJ2vENC_Txfl0e5h6Gzl9DGVk1rWRbo,6979 +urllib3/__pycache__/__init__.cpython-313.pyc,, +urllib3/__pycache__/_base_connection.cpython-313.pyc,, +urllib3/__pycache__/_collections.cpython-313.pyc,, +urllib3/__pycache__/_request_methods.cpython-313.pyc,, +urllib3/__pycache__/_version.cpython-313.pyc,, +urllib3/__pycache__/connection.cpython-313.pyc,, +urllib3/__pycache__/connectionpool.cpython-313.pyc,, +urllib3/__pycache__/exceptions.cpython-313.pyc,, +urllib3/__pycache__/fields.cpython-313.pyc,, +urllib3/__pycache__/filepost.cpython-313.pyc,, +urllib3/__pycache__/poolmanager.cpython-313.pyc,, +urllib3/__pycache__/response.cpython-313.pyc,, +urllib3/_base_connection.py,sha256=T1cwH3RhzsrBh6Bz3AOGVDboRsE7veijqZPXXQTR2Rg,5568 +urllib3/_collections.py,sha256=tM7c6J1iKtWZYV_QGYb8-r7Nr1524Dehnsa0Ufh6_mU,17295 +urllib3/_request_methods.py,sha256=gCeF85SO_UU4WoPwYHIoz_tw-eM_EVOkLFp8OFsC7DA,9931 +urllib3/_version.py,sha256=ZlSUkBo_Pd90B6pM0GDO7l2vitQD3QCK3xPR_K0zFJA,511 +urllib3/connection.py,sha256=iP4pgSJtpusXyYlejzNn-gih_wWCxMU-qy6OU1kaapc,42613 +urllib3/connectionpool.py,sha256=ZEhudsa8BIubD2M0XoxBBsjxbsXwMgUScH7oQ9i-j1Y,43371 +urllib3/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/contrib/__pycache__/__init__.cpython-313.pyc,, +urllib3/contrib/__pycache__/pyopenssl.cpython-313.pyc,, +urllib3/contrib/__pycache__/socks.cpython-313.pyc,, +urllib3/contrib/emscripten/__init__.py,sha256=u6KNgzjlFZbuAAXa_ybCR7gQ71VJESnF-IIdDA73brw,733 +urllib3/contrib/emscripten/__pycache__/__init__.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/connection.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/fetch.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/request.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/response.cpython-313.pyc,, +urllib3/contrib/emscripten/connection.py,sha256=j8DR_flE7hsoFhNfiqHLiaPaCsVbzG44jgahwvsQ52A,8771 +urllib3/contrib/emscripten/emscripten_fetch_worker.js,sha256=CDfYF_9CDobtx2lGidyJ1zjDEvwNT5F-dchmVWXDh0E,3655 +urllib3/contrib/emscripten/fetch.py,sha256=kco06lWoQ-fdFfN51-nzeTywPVBEHg89WIst33H3xcg,23484 +urllib3/contrib/emscripten/request.py,sha256=mL28szy1KvE3NJhWor5jNmarp8gwplDU-7gwGZY5g0Q,566 +urllib3/contrib/emscripten/response.py,sha256=7oVPENYZHuzEGRtG40HonpH5tAIYHsGcHPbJt2Z0U-Y,9507 +urllib3/contrib/pyopenssl.py,sha256=Xp5Ym05VgXGhHa0C4wlutvHxY8SnKSS6WLb2t5Miu0s,19720 +urllib3/contrib/socks.py,sha256=-iardc61GypsJzD6W6yuRS7KVCyfowcQrl_719H7lIM,7549 +urllib3/exceptions.py,sha256=pziumHf0Vwx3z4gvUy7ou8nlM2yIYX0N3l3znEdeF5U,9938 +urllib3/fields.py,sha256=FCf7UULSkf10cuTRUWTQESzxgl1WT8e2aCy3kfyZins,10829 +urllib3/filepost.py,sha256=U8eNZ-mpKKHhrlbHEEiTxxgK16IejhEa7uz42yqA_dI,2388 +urllib3/http2/__init__.py,sha256=xzrASH7R5ANRkPJOot5lGnATOq3KKuyXzI42rcnwmqs,1741 +urllib3/http2/__pycache__/__init__.cpython-313.pyc,, +urllib3/http2/__pycache__/connection.cpython-313.pyc,, +urllib3/http2/__pycache__/probe.cpython-313.pyc,, +urllib3/http2/connection.py,sha256=4DB0DkZEC3yIkhGjUDIHB17wrYCLaL0Ag5bDW2_mGPI,12694 +urllib3/http2/probe.py,sha256=nnAkqbhAakOiF75rz7W0udZ38Eeh_uD8fjV74N73FEI,3014 +urllib3/poolmanager.py,sha256=oKsgP1EsAI4OVgK9-9D3AYXZS5HYV8yKUSog-QbJ8Ts,23866 +urllib3/py.typed,sha256=UaCuPFa3H8UAakbt-5G8SPacldTOGvJv18pPjUJ5gDY,93 +urllib3/response.py,sha256=TVTSu6Q1U0U7hoHYMIRxxuh4zroeMo8b5EI4DOA13Eo,46480 +urllib3/util/__init__.py,sha256=-qeS0QceivazvBEKDNFCAI-6ACcdDOE4TMvo7SLNlAQ,1001 +urllib3/util/__pycache__/__init__.cpython-313.pyc,, +urllib3/util/__pycache__/connection.cpython-313.pyc,, +urllib3/util/__pycache__/proxy.cpython-313.pyc,, +urllib3/util/__pycache__/request.cpython-313.pyc,, +urllib3/util/__pycache__/response.cpython-313.pyc,, +urllib3/util/__pycache__/retry.cpython-313.pyc,, +urllib3/util/__pycache__/ssl_.cpython-313.pyc,, +urllib3/util/__pycache__/ssl_match_hostname.cpython-313.pyc,, +urllib3/util/__pycache__/ssltransport.cpython-313.pyc,, +urllib3/util/__pycache__/timeout.cpython-313.pyc,, +urllib3/util/__pycache__/url.cpython-313.pyc,, +urllib3/util/__pycache__/util.cpython-313.pyc,, +urllib3/util/__pycache__/wait.cpython-313.pyc,, +urllib3/util/connection.py,sha256=JjO722lzHlzLXPTkr9ZWBdhseXnMVjMSb1DJLVrXSnQ,4444 +urllib3/util/proxy.py,sha256=seP8-Q5B6bB0dMtwPj-YcZZQ30vHuLqRu-tI0JZ2fzs,1148 +urllib3/util/request.py,sha256=XuAsEBT58DAZYUTwpMH5Hr3A1OPoMNvNIYIunbIqbc8,8411 +urllib3/util/response.py,sha256=vQE639uoEhj1vpjEdxu5lNIhJCSUZkd7pqllUI0BZOA,3374 +urllib3/util/retry.py,sha256=bj-2YUqblxLlv8THg5fxww-DM54XCbjgZXIQ71XioCY,18459 +urllib3/util/ssl_.py,sha256=jxnQ3msYkVaokJVWqHNnAVdVtDdidrTHDeyk50gwqaQ,19786 +urllib3/util/ssl_match_hostname.py,sha256=Di7DU7zokoltapT_F0Sj21ffYxwaS_cE5apOtwueeyA,5845 +urllib3/util/ssltransport.py,sha256=Ez4O8pR_vT8dan_FvqBYS6dgDfBXEMfVfrzcdUoWfi4,8847 +urllib3/util/timeout.py,sha256=4eT1FVeZZU7h7mYD1Jq2OXNe4fxekdNvhoWUkZusRpA,10346 +urllib3/util/url.py,sha256=WRh-TMYXosmgp8m8lT4H5spoHw5yUjlcMCfU53AkoAs,15205 +urllib3/util/util.py,sha256=j3lbZK1jPyiwD34T8IgJzdWEZVT-4E-0vYIJi9UjeNA,1146 +urllib3/util/wait.py,sha256=_ph8IrUR3sqPqi0OopQgJUlH4wzkGeM5CiyA7XGGtmI,4423 diff --git a/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/WHEEL b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..12228d414b6cfed7c39d3781c85c63256a1d7fb5 --- /dev/null +++ b/venv/lib/python3.13/site-packages/urllib3-2.5.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.27.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/venv/lib/python3.13/site-packages/yaml/__init__.py b/venv/lib/python3.13/site-packages/yaml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d58f0891737def7f38e5d86dde2dbf9be0c13dce --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/__init__.py @@ -0,0 +1,390 @@ + +from .error import * + +from .tokens import * +from .events import * +from .nodes import * + +from .loader import * +from .dumper import * + +__version__ = '6.0.3' +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import io + +#------------------------------------------------------------------------------ +# XXX "Warnings control" is now deprecated. Leaving in the API function to not +# break code that uses it. +#------------------------------------------------------------------------------ +def warnings(settings=None): + if settings is None: + return {} + +#------------------------------------------------------------------------------ +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def full_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load(stream, FullLoader) + +def full_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load_all(stream, FullLoader) + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. + """ + return load_all(stream, SafeLoader) + +def unsafe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load(stream, UnsafeLoader) + +def unsafe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load_all(stream, UnsafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = io.StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=None, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + if Loader is None: + loader.Loader.add_implicit_resolver(tag, regexp, first) + loader.FullLoader.add_implicit_resolver(tag, regexp, first) + loader.UnsafeLoader.add_implicit_resolver(tag, regexp, first) + else: + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + if Loader is None: + loader.Loader.add_path_resolver(tag, path, kind) + loader.FullLoader.add_path_resolver(tag, path, kind) + loader.UnsafeLoader.add_path_resolver(tag, path, kind) + else: + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=None): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + if Loader is None: + loader.Loader.add_constructor(tag, constructor) + loader.FullLoader.add_constructor(tag, constructor) + loader.UnsafeLoader.add_constructor(tag, constructor) + else: + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=None): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + if Loader is None: + loader.Loader.add_multi_constructor(tag_prefix, multi_constructor) + loader.FullLoader.add_multi_constructor(tag_prefix, multi_constructor) + loader.UnsafeLoader.add_multi_constructor(tag_prefix, multi_constructor) + else: + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + if isinstance(cls.yaml_loader, list): + for loader in cls.yaml_loader: + loader.add_constructor(cls.yaml_tag, cls.from_yaml) + else: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(metaclass=YAMLObjectMetaclass): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = [Loader, FullLoader, UnsafeLoader] + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + diff --git a/venv/lib/python3.13/site-packages/yaml/composer.py b/venv/lib/python3.13/site-packages/yaml/composer.py new file mode 100644 index 0000000000000000000000000000000000000000..6d15cb40e3b4198819c91c6f8d8b32807fcf53b2 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/composer.py @@ -0,0 +1,139 @@ + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer: + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor, event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurrence" + % anchor, self.anchors[anchor].start_mark, + "second occurrence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/venv/lib/python3.13/site-packages/yaml/constructor.py b/venv/lib/python3.13/site-packages/yaml/constructor.py new file mode 100644 index 0000000000000000000000000000000000000000..619acd3070a4845c653fcf22a626e05158035bc2 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/constructor.py @@ -0,0 +1,748 @@ + +__all__ = [ + 'BaseConstructor', + 'SafeConstructor', + 'FullConstructor', + 'UnsafeConstructor', + 'Constructor', + 'ConstructorError' +] + +from .error import * +from .nodes import * + +import collections.abc, datetime, base64, binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor: + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def check_state_key(self, key): + """Block special attributes/methods from being set in a newly created + object, to prevent user-controlled methods from being called during + deserialization""" + if self.get_state_keys_blacklist_regexp().match(key): + raise ConstructorError(None, None, + "blacklisted key '%s' in instance state found" % (key,), None) + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if tag_prefix is not None and node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.abc.Hashable): + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return super().construct_scalar(node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return super().construct_mapping(node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + r'''^(?P[0-9][0-9][0-9][0-9]) + -(?P[0-9][0-9]?) + -(?P[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P[0-9][0-9]?) + :(?P[0-9][0-9]) + :(?P[0-9][0-9]) + (?:\.(?P[0-9]*))? + (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) + (?::(?P[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + tzinfo = None + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + tzinfo = datetime.timezone(delta) + elif values['tz']: + tzinfo = datetime.timezone.utc + return datetime.datetime(year, month, day, hour, minute, second, fraction, + tzinfo=tzinfo) + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag, + node.start_mark) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class FullConstructor(SafeConstructor): + # 'extend' is blacklisted because it is used by + # construct_python_object_apply to add `listitems` to a newly generate + # python instance + def get_state_keys_blacklist(self): + return ['^extend$', '^__.*__$'] + + def get_state_keys_blacklist_regexp(self): + if not hasattr(self, 'state_keys_blacklist_regexp'): + self.state_keys_blacklist_regexp = re.compile('(' + '|'.join(self.get_state_keys_blacklist()) + ')') + return self.state_keys_blacklist_regexp + + def construct_python_str(self, node): + return self.construct_scalar(node) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + return self.construct_yaml_int(node) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark, unsafe=False): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + if unsafe: + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), mark) + if name not in sys.modules: + raise ConstructorError("while constructing a Python module", mark, + "module %r is not imported" % name, mark) + return sys.modules[name] + + def find_python_name(self, name, mark, unsafe=False): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if '.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = 'builtins' + object_name = name + if unsafe: + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + if module_name not in sys.modules: + raise ConstructorError("while constructing a Python object", mark, + "module %r is not imported" % module_name, mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False, unsafe=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if not (unsafe or isinstance(cls, type)): + raise ConstructorError("while constructing a Python instance", node.start_mark, + "expected a class, but found %r" % type(cls), + node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state, unsafe=False): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + if not unsafe and state: + for key in state.keys(): + self.check_state_key(key) + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + if not unsafe: + self.check_state_key(key) + setattr(instance, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/none', + FullConstructor.construct_yaml_null) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/bool', + FullConstructor.construct_yaml_bool) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/str', + FullConstructor.construct_python_str) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', + FullConstructor.construct_python_unicode) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', + FullConstructor.construct_python_bytes) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/int', + FullConstructor.construct_yaml_int) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/long', + FullConstructor.construct_python_long) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/float', + FullConstructor.construct_yaml_float) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/complex', + FullConstructor.construct_python_complex) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/list', + FullConstructor.construct_yaml_seq) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', + FullConstructor.construct_python_tuple) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/dict', + FullConstructor.construct_yaml_map) + +FullConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', + FullConstructor.construct_python_name) + +class UnsafeConstructor(FullConstructor): + + def find_python_module(self, name, mark): + return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) + + def find_python_name(self, name, mark): + return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) + + def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): + return super(UnsafeConstructor, self).make_python_instance( + suffix, node, args, kwds, newobj, unsafe=True) + + def set_python_instance_state(self, instance, state): + return super(UnsafeConstructor, self).set_python_instance_state( + instance, state, unsafe=True) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', + UnsafeConstructor.construct_python_module) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', + UnsafeConstructor.construct_python_object) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', + UnsafeConstructor.construct_python_object_new) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', + UnsafeConstructor.construct_python_object_apply) + +# Constructor is same as UnsafeConstructor. Need to leave this in place in case +# people have extended it directly. +class Constructor(UnsafeConstructor): + pass diff --git a/venv/lib/python3.13/site-packages/yaml/cyaml.py b/venv/lib/python3.13/site-packages/yaml/cyaml.py new file mode 100644 index 0000000000000000000000000000000000000000..0c21345879b298bb8668201bebe7d289586b17f9 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/cyaml.py @@ -0,0 +1,101 @@ + +__all__ = [ + 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper' +] + +from yaml._yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CFullLoader(CParser, FullConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + FullConstructor.__init__(self) + Resolver.__init__(self) + +class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + UnsafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + diff --git a/venv/lib/python3.13/site-packages/yaml/dumper.py b/venv/lib/python3.13/site-packages/yaml/dumper.py new file mode 100644 index 0000000000000000000000000000000000000000..6aadba551f3836b02f4752277f4b3027073defad --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/dumper.py @@ -0,0 +1,62 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + diff --git a/venv/lib/python3.13/site-packages/yaml/emitter.py b/venv/lib/python3.13/site-packages/yaml/emitter.py new file mode 100644 index 0000000000000000000000000000000000000000..a664d011162af69184df2f8e59ab7feec818f7c7 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/emitter.py @@ -0,0 +1,1137 @@ + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis: + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter: + + DEFAULT_TAG_PREFIXES = { + '!' : '!', + 'tag:yaml.org,2002:' : '!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overridden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator('...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator('---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator('...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator('...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor('&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor('*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator('[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator(']', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator('{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator('}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator('}', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator('-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == '') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = '!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return '%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) + for ch in handle[1:-1]: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch, handle)) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == '!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return ''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == '!': + return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ch) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = ''.join(chunks) + if handle: + return '%s%s' % (handle, suffix_text) + else: + return '!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch, anchor)) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith('---') or scalar.startswith('...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in '#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in '?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in ',?[]{}': + flow_indicators = True + if ch == ':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '#' and preceded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in '\n\x85\u2028\u2029': + line_breaks = True + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD' + or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == ' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in '\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = '%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = '%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator('\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != ' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == '\'': + data = '\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + self.write_indicator('\'', False) + + ESCAPE_REPLACEMENTS = { + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' + or (self.allow_unicode + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + data = '\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator('"', False) + + def determine_block_hints(self, text): + hints = '' + if text: + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': + self.write_line_break() + leading_space = (ch == ' ') + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in '\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = ' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 diff --git a/venv/lib/python3.13/site-packages/yaml/error.py b/venv/lib/python3.13/site-packages/yaml/error.py new file mode 100644 index 0000000000000000000000000000000000000000..b796b4dc519512c4825ff539a2e6aa20f4d370d0 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/error.py @@ -0,0 +1,75 @@ + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark: + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end] + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/venv/lib/python3.13/site-packages/yaml/events.py b/venv/lib/python3.13/site-packages/yaml/events.py new file mode 100644 index 0000000000000000000000000000000000000000..f79ad389cb6c9517e391dcd25534866bc9ccd36a --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/events.py @@ -0,0 +1,86 @@ + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/venv/lib/python3.13/site-packages/yaml/loader.py b/venv/lib/python3.13/site-packages/yaml/loader.py new file mode 100644 index 0000000000000000000000000000000000000000..e90c11224c38e559cdf0cb205f0692ebd4fb8681 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/loader.py @@ -0,0 +1,63 @@ + +__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] + +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + FullConstructor.__init__(self) + Resolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + +# UnsafeLoader is the same as Loader (which is and was always unsafe on +# untrusted input). Use of either Loader or UnsafeLoader should be rare, since +# FullLoad should be able to load almost all YAML safely. Loader is left intact +# to ensure backwards compatibility. +class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) diff --git a/venv/lib/python3.13/site-packages/yaml/nodes.py b/venv/lib/python3.13/site-packages/yaml/nodes.py new file mode 100644 index 0000000000000000000000000000000000000000..c4f070c41e1fb1bc01af27d69329e92dded38908 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/nodes.py @@ -0,0 +1,49 @@ + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/venv/lib/python3.13/site-packages/yaml/parser.py b/venv/lib/python3.13/site-packages/yaml/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..13a5995d292045d0f865a99abf692bd35dc87814 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/parser.py @@ -0,0 +1,589 @@ + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser: + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + '!': '!', + '!!': 'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == 'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == 'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle, + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle, + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == '!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == '!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == '!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), '', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), '', mark, mark) + diff --git a/venv/lib/python3.13/site-packages/yaml/reader.py b/venv/lib/python3.13/site-packages/yaml/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..774b0219b5932a0ee1c27e637371de5ba8d9cb16 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/reader.py @@ -0,0 +1,185 @@ +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from .error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, bytes): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = '' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, str): + self.name = "" + self.check_printable(stream) + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "") + self.eof = False + self.raw_buffer = None + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): + self.line += 1 + self.column = 0 + elif ch != '\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): + self.update_raw() + if isinstance(self.raw_buffer, bytes): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=4096): + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True diff --git a/venv/lib/python3.13/site-packages/yaml/representer.py b/venv/lib/python3.13/site-packages/yaml/representer.py new file mode 100644 index 0000000000000000000000000000000000000000..808ca06dfbd60c9a23eb079151b74a82ef688749 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/representer.py @@ -0,0 +1,389 @@ + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from .error import * +from .nodes import * + +import datetime, copyreg, types, base64, collections + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter: + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): + self.default_style = default_style + self.sort_keys = sort_keys + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, str(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + @classmethod + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + if self.sort_keys: + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data is None: + return True + if isinstance(data, tuple) and data == (): + return True + if isinstance(data, (str, bytes, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + + def represent_str(self, data): + return self.represent_scalar('tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') + + def represent_bool(self, data): + if data: + value = 'true' + else: + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + else: + value = repr(data).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence('tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping('tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping('tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object", data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_complex(self, data): + if data.imag == 0.0: + data = '%r' % data.real + elif data.real == 0.0: + data = '%rj' % data.imag + elif data.imag > 0: + data = '%r+%rj' % (data.real, data.imag) + else: + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') + + def represent_module(self, data): + return self.represent_scalar( + 'tag:yaml.org,2002:python/module:'+data.__name__, '') + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent an object", data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = '%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + + def represent_ordered_dict(self, data): + # Provide uniform representation across different Python versions. + data_type = type(data) + tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ + % (data_type.__module__, data_type.__name__) + items = [[key, value] for key, value in data.items()] + return self.represent_sequence(tag, [items]) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_multi_representer(type, + Representer.represent_name) + +Representer.add_representer(collections.OrderedDict, + Representer.represent_ordered_dict) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/venv/lib/python3.13/site-packages/yaml/resolver.py b/venv/lib/python3.13/site-packages/yaml/resolver.py new file mode 100644 index 0000000000000000000000000000000000000000..3522bdaaf6358110b608f4e6503b9d314c82d887 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/resolver.py @@ -0,0 +1,227 @@ + +__all__ = ['BaseResolver', 'Resolver'] + +from .error import * +from .nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver: + + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + implicit_resolvers = {} + for key in cls.yaml_implicit_resolvers: + implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] + cls.yaml_implicit_resolvers = implicit_resolvers + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, str) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (str, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, str): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + wildcard_resolvers = self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers + wildcard_resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9][0-9_]*(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list('-+0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) + diff --git a/venv/lib/python3.13/site-packages/yaml/scanner.py b/venv/lib/python3.13/site-packages/yaml/scanner.py new file mode 100644 index 0000000000000000000000000000000000000000..de925b07f1eaec33c9c305a8a69f9eb7ac5983c5 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/scanner.py @@ -0,0 +1,1435 @@ + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from .error import MarkedYAMLError +from .tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey: + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner: + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + # Return None if no more tokens. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + else: + return None + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == '\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" % ch, + self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not find expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not find expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid indentation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not necessary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be caught by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if : + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == '\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" % self.peek(), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" % self.peek(), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not ('0' <= ch <= '9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch, self.get_mark()) + length = 0 + while '0' <= self.peek(length) <= '9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == ' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != ' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch, self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpreted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == '<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != '>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek(), + self.get_mark()) + self.forward() + elif ch in '\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = '!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = '!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = '!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = '' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != '\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in ' \t' + length = 0 + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == '\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch, self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + while self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + '/': '/', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = { + 'x': 2, + 'u': 4, + 'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') + self.forward(2) + elif (double and ch == '\'') or (not double and ch in '\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexadecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(chr(code)) + self.forward(length) + elif ch in '\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch, self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in ' \t': + self.forward() + if self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',' or '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == '#': + break + while True: + ch = self.peek(length) + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029' + + (u',[]{}' if self.flow_level else u''))\ + or (self.flow_level and ch in ',?[]{}'): + break + length += 1 + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == '#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in ' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != '!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if ch != '!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + codes = [] + mark = self.get_mark() + while self.peek() == '%': + self.forward() + for k in range(2): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexadecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) + self.forward(2) + try: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': + self.forward(2) + else: + self.forward() + return '\n' + elif ch in '\u2028\u2029': + self.forward() + return ch + return '' diff --git a/venv/lib/python3.13/site-packages/yaml/serializer.py b/venv/lib/python3.13/site-packages/yaml/serializer.py new file mode 100644 index 0000000000000000000000000000000000000000..fe911e67ae7a739abb491fbbc6834b9c37bbda4b --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/serializer.py @@ -0,0 +1,111 @@ + +__all__ = ['Serializer', 'SerializerError'] + +from .error import YAMLError +from .events import * +from .nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer: + + ANCHOR_TEMPLATE = 'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/venv/lib/python3.13/site-packages/yaml/tokens.py b/venv/lib/python3.13/site-packages/yaml/tokens.py new file mode 100644 index 0000000000000000000000000000000000000000..4d0b48a394ac8c019b401516a12f688df361cf90 --- /dev/null +++ b/venv/lib/python3.13/site-packages/yaml/tokens.py @@ -0,0 +1,104 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '' + +class DirectiveToken(Token): + id = '' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '' + +class DocumentEndToken(Token): + id = '' + +class StreamStartToken(Token): + id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '' + +class BlockSequenceStartToken(Token): + id = '' + +class BlockMappingStartToken(Token): + id = '' + +class BlockEndToken(Token): + id = '' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style +