diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/INSTALLER b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/METADATA b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..13b96da6f3cd4612b12188297eb88ae1d526b5f2 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/METADATA @@ -0,0 +1,750 @@ +Metadata-Version: 2.4 +Name: charset-normalizer +Version: 3.4.3 +Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. +Author-email: "Ahmed R. TAHRI" +Maintainer-email: "Ahmed R. TAHRI" +License: MIT +Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md +Project-URL: Documentation, https://charset-normalizer.readthedocs.io/ +Project-URL: Code, https://github.com/jawah/charset_normalizer +Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues +Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Text Processing :: Linguistic +Classifier: Topic :: Utilities +Classifier: Typing :: Typed +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +License-File: LICENSE +Provides-Extra: unicode-backport +Dynamic: license-file + +

Charset Detection, for Everyone 👋

+ +

+ The Real First Universal Charset Detector
+ + + + + Download Count Total + + + + +

+

+ Featured Packages
+ + Static Badge + + + Static Badge + +

+

+ In other language (unofficial port - by the community)
+ + Static Badge + +

+ +> A library that helps you read text from an unknown charset encoding.
Motivated by `chardet`, +> I'm trying to resolve the issue by taking a new approach. +> All IANA character set names for which the Python core library provides codecs are supported. + +

+ >>>>> 👉 Try Me Online Now, Then Adopt Me 👈 <<<<< +

+ +This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**. + +| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) | +|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:| +| `Fast` | ❌ | ✅ | ✅ | +| `Universal**` | ❌ | ✅ | ❌ | +| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ | +| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ | +| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ | +| `Native Python` | ✅ | ✅ | ❌ | +| `Detect spoken language` | ❌ | ✅ | N/A | +| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ | +| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB | +| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 | + +

+Reading Normalized TextCat Reading Text +

+ +*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*
+ +## ⚡ Performance + +This package offer better performance than its counterpart Chardet. Here are some numbers. + +| Package | Accuracy | Mean per file (ms) | File per sec (est) | +|-----------------------------------------------|:--------:|:------------------:|:------------------:| +| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec | +| charset-normalizer | **98 %** | **10 ms** | 100 file/sec | + +| Package | 99th percentile | 95th percentile | 50th percentile | +|-----------------------------------------------|:---------------:|:---------------:|:---------------:| +| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms | +| charset-normalizer | 100 ms | 50 ms | 5 ms | + +_updated as of december 2024 using CPython 3.12_ + +Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload. + +> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows. +> And yes, these results might change at any time. The dataset can be updated to include more files. +> The actual delays heavily depends on your CPU capabilities. The factors should remain the same. +> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability +> (e.g. Supported Encoding) Challenge-them if you want. + +## ✨ Installation + +Using pip: + +```sh +pip install charset-normalizer -U +``` + +## 🚀 Basic Usage + +### CLI +This package comes with a CLI. + +``` +usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD] + file [file ...] + +The Real First Universal Charset Detector. Discover originating encoding used +on text file. Normalize text to unicode. + +positional arguments: + files File(s) to be analysed + +optional arguments: + -h, --help show this help message and exit + -v, --verbose Display complementary information about file if any. + Stdout will contain logs about the detection process. + -a, --with-alternative + Output complementary possibilities if any. Top-level + JSON WILL be a list. + -n, --normalize Permit to normalize input file. If not set, program + does not write anything. + -m, --minimal Only output the charset detected to STDOUT. Disabling + JSON output. + -r, --replace Replace file when trying to normalize it instead of + creating a new one. + -f, --force Replace file without asking if you are sure, use this + flag with caution. + -t THRESHOLD, --threshold THRESHOLD + Define a custom maximum amount of chaos allowed in + decoded content. 0. <= chaos <= 1. + --version Show version information and exit. +``` + +```bash +normalizer ./data/sample.1.fr.srt +``` + +or + +```bash +python -m charset_normalizer ./data/sample.1.fr.srt +``` + +🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format. + +```json +{ + "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt", + "encoding": "cp1252", + "encoding_aliases": [ + "1252", + "windows_1252" + ], + "alternative_encodings": [ + "cp1254", + "cp1256", + "cp1258", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + "mbcs" + ], + "language": "French", + "alphabets": [ + "Basic Latin", + "Latin-1 Supplement" + ], + "has_sig_or_bom": false, + "chaos": 0.149, + "coherence": 97.152, + "unicode_path": null, + "is_preferred": true +} +``` + +### Python +*Just print out normalized text* +```python +from charset_normalizer import from_path + +results = from_path('./my_subtitle.srt') + +print(str(results.best())) +``` + +*Upgrade your code without effort* +```python +from charset_normalizer import detect +``` + +The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible. + +See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/) + +## 😇 Why + +When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a +reliable alternative using a completely different method. Also! I never back down on a good challenge! + +I **don't care** about the **originating charset** encoding, because **two different tables** can +produce **two identical rendered string.** +What I want is to get readable text, the best I can. + +In a way, **I'm brute forcing text decoding.** How cool is that ? 😎 + +Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode. + +## 🍰 How + + - Discard all charset encoding table that could not fit the binary content. + - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding. + - Extract matches with the lowest mess detected. + - Additionally, we measure coherence / probe for a language. + +**Wait a minute**, what is noise/mess and coherence according to **YOU ?** + +*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then +**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text). + I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to + improve or rewrite it. + +*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought +that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design. + +## ⚡ Known limitations + + - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters)) + - Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content. + +## ⚠️ About Python EOLs + +**If you are running:** + +- Python >=2.7,<3.5: Unsupported +- Python 3.5: charset-normalizer < 2.1 +- Python 3.6: charset-normalizer < 3.1 +- Python 3.7: charset-normalizer < 4.0 + +Upgrade your Python interpreter as soon as possible. + +## 👤 Contributing + +Contributions, issues and feature requests are very much welcome.
+Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute. + +## 📝 License + +Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).
+This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed. + +Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/) + +## 💼 For Enterprise + +Professional support for charset-normalizer is available as part of the [Tidelift +Subscription][1]. Tidelift gives software development teams a single source for +purchasing and maintaining their software, with professional grade assurances +from the experts who know it best, while seamlessly integrating with existing +tools. + +[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297) + +# Changelog +All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09) + +### Changed +- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583) +- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391) + +### Added +- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase. +- Support for Python 3.14 + +### Fixed +- sdist archive contained useless directories. +- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633) + +### Misc +- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes. + Each published wheel comes with its SBOM. We choose CycloneDX as the format. +- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel. + +## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02) + +### Fixed +- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591) +- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587) + +### Changed +- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8 + +## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24) + +### Changed +- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend. +- Enforce annotation delayed loading for a simpler and consistent types in the project. +- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8 + +### Added +- pre-commit configuration. +- noxfile. + +### Removed +- `build-requirements.txt` as per using `pyproject.toml` native build configuration. +- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile). +- `setup.cfg` in favor of `pyproject.toml` metadata configuration. +- Unused `utils.range_scan` function. + +### Fixed +- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572) +- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+ + +## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08) + +### Added +- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints. +- Support for Python 3.13 (#512) + +### Fixed +- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch. +- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537) +- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381) + +## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31) + +### Fixed +- Unintentional memory usage regression when using large payload that match several encoding (#376) +- Regression on some detection case showcased in the documentation (#371) + +### Added +- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife) + +## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22) + +### Changed +- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8 +- Improved the general detection reliability based on reports from the community + +## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30) + +### Added +- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer` +- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323) + +### Removed +- (internal) Redundant utils.is_ascii function and unused function is_private_use_only +- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant + +### Changed +- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection +- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8 + +### Fixed +- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350) + +## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07) + +### Changed +- Typehint for function `from_path` no longer enforce `PathLike` as its first argument +- Minor improvement over the global detection reliability + +### Added +- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries +- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True) +- Explicit support for Python 3.12 + +### Fixed +- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289) + +## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06) + +### Added +- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262) + +### Removed +- Support for Python 3.6 (PR #260) + +### Changed +- Optional speedup provided by mypy/c 1.0.1 + +## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18) + +### Fixed +- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233) + +### Changed +- Speedup provided by mypy/c 0.990 on Python >= 3.7 + +## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it +- Sphinx warnings when generating the documentation + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' + +## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) + +### Added +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Removed +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) + +### Fixed +- Sphinx warnings when generating the documentation + +## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15) + +### Changed +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Removed +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19) + +### Deprecated +- Function `normalize` scheduled for removal in 3.0 + +### Changed +- Removed useless call to decode in fn is_unprintable (#206) + +### Fixed +- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204) + +## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19) + +### Added +- Output the Unicode table version when running the CLI with `--version` (PR #194) + +### Changed +- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175) +- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183) + +### Fixed +- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175) +- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181) + +### Removed +- Support for Python 3.5 (PR #192) + +### Deprecated +- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194) + +## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12) + +### Fixed +- ASCII miss-detection on rare cases (PR #170) + +## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30) + +### Added +- Explicit support for Python 3.11 (PR #164) + +### Changed +- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165) + +## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04) + +### Fixed +- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154) + +### Changed +- Skipping the language-detection (CD) on ASCII (PR #155) + +## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03) + +### Changed +- Moderating the logging impact (since 2.0.8) for specific environments (PR #147) + +### Fixed +- Wrong logging level applied when setting kwarg `explain` to True (PR #146) + +## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24) +### Changed +- Improvement over Vietnamese detection (PR #126) +- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124) +- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122) +- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129) +- Code style as refactored by Sourcery-AI (PR #131) +- Minor adjustment on the MD around european words (PR #133) +- Remove and replace SRTs from assets / tests (PR #139) +- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135) + +### Fixed +- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137) +- Avoid using too insignificant chunk (PR #137) + +### Added +- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141) + +## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11) +### Added +- Add support for Kazakh (Cyrillic) language detection (PR #109) + +### Changed +- Further, improve inferring the language from a given single-byte code page (PR #112) +- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116) +- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113) +- Various detection improvement (MD+CD) (PR #117) + +### Removed +- Remove redundant logging entry about detected language(s) (PR #115) + +### Fixed +- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102) + +## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18) +### Fixed +- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100) +- Fix CLI crash when using --minimal output in certain cases (PR #103) + +### Changed +- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101) + +## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14) +### Changed +- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81) +- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82) +- The Unicode detection is slightly improved (PR #93) +- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91) + +### Removed +- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92) + +### Fixed +- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95) +- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96) +- The MANIFEST.in was not exhaustive (PR #78) + +## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30) +### Fixed +- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70) +- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68) +- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72) +- Submatch factoring could be wrong in rare edge cases (PR #72) +- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72) +- Fix line endings from CRLF to LF for certain project files (PR #67) + +### Changed +- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76) +- Allow fallback on specified encoding if any (PR #71) + +## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16) +### Changed +- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63) +- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64) + +## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15) +### Fixed +- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) + +### Changed +- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57) + +## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13) +### Fixed +- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55) +- Using explain=False permanently disable the verbose output in the current runtime (PR #47) +- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47) +- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52) + +### Changed +- Public function normalize default args values were not aligned with from_bytes (PR #53) + +### Added +- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47) + +## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02) +### Changed +- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet. +- Accent has been made on UTF-8 detection, should perform rather instantaneous. +- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible. +- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time) +- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+ +- utf_7 detection has been reinstated. + +### Removed +- This package no longer require anything when used with Python 3.5 (Dropped cached_property) +- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian. +- The exception hook on UnicodeDecodeError has been removed. + +### Deprecated +- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0 + +### Fixed +- The CLI output used the relative path of the file(s). Should be absolute. + +## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28) +### Fixed +- Logger configuration/usage no longer conflict with others (PR #44) + +## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21) +### Removed +- Using standard logging instead of using the package loguru. +- Dropping nose test framework in favor of the maintained pytest. +- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text. +- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version. +- Stop support for UTF-7 that does not contain a SIG. +- Dropping PrettyTable, replaced with pure JSON output in CLI. + +### Fixed +- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process. +- Not searching properly for the BOM when trying utf32/16 parent codec. + +### Changed +- Improving the package final size by compressing frequencies.json. +- Huge improvement over the larges payload. + +### Added +- CLI now produces JSON consumable output. +- Return ASCII if given sequences fit. Given reasonable confidence. + +## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13) + +### Fixed +- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40) + +## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12) + +### Fixed +- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39) + +## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12) + +### Fixed +- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38) + +## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09) + +### Changed +- Amend the previous release to allow prettytable 2.0 (PR #35) + +## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08) + +### Fixed +- Fix error while using the package with a python pre-release interpreter (PR #33) + +### Changed +- Dependencies refactoring, constraints revised. + +### Added +- Add python 3.9 and 3.10 to the supported interpreters + +MIT License + +Copyright (c) 2025 TAHRI Ahmed R. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/RECORD b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..108409a8e7568060a51e09b5f051127b7ad8ccf7 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/RECORD @@ -0,0 +1,35 @@ +../../Scripts/normalizer.exe,sha256=IvfL1xIwLcN8AdjczrodNaXLaHBUERWBgh7YbfqJYUw,106364 +charset_normalizer-3.4.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +charset_normalizer-3.4.3.dist-info/METADATA,sha256=tqX3UoI-UkqIN99aZsk646yI4NgMbu1MjlKr6BbITG4,37450 +charset_normalizer-3.4.3.dist-info/RECORD,, +charset_normalizer-3.4.3.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99 +charset_normalizer-3.4.3.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65 +charset_normalizer-3.4.3.dist-info/licenses/LICENSE,sha256=GFd0hdNwTxpHne2OVzwJds_tMV_S_ReYP6mI2kwvcNE,1092 +charset_normalizer-3.4.3.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19 +charset_normalizer/__init__.py,sha256=0NT8MHi7SKq3juMqYfOdrkzjisK0L73lneNHH4qaUAs,1638 +charset_normalizer/__main__.py,sha256=2sj_BS6H0sU25C1bMqz9DVwa6kOK9lchSEbSU-_iu7M,115 +charset_normalizer/__pycache__/__init__.cpython-39.pyc,, +charset_normalizer/__pycache__/__main__.cpython-39.pyc,, +charset_normalizer/__pycache__/api.cpython-39.pyc,, +charset_normalizer/__pycache__/cd.cpython-39.pyc,, +charset_normalizer/__pycache__/constant.cpython-39.pyc,, +charset_normalizer/__pycache__/legacy.cpython-39.pyc,, +charset_normalizer/__pycache__/md.cpython-39.pyc,, +charset_normalizer/__pycache__/models.cpython-39.pyc,, +charset_normalizer/__pycache__/utils.cpython-39.pyc,, +charset_normalizer/__pycache__/version.cpython-39.pyc,, +charset_normalizer/api.py,sha256=ODy4hX78b3ldTl5sViYPU1yzQ5qkclfgSIFE8BtNrTI,23337 +charset_normalizer/cd.py,sha256=uq8nVxRpR6Guc16ACvOWtL8KO3w7vYaCh8hHisuOyTg,12917 +charset_normalizer/cli/__init__.py,sha256=d9MUx-1V_qD3x9igIy4JT4oC5CU0yjulk7QyZWeRFhg,144 +charset_normalizer/cli/__main__.py,sha256=-pdJCyPywouPyFsC8_eTSgTmvh1YEvgjsvy1WZ0XjaA,13027 +charset_normalizer/cli/__pycache__/__init__.cpython-39.pyc,, +charset_normalizer/cli/__pycache__/__main__.cpython-39.pyc,, +charset_normalizer/constant.py,sha256=mCJmYzpBU27Ut9kiNWWoBbhhxQ-aRVw3K7LSwoFwBGI,44728 +charset_normalizer/legacy.py,sha256=ui08NlKqAXU3Y7smK-NFJjEgRRQz9ruM7aNCbT0OOrE,2811 +charset_normalizer/md.cp39-win_amd64.pyd,sha256=GBRkMtCJSwm_0H_fJ-Jus0DdpkxHcWVC4XcSnC_seLk,10752 +charset_normalizer/md.py,sha256=LSuW2hNgXSgF7JGdRapLAHLuj6pABHiP85LTNAYmu7c,20780 +charset_normalizer/md__mypyc.cp39-win_amd64.pyd,sha256=CZOPvYPp7PJ4wdp_LKOtla0M0e856CwbTsusjGtnb_k,125440 +charset_normalizer/models.py,sha256=ZR2PE-fqf6dASZfqdE5Uhkmr0o1MciSdXOjuNqwkmvg,12754 +charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +charset_normalizer/utils.py,sha256=XtWIQeOuz7cnGebMzyi4Vvi1JtA84QBSIeR9PDzF7pw,12584 +charset_normalizer/version.py,sha256=laniWEeVCCfwRgYLf_rZ2f0qWaNwWTEXQEfUUL_MMvw,123 diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/WHEEL b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..924b414ac92043db6c5c7dc73fcd6bccc6abf598 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp39-cp39-win_amd64 + diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/entry_points.txt b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..65619e73ec06c20c2a70c9507b872ad624d1a85c --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +normalizer = charset_normalizer.cli:cli_detect diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/licenses/LICENSE b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/licenses/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..8885be9624d2bf3fdeca3551d3b5f475bcb536db --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/licenses/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 TAHRI Ahmed R. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/top_level.txt b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..66958f0a069d7aea7939bed40b9197608e93b243 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer-3.4.3.dist-info/top_level.txt @@ -0,0 +1 @@ +charset_normalizer diff --git a/phivenv/Lib/site-packages/charset_normalizer/__init__.py b/phivenv/Lib/site-packages/charset_normalizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1993f86a9d55ddc3f2b7c1dcd92a0ed55a5637ce --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/__init__.py @@ -0,0 +1,48 @@ +""" +Charset-Normalizer +~~~~~~~~~~~~~~ +The Real First Universal Charset Detector. +A library that helps you read text from an unknown charset encoding. +Motivated by chardet, This package is trying to resolve the issue by taking a new approach. +All IANA character set names for which the Python core library provides codecs are supported. + +Basic usage: + >>> from charset_normalizer import from_bytes + >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8')) + >>> best_guess = results.best() + >>> str(best_guess) + 'Bсеки човек има право на образование. Oбразованието!' + +Others methods and usages are available - see the full documentation +at . +:copyright: (c) 2021 by Ahmed TAHRI +:license: MIT, see LICENSE for more details. +""" + +from __future__ import annotations + +import logging + +from .api import from_bytes, from_fp, from_path, is_binary +from .legacy import detect +from .models import CharsetMatch, CharsetMatches +from .utils import set_logging_handler +from .version import VERSION, __version__ + +__all__ = ( + "from_fp", + "from_path", + "from_bytes", + "is_binary", + "detect", + "CharsetMatch", + "CharsetMatches", + "__version__", + "VERSION", + "set_logging_handler", +) + +# Attach a NullHandler to the top level logger by default +# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library + +logging.getLogger("charset_normalizer").addHandler(logging.NullHandler()) diff --git a/phivenv/Lib/site-packages/charset_normalizer/__main__.py b/phivenv/Lib/site-packages/charset_normalizer/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b623edfde51fc72f586702ed8b5c2f742cfe059 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .cli import cli_detect + +if __name__ == "__main__": + cli_detect() diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__init__.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7218cd2878ffa862e8a6277827ecdf730e6d602a Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__init__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__main__.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__main__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e80369b2f4d33724a803b0e9689e39f9c492c612 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/__main__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/api.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/api.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0dcb1f87a6e7f44e19b4bb5b22cc5ef8ba25811d Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/api.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/cd.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/cd.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e341691f5988016a27a7bac339344ddf735893e Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/cd.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/constant.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/constant.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc01b652035542d641c15dd915164d907d239c90 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/constant.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/legacy.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/legacy.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03f7cd885be0b66909fbf0e0c4ec48cfc3106622 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/legacy.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/md.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/md.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfa2563dd8d9d859cf0564c90deca79ec855d0ce Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/md.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/models.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/models.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2eef0ec73e4a0695145419f6a9ace201d85b5551 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/models.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/utils.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bda7ac438c8b5f6843c93ff1572abff370ac04f Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/utils.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/__pycache__/version.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/version.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb97c9237f11cc482b550f10fbfd663335cd9196 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/__pycache__/version.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/cd.py b/phivenv/Lib/site-packages/charset_normalizer/cd.py new file mode 100644 index 0000000000000000000000000000000000000000..4c1c4ae64c08beae62d7f5202b9fd741ba9949f5 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/cd.py @@ -0,0 +1,395 @@ +from __future__ import annotations + +import importlib +from codecs import IncrementalDecoder +from collections import Counter +from functools import lru_cache +from typing import Counter as TypeCounter + +from .constant import ( + FREQUENCIES, + KO_NAMES, + LANGUAGE_SUPPORTED_COUNT, + TOO_SMALL_SEQUENCE, + ZH_NAMES, +) +from .md import is_suspiciously_successive_range +from .models import CoherenceMatches +from .utils import ( + is_accentuated, + is_latin, + is_multi_byte_encoding, + is_unicode_range_secondary, + unicode_range, +) + + +def encoding_unicode_range(iana_name: str) -> list[str]: + """ + Return associated unicode ranges in a single byte code page. + """ + if is_multi_byte_encoding(iana_name): + raise OSError("Function not supported on multi-byte code page") + + decoder = importlib.import_module(f"encodings.{iana_name}").IncrementalDecoder + + p: IncrementalDecoder = decoder(errors="ignore") + seen_ranges: dict[str, int] = {} + character_count: int = 0 + + for i in range(0x40, 0xFF): + chunk: str = p.decode(bytes([i])) + + if chunk: + character_range: str | None = unicode_range(chunk) + + if character_range is None: + continue + + if is_unicode_range_secondary(character_range) is False: + if character_range not in seen_ranges: + seen_ranges[character_range] = 0 + seen_ranges[character_range] += 1 + character_count += 1 + + return sorted( + [ + character_range + for character_range in seen_ranges + if seen_ranges[character_range] / character_count >= 0.15 + ] + ) + + +def unicode_range_languages(primary_range: str) -> list[str]: + """ + Return inferred languages used with a unicode range. + """ + languages: list[str] = [] + + for language, characters in FREQUENCIES.items(): + for character in characters: + if unicode_range(character) == primary_range: + languages.append(language) + break + + return languages + + +@lru_cache() +def encoding_languages(iana_name: str) -> list[str]: + """ + Single-byte encoding language association. Some code page are heavily linked to particular language(s). + This function does the correspondence. + """ + unicode_ranges: list[str] = encoding_unicode_range(iana_name) + primary_range: str | None = None + + for specified_range in unicode_ranges: + if "Latin" not in specified_range: + primary_range = specified_range + break + + if primary_range is None: + return ["Latin Based"] + + return unicode_range_languages(primary_range) + + +@lru_cache() +def mb_encoding_languages(iana_name: str) -> list[str]: + """ + Multi-byte encoding language association. Some code page are heavily linked to particular language(s). + This function does the correspondence. + """ + if ( + iana_name.startswith("shift_") + or iana_name.startswith("iso2022_jp") + or iana_name.startswith("euc_j") + or iana_name == "cp932" + ): + return ["Japanese"] + if iana_name.startswith("gb") or iana_name in ZH_NAMES: + return ["Chinese"] + if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES: + return ["Korean"] + + return [] + + +@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT) +def get_target_features(language: str) -> tuple[bool, bool]: + """ + Determine main aspects from a supported language if it contains accents and if is pure Latin. + """ + target_have_accents: bool = False + target_pure_latin: bool = True + + for character in FREQUENCIES[language]: + if not target_have_accents and is_accentuated(character): + target_have_accents = True + if target_pure_latin and is_latin(character) is False: + target_pure_latin = False + + return target_have_accents, target_pure_latin + + +def alphabet_languages( + characters: list[str], ignore_non_latin: bool = False +) -> list[str]: + """ + Return associated languages associated to given characters. + """ + languages: list[tuple[str, float]] = [] + + source_have_accents = any(is_accentuated(character) for character in characters) + + for language, language_characters in FREQUENCIES.items(): + target_have_accents, target_pure_latin = get_target_features(language) + + if ignore_non_latin and target_pure_latin is False: + continue + + if target_have_accents is False and source_have_accents: + continue + + character_count: int = len(language_characters) + + character_match_count: int = len( + [c for c in language_characters if c in characters] + ) + + ratio: float = character_match_count / character_count + + if ratio >= 0.2: + languages.append((language, ratio)) + + languages = sorted(languages, key=lambda x: x[1], reverse=True) + + return [compatible_language[0] for compatible_language in languages] + + +def characters_popularity_compare( + language: str, ordered_characters: list[str] +) -> float: + """ + Determine if a ordered characters list (by occurrence from most appearance to rarest) match a particular language. + The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit). + Beware that is function is not strict on the match in order to ease the detection. (Meaning close match is 1.) + """ + if language not in FREQUENCIES: + raise ValueError(f"{language} not available") + + character_approved_count: int = 0 + FREQUENCIES_language_set = set(FREQUENCIES[language]) + + ordered_characters_count: int = len(ordered_characters) + target_language_characters_count: int = len(FREQUENCIES[language]) + + large_alphabet: bool = target_language_characters_count > 26 + + for character, character_rank in zip( + ordered_characters, range(0, ordered_characters_count) + ): + if character not in FREQUENCIES_language_set: + continue + + character_rank_in_language: int = FREQUENCIES[language].index(character) + expected_projection_ratio: float = ( + target_language_characters_count / ordered_characters_count + ) + character_rank_projection: int = int(character_rank * expected_projection_ratio) + + if ( + large_alphabet is False + and abs(character_rank_projection - character_rank_in_language) > 4 + ): + continue + + if ( + large_alphabet is True + and abs(character_rank_projection - character_rank_in_language) + < target_language_characters_count / 3 + ): + character_approved_count += 1 + continue + + characters_before_source: list[str] = FREQUENCIES[language][ + 0:character_rank_in_language + ] + characters_after_source: list[str] = FREQUENCIES[language][ + character_rank_in_language: + ] + characters_before: list[str] = ordered_characters[0:character_rank] + characters_after: list[str] = ordered_characters[character_rank:] + + before_match_count: int = len( + set(characters_before) & set(characters_before_source) + ) + + after_match_count: int = len( + set(characters_after) & set(characters_after_source) + ) + + if len(characters_before_source) == 0 and before_match_count <= 4: + character_approved_count += 1 + continue + + if len(characters_after_source) == 0 and after_match_count <= 4: + character_approved_count += 1 + continue + + if ( + before_match_count / len(characters_before_source) >= 0.4 + or after_match_count / len(characters_after_source) >= 0.4 + ): + character_approved_count += 1 + continue + + return character_approved_count / len(ordered_characters) + + +def alpha_unicode_split(decoded_sequence: str) -> list[str]: + """ + Given a decoded text sequence, return a list of str. Unicode range / alphabet separation. + Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list; + One containing the latin letters and the other hebrew. + """ + layers: dict[str, str] = {} + + for character in decoded_sequence: + if character.isalpha() is False: + continue + + character_range: str | None = unicode_range(character) + + if character_range is None: + continue + + layer_target_range: str | None = None + + for discovered_range in layers: + if ( + is_suspiciously_successive_range(discovered_range, character_range) + is False + ): + layer_target_range = discovered_range + break + + if layer_target_range is None: + layer_target_range = character_range + + if layer_target_range not in layers: + layers[layer_target_range] = character.lower() + continue + + layers[layer_target_range] += character.lower() + + return list(layers.values()) + + +def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches: + """ + This function merge results previously given by the function coherence_ratio. + The return type is the same as coherence_ratio. + """ + per_language_ratios: dict[str, list[float]] = {} + for result in results: + for sub_result in result: + language, ratio = sub_result + if language not in per_language_ratios: + per_language_ratios[language] = [ratio] + continue + per_language_ratios[language].append(ratio) + + merge = [ + ( + language, + round( + sum(per_language_ratios[language]) / len(per_language_ratios[language]), + 4, + ), + ) + for language in per_language_ratios + ] + + return sorted(merge, key=lambda x: x[1], reverse=True) + + +def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches: + """ + We shall NOT return "English—" in CoherenceMatches because it is an alternative + of "English". This function only keeps the best match and remove the em-dash in it. + """ + index_results: dict[str, list[float]] = dict() + + for result in results: + language, ratio = result + no_em_name: str = language.replace("—", "") + + if no_em_name not in index_results: + index_results[no_em_name] = [] + + index_results[no_em_name].append(ratio) + + if any(len(index_results[e]) > 1 for e in index_results): + filtered_results: CoherenceMatches = [] + + for language in index_results: + filtered_results.append((language, max(index_results[language]))) + + return filtered_results + + return results + + +@lru_cache(maxsize=2048) +def coherence_ratio( + decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None +) -> CoherenceMatches: + """ + Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers. + A layer = Character extraction by alphabets/ranges. + """ + + results: list[tuple[str, float]] = [] + ignore_non_latin: bool = False + + sufficient_match_count: int = 0 + + lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else [] + if "Latin Based" in lg_inclusion_list: + ignore_non_latin = True + lg_inclusion_list.remove("Latin Based") + + for layer in alpha_unicode_split(decoded_sequence): + sequence_frequencies: TypeCounter[str] = Counter(layer) + most_common = sequence_frequencies.most_common() + + character_count: int = sum(o for c, o in most_common) + + if character_count <= TOO_SMALL_SEQUENCE: + continue + + popular_character_ordered: list[str] = [c for c, o in most_common] + + for language in lg_inclusion_list or alphabet_languages( + popular_character_ordered, ignore_non_latin + ): + ratio: float = characters_popularity_compare( + language, popular_character_ordered + ) + + if ratio < threshold: + continue + elif ratio >= 0.8: + sufficient_match_count += 1 + + results.append((language, round(ratio, 4))) + + if sufficient_match_count >= 3: + break + + return sorted( + filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True + ) diff --git a/phivenv/Lib/site-packages/charset_normalizer/cli/__init__.py b/phivenv/Lib/site-packages/charset_normalizer/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..562b64e01cb42d989f2ddd5a8651841f700888e4 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/cli/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .__main__ import cli_detect, query_yes_no + +__all__ = ( + "cli_detect", + "query_yes_no", +) diff --git a/phivenv/Lib/site-packages/charset_normalizer/cli/__main__.py b/phivenv/Lib/site-packages/charset_normalizer/cli/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1857b154ed3e396d470a3a659e8a3296c472d0 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/cli/__main__.py @@ -0,0 +1,381 @@ +from __future__ import annotations + +import argparse +import sys +import typing +from json import dumps +from os.path import abspath, basename, dirname, join, realpath +from platform import python_version +from unicodedata import unidata_version + +import charset_normalizer.md as md_module +from charset_normalizer import from_fp +from charset_normalizer.models import CliDetectionResult +from charset_normalizer.version import __version__ + + +def query_yes_no(question: str, default: str = "yes") -> bool: + """Ask a yes/no question via input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits . + It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is True for "yes" or False for "no". + + Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input + """ + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} + if default is None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + sys.stdout.write(question + prompt) + choice = input().lower() + if default is not None and choice == "": + return valid[default] + elif choice in valid: + return valid[choice] + else: + sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n") + + +class FileType: + """Factory for creating file object types + + Instances of FileType are typically passed as type= arguments to the + ArgumentParser add_argument() method. + + Keyword Arguments: + - mode -- A string indicating how the file is to be opened. Accepts the + same values as the builtin open() function. + - bufsize -- The file's desired buffer size. Accepts the same values as + the builtin open() function. + - encoding -- The file's encoding. Accepts the same values as the + builtin open() function. + - errors -- A string indicating how encoding and decoding errors are to + be handled. Accepts the same value as the builtin open() function. + + Backported from CPython 3.12 + """ + + def __init__( + self, + mode: str = "r", + bufsize: int = -1, + encoding: str | None = None, + errors: str | None = None, + ): + self._mode = mode + self._bufsize = bufsize + self._encoding = encoding + self._errors = errors + + def __call__(self, string: str) -> typing.IO: # type: ignore[type-arg] + # the special argument "-" means sys.std{in,out} + if string == "-": + if "r" in self._mode: + return sys.stdin.buffer if "b" in self._mode else sys.stdin + elif any(c in self._mode for c in "wax"): + return sys.stdout.buffer if "b" in self._mode else sys.stdout + else: + msg = f'argument "-" with mode {self._mode}' + raise ValueError(msg) + + # all other arguments are used as file names + try: + return open(string, self._mode, self._bufsize, self._encoding, self._errors) + except OSError as e: + message = f"can't open '{string}': {e}" + raise argparse.ArgumentTypeError(message) + + def __repr__(self) -> str: + args = self._mode, self._bufsize + kwargs = [("encoding", self._encoding), ("errors", self._errors)] + args_str = ", ".join( + [repr(arg) for arg in args if arg != -1] + + [f"{kw}={arg!r}" for kw, arg in kwargs if arg is not None] + ) + return f"{type(self).__name__}({args_str})" + + +def cli_detect(argv: list[str] | None = None) -> int: + """ + CLI assistant using ARGV and ArgumentParser + :param argv: + :return: 0 if everything is fine, anything else equal trouble + """ + parser = argparse.ArgumentParser( + description="The Real First Universal Charset Detector. " + "Discover originating encoding used on text file. " + "Normalize text to unicode." + ) + + parser.add_argument( + "files", type=FileType("rb"), nargs="+", help="File(s) to be analysed" + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + default=False, + dest="verbose", + help="Display complementary information about file if any. " + "Stdout will contain logs about the detection process.", + ) + parser.add_argument( + "-a", + "--with-alternative", + action="store_true", + default=False, + dest="alternatives", + help="Output complementary possibilities if any. Top-level JSON WILL be a list.", + ) + parser.add_argument( + "-n", + "--normalize", + action="store_true", + default=False, + dest="normalize", + help="Permit to normalize input file. If not set, program does not write anything.", + ) + parser.add_argument( + "-m", + "--minimal", + action="store_true", + default=False, + dest="minimal", + help="Only output the charset detected to STDOUT. Disabling JSON output.", + ) + parser.add_argument( + "-r", + "--replace", + action="store_true", + default=False, + dest="replace", + help="Replace file when trying to normalize it instead of creating a new one.", + ) + parser.add_argument( + "-f", + "--force", + action="store_true", + default=False, + dest="force", + help="Replace file without asking if you are sure, use this flag with caution.", + ) + parser.add_argument( + "-i", + "--no-preemptive", + action="store_true", + default=False, + dest="no_preemptive", + help="Disable looking at a charset declaration to hint the detector.", + ) + parser.add_argument( + "-t", + "--threshold", + action="store", + default=0.2, + type=float, + dest="threshold", + help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.", + ) + parser.add_argument( + "--version", + action="version", + version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format( + __version__, + python_version(), + unidata_version, + "OFF" if md_module.__file__.lower().endswith(".py") else "ON", + ), + help="Show version information and exit.", + ) + + args = parser.parse_args(argv) + + if args.replace is True and args.normalize is False: + if args.files: + for my_file in args.files: + my_file.close() + print("Use --replace in addition of --normalize only.", file=sys.stderr) + return 1 + + if args.force is True and args.replace is False: + if args.files: + for my_file in args.files: + my_file.close() + print("Use --force in addition of --replace only.", file=sys.stderr) + return 1 + + if args.threshold < 0.0 or args.threshold > 1.0: + if args.files: + for my_file in args.files: + my_file.close() + print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr) + return 1 + + x_ = [] + + for my_file in args.files: + matches = from_fp( + my_file, + threshold=args.threshold, + explain=args.verbose, + preemptive_behaviour=args.no_preemptive is False, + ) + + best_guess = matches.best() + + if best_guess is None: + print( + 'Unable to identify originating encoding for "{}". {}'.format( + my_file.name, + ( + "Maybe try increasing maximum amount of chaos." + if args.threshold < 1.0 + else "" + ), + ), + file=sys.stderr, + ) + x_.append( + CliDetectionResult( + abspath(my_file.name), + None, + [], + [], + "Unknown", + [], + False, + 1.0, + 0.0, + None, + True, + ) + ) + else: + x_.append( + CliDetectionResult( + abspath(my_file.name), + best_guess.encoding, + best_guess.encoding_aliases, + [ + cp + for cp in best_guess.could_be_from_charset + if cp != best_guess.encoding + ], + best_guess.language, + best_guess.alphabets, + best_guess.bom, + best_guess.percent_chaos, + best_guess.percent_coherence, + None, + True, + ) + ) + + if len(matches) > 1 and args.alternatives: + for el in matches: + if el != best_guess: + x_.append( + CliDetectionResult( + abspath(my_file.name), + el.encoding, + el.encoding_aliases, + [ + cp + for cp in el.could_be_from_charset + if cp != el.encoding + ], + el.language, + el.alphabets, + el.bom, + el.percent_chaos, + el.percent_coherence, + None, + False, + ) + ) + + if args.normalize is True: + if best_guess.encoding.startswith("utf") is True: + print( + '"{}" file does not need to be normalized, as it already came from unicode.'.format( + my_file.name + ), + file=sys.stderr, + ) + if my_file.closed is False: + my_file.close() + continue + + dir_path = dirname(realpath(my_file.name)) + file_name = basename(realpath(my_file.name)) + + o_: list[str] = file_name.split(".") + + if args.replace is False: + o_.insert(-1, best_guess.encoding) + if my_file.closed is False: + my_file.close() + elif ( + args.force is False + and query_yes_no( + 'Are you sure to normalize "{}" by replacing it ?'.format( + my_file.name + ), + "no", + ) + is False + ): + if my_file.closed is False: + my_file.close() + continue + + try: + x_[0].unicode_path = join(dir_path, ".".join(o_)) + + with open(x_[0].unicode_path, "wb") as fp: + fp.write(best_guess.output()) + except OSError as e: + print(str(e), file=sys.stderr) + if my_file.closed is False: + my_file.close() + return 2 + + if my_file.closed is False: + my_file.close() + + if args.minimal is False: + print( + dumps( + [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__, + ensure_ascii=True, + indent=4, + ) + ) + else: + for my_file in args.files: + print( + ", ".join( + [ + el.encoding or "undefined" + for el in x_ + if el.path == abspath(my_file.name) + ] + ) + ) + + return 0 + + +if __name__ == "__main__": + cli_detect() diff --git a/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..645a4905042a343f7796de06a716eeda4754d6dd Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-39.pyc b/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb93adf428f9f61d485050246d64e30f13284bc6 Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/constant.py b/phivenv/Lib/site-packages/charset_normalizer/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..609187b2ea456ae4a2da015479b01200e581721c --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/constant.py @@ -0,0 +1,2015 @@ +from __future__ import annotations + +from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE +from encodings.aliases import aliases +from re import IGNORECASE +from re import compile as re_compile + +# Contain for each eligible encoding a list of/item bytes SIG/BOM +ENCODING_MARKS: dict[str, bytes | list[bytes]] = { + "utf_8": BOM_UTF8, + "utf_7": [ + b"\x2b\x2f\x76\x38", + b"\x2b\x2f\x76\x39", + b"\x2b\x2f\x76\x2b", + b"\x2b\x2f\x76\x2f", + b"\x2b\x2f\x76\x38\x2d", + ], + "gb18030": b"\x84\x31\x95\x33", + "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE], + "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE], +} + +TOO_SMALL_SEQUENCE: int = 32 +TOO_BIG_SEQUENCE: int = int(10e6) + +UTF8_MAXIMAL_ALLOCATION: int = 1_112_064 + +# Up-to-date Unicode ucd/15.0.0 +UNICODE_RANGES_COMBINED: dict[str, range] = { + "Control character": range(32), + "Basic Latin": range(32, 128), + "Latin-1 Supplement": range(128, 256), + "Latin Extended-A": range(256, 384), + "Latin Extended-B": range(384, 592), + "IPA Extensions": range(592, 688), + "Spacing Modifier Letters": range(688, 768), + "Combining Diacritical Marks": range(768, 880), + "Greek and Coptic": range(880, 1024), + "Cyrillic": range(1024, 1280), + "Cyrillic Supplement": range(1280, 1328), + "Armenian": range(1328, 1424), + "Hebrew": range(1424, 1536), + "Arabic": range(1536, 1792), + "Syriac": range(1792, 1872), + "Arabic Supplement": range(1872, 1920), + "Thaana": range(1920, 1984), + "NKo": range(1984, 2048), + "Samaritan": range(2048, 2112), + "Mandaic": range(2112, 2144), + "Syriac Supplement": range(2144, 2160), + "Arabic Extended-B": range(2160, 2208), + "Arabic Extended-A": range(2208, 2304), + "Devanagari": range(2304, 2432), + "Bengali": range(2432, 2560), + "Gurmukhi": range(2560, 2688), + "Gujarati": range(2688, 2816), + "Oriya": range(2816, 2944), + "Tamil": range(2944, 3072), + "Telugu": range(3072, 3200), + "Kannada": range(3200, 3328), + "Malayalam": range(3328, 3456), + "Sinhala": range(3456, 3584), + "Thai": range(3584, 3712), + "Lao": range(3712, 3840), + "Tibetan": range(3840, 4096), + "Myanmar": range(4096, 4256), + "Georgian": range(4256, 4352), + "Hangul Jamo": range(4352, 4608), + "Ethiopic": range(4608, 4992), + "Ethiopic Supplement": range(4992, 5024), + "Cherokee": range(5024, 5120), + "Unified Canadian Aboriginal Syllabics": range(5120, 5760), + "Ogham": range(5760, 5792), + "Runic": range(5792, 5888), + "Tagalog": range(5888, 5920), + "Hanunoo": range(5920, 5952), + "Buhid": range(5952, 5984), + "Tagbanwa": range(5984, 6016), + "Khmer": range(6016, 6144), + "Mongolian": range(6144, 6320), + "Unified Canadian Aboriginal Syllabics Extended": range(6320, 6400), + "Limbu": range(6400, 6480), + "Tai Le": range(6480, 6528), + "New Tai Lue": range(6528, 6624), + "Khmer Symbols": range(6624, 6656), + "Buginese": range(6656, 6688), + "Tai Tham": range(6688, 6832), + "Combining Diacritical Marks Extended": range(6832, 6912), + "Balinese": range(6912, 7040), + "Sundanese": range(7040, 7104), + "Batak": range(7104, 7168), + "Lepcha": range(7168, 7248), + "Ol Chiki": range(7248, 7296), + "Cyrillic Extended-C": range(7296, 7312), + "Georgian Extended": range(7312, 7360), + "Sundanese Supplement": range(7360, 7376), + "Vedic Extensions": range(7376, 7424), + "Phonetic Extensions": range(7424, 7552), + "Phonetic Extensions Supplement": range(7552, 7616), + "Combining Diacritical Marks Supplement": range(7616, 7680), + "Latin Extended Additional": range(7680, 7936), + "Greek Extended": range(7936, 8192), + "General Punctuation": range(8192, 8304), + "Superscripts and Subscripts": range(8304, 8352), + "Currency Symbols": range(8352, 8400), + "Combining Diacritical Marks for Symbols": range(8400, 8448), + "Letterlike Symbols": range(8448, 8528), + "Number Forms": range(8528, 8592), + "Arrows": range(8592, 8704), + "Mathematical Operators": range(8704, 8960), + "Miscellaneous Technical": range(8960, 9216), + "Control Pictures": range(9216, 9280), + "Optical Character Recognition": range(9280, 9312), + "Enclosed Alphanumerics": range(9312, 9472), + "Box Drawing": range(9472, 9600), + "Block Elements": range(9600, 9632), + "Geometric Shapes": range(9632, 9728), + "Miscellaneous Symbols": range(9728, 9984), + "Dingbats": range(9984, 10176), + "Miscellaneous Mathematical Symbols-A": range(10176, 10224), + "Supplemental Arrows-A": range(10224, 10240), + "Braille Patterns": range(10240, 10496), + "Supplemental Arrows-B": range(10496, 10624), + "Miscellaneous Mathematical Symbols-B": range(10624, 10752), + "Supplemental Mathematical Operators": range(10752, 11008), + "Miscellaneous Symbols and Arrows": range(11008, 11264), + "Glagolitic": range(11264, 11360), + "Latin Extended-C": range(11360, 11392), + "Coptic": range(11392, 11520), + "Georgian Supplement": range(11520, 11568), + "Tifinagh": range(11568, 11648), + "Ethiopic Extended": range(11648, 11744), + "Cyrillic Extended-A": range(11744, 11776), + "Supplemental Punctuation": range(11776, 11904), + "CJK Radicals Supplement": range(11904, 12032), + "Kangxi Radicals": range(12032, 12256), + "Ideographic Description Characters": range(12272, 12288), + "CJK Symbols and Punctuation": range(12288, 12352), + "Hiragana": range(12352, 12448), + "Katakana": range(12448, 12544), + "Bopomofo": range(12544, 12592), + "Hangul Compatibility Jamo": range(12592, 12688), + "Kanbun": range(12688, 12704), + "Bopomofo Extended": range(12704, 12736), + "CJK Strokes": range(12736, 12784), + "Katakana Phonetic Extensions": range(12784, 12800), + "Enclosed CJK Letters and Months": range(12800, 13056), + "CJK Compatibility": range(13056, 13312), + "CJK Unified Ideographs Extension A": range(13312, 19904), + "Yijing Hexagram Symbols": range(19904, 19968), + "CJK Unified Ideographs": range(19968, 40960), + "Yi Syllables": range(40960, 42128), + "Yi Radicals": range(42128, 42192), + "Lisu": range(42192, 42240), + "Vai": range(42240, 42560), + "Cyrillic Extended-B": range(42560, 42656), + "Bamum": range(42656, 42752), + "Modifier Tone Letters": range(42752, 42784), + "Latin Extended-D": range(42784, 43008), + "Syloti Nagri": range(43008, 43056), + "Common Indic Number Forms": range(43056, 43072), + "Phags-pa": range(43072, 43136), + "Saurashtra": range(43136, 43232), + "Devanagari Extended": range(43232, 43264), + "Kayah Li": range(43264, 43312), + "Rejang": range(43312, 43360), + "Hangul Jamo Extended-A": range(43360, 43392), + "Javanese": range(43392, 43488), + "Myanmar Extended-B": range(43488, 43520), + "Cham": range(43520, 43616), + "Myanmar Extended-A": range(43616, 43648), + "Tai Viet": range(43648, 43744), + "Meetei Mayek Extensions": range(43744, 43776), + "Ethiopic Extended-A": range(43776, 43824), + "Latin Extended-E": range(43824, 43888), + "Cherokee Supplement": range(43888, 43968), + "Meetei Mayek": range(43968, 44032), + "Hangul Syllables": range(44032, 55216), + "Hangul Jamo Extended-B": range(55216, 55296), + "High Surrogates": range(55296, 56192), + "High Private Use Surrogates": range(56192, 56320), + "Low Surrogates": range(56320, 57344), + "Private Use Area": range(57344, 63744), + "CJK Compatibility Ideographs": range(63744, 64256), + "Alphabetic Presentation Forms": range(64256, 64336), + "Arabic Presentation Forms-A": range(64336, 65024), + "Variation Selectors": range(65024, 65040), + "Vertical Forms": range(65040, 65056), + "Combining Half Marks": range(65056, 65072), + "CJK Compatibility Forms": range(65072, 65104), + "Small Form Variants": range(65104, 65136), + "Arabic Presentation Forms-B": range(65136, 65280), + "Halfwidth and Fullwidth Forms": range(65280, 65520), + "Specials": range(65520, 65536), + "Linear B Syllabary": range(65536, 65664), + "Linear B Ideograms": range(65664, 65792), + "Aegean Numbers": range(65792, 65856), + "Ancient Greek Numbers": range(65856, 65936), + "Ancient Symbols": range(65936, 66000), + "Phaistos Disc": range(66000, 66048), + "Lycian": range(66176, 66208), + "Carian": range(66208, 66272), + "Coptic Epact Numbers": range(66272, 66304), + "Old Italic": range(66304, 66352), + "Gothic": range(66352, 66384), + "Old Permic": range(66384, 66432), + "Ugaritic": range(66432, 66464), + "Old Persian": range(66464, 66528), + "Deseret": range(66560, 66640), + "Shavian": range(66640, 66688), + "Osmanya": range(66688, 66736), + "Osage": range(66736, 66816), + "Elbasan": range(66816, 66864), + "Caucasian Albanian": range(66864, 66928), + "Vithkuqi": range(66928, 67008), + "Linear A": range(67072, 67456), + "Latin Extended-F": range(67456, 67520), + "Cypriot Syllabary": range(67584, 67648), + "Imperial Aramaic": range(67648, 67680), + "Palmyrene": range(67680, 67712), + "Nabataean": range(67712, 67760), + "Hatran": range(67808, 67840), + "Phoenician": range(67840, 67872), + "Lydian": range(67872, 67904), + "Meroitic Hieroglyphs": range(67968, 68000), + "Meroitic Cursive": range(68000, 68096), + "Kharoshthi": range(68096, 68192), + "Old South Arabian": range(68192, 68224), + "Old North Arabian": range(68224, 68256), + "Manichaean": range(68288, 68352), + "Avestan": range(68352, 68416), + "Inscriptional Parthian": range(68416, 68448), + "Inscriptional Pahlavi": range(68448, 68480), + "Psalter Pahlavi": range(68480, 68528), + "Old Turkic": range(68608, 68688), + "Old Hungarian": range(68736, 68864), + "Hanifi Rohingya": range(68864, 68928), + "Rumi Numeral Symbols": range(69216, 69248), + "Yezidi": range(69248, 69312), + "Arabic Extended-C": range(69312, 69376), + "Old Sogdian": range(69376, 69424), + "Sogdian": range(69424, 69488), + "Old Uyghur": range(69488, 69552), + "Chorasmian": range(69552, 69600), + "Elymaic": range(69600, 69632), + "Brahmi": range(69632, 69760), + "Kaithi": range(69760, 69840), + "Sora Sompeng": range(69840, 69888), + "Chakma": range(69888, 69968), + "Mahajani": range(69968, 70016), + "Sharada": range(70016, 70112), + "Sinhala Archaic Numbers": range(70112, 70144), + "Khojki": range(70144, 70224), + "Multani": range(70272, 70320), + "Khudawadi": range(70320, 70400), + "Grantha": range(70400, 70528), + "Newa": range(70656, 70784), + "Tirhuta": range(70784, 70880), + "Siddham": range(71040, 71168), + "Modi": range(71168, 71264), + "Mongolian Supplement": range(71264, 71296), + "Takri": range(71296, 71376), + "Ahom": range(71424, 71504), + "Dogra": range(71680, 71760), + "Warang Citi": range(71840, 71936), + "Dives Akuru": range(71936, 72032), + "Nandinagari": range(72096, 72192), + "Zanabazar Square": range(72192, 72272), + "Soyombo": range(72272, 72368), + "Unified Canadian Aboriginal Syllabics Extended-A": range(72368, 72384), + "Pau Cin Hau": range(72384, 72448), + "Devanagari Extended-A": range(72448, 72544), + "Bhaiksuki": range(72704, 72816), + "Marchen": range(72816, 72896), + "Masaram Gondi": range(72960, 73056), + "Gunjala Gondi": range(73056, 73136), + "Makasar": range(73440, 73472), + "Kawi": range(73472, 73568), + "Lisu Supplement": range(73648, 73664), + "Tamil Supplement": range(73664, 73728), + "Cuneiform": range(73728, 74752), + "Cuneiform Numbers and Punctuation": range(74752, 74880), + "Early Dynastic Cuneiform": range(74880, 75088), + "Cypro-Minoan": range(77712, 77824), + "Egyptian Hieroglyphs": range(77824, 78896), + "Egyptian Hieroglyph Format Controls": range(78896, 78944), + "Anatolian Hieroglyphs": range(82944, 83584), + "Bamum Supplement": range(92160, 92736), + "Mro": range(92736, 92784), + "Tangsa": range(92784, 92880), + "Bassa Vah": range(92880, 92928), + "Pahawh Hmong": range(92928, 93072), + "Medefaidrin": range(93760, 93856), + "Miao": range(93952, 94112), + "Ideographic Symbols and Punctuation": range(94176, 94208), + "Tangut": range(94208, 100352), + "Tangut Components": range(100352, 101120), + "Khitan Small Script": range(101120, 101632), + "Tangut Supplement": range(101632, 101760), + "Kana Extended-B": range(110576, 110592), + "Kana Supplement": range(110592, 110848), + "Kana Extended-A": range(110848, 110896), + "Small Kana Extension": range(110896, 110960), + "Nushu": range(110960, 111360), + "Duployan": range(113664, 113824), + "Shorthand Format Controls": range(113824, 113840), + "Znamenny Musical Notation": range(118528, 118736), + "Byzantine Musical Symbols": range(118784, 119040), + "Musical Symbols": range(119040, 119296), + "Ancient Greek Musical Notation": range(119296, 119376), + "Kaktovik Numerals": range(119488, 119520), + "Mayan Numerals": range(119520, 119552), + "Tai Xuan Jing Symbols": range(119552, 119648), + "Counting Rod Numerals": range(119648, 119680), + "Mathematical Alphanumeric Symbols": range(119808, 120832), + "Sutton SignWriting": range(120832, 121520), + "Latin Extended-G": range(122624, 122880), + "Glagolitic Supplement": range(122880, 122928), + "Cyrillic Extended-D": range(122928, 123024), + "Nyiakeng Puachue Hmong": range(123136, 123216), + "Toto": range(123536, 123584), + "Wancho": range(123584, 123648), + "Nag Mundari": range(124112, 124160), + "Ethiopic Extended-B": range(124896, 124928), + "Mende Kikakui": range(124928, 125152), + "Adlam": range(125184, 125280), + "Indic Siyaq Numbers": range(126064, 126144), + "Ottoman Siyaq Numbers": range(126208, 126288), + "Arabic Mathematical Alphabetic Symbols": range(126464, 126720), + "Mahjong Tiles": range(126976, 127024), + "Domino Tiles": range(127024, 127136), + "Playing Cards": range(127136, 127232), + "Enclosed Alphanumeric Supplement": range(127232, 127488), + "Enclosed Ideographic Supplement": range(127488, 127744), + "Miscellaneous Symbols and Pictographs": range(127744, 128512), + "Emoticons range(Emoji)": range(128512, 128592), + "Ornamental Dingbats": range(128592, 128640), + "Transport and Map Symbols": range(128640, 128768), + "Alchemical Symbols": range(128768, 128896), + "Geometric Shapes Extended": range(128896, 129024), + "Supplemental Arrows-C": range(129024, 129280), + "Supplemental Symbols and Pictographs": range(129280, 129536), + "Chess Symbols": range(129536, 129648), + "Symbols and Pictographs Extended-A": range(129648, 129792), + "Symbols for Legacy Computing": range(129792, 130048), + "CJK Unified Ideographs Extension B": range(131072, 173792), + "CJK Unified Ideographs Extension C": range(173824, 177984), + "CJK Unified Ideographs Extension D": range(177984, 178208), + "CJK Unified Ideographs Extension E": range(178208, 183984), + "CJK Unified Ideographs Extension F": range(183984, 191472), + "CJK Compatibility Ideographs Supplement": range(194560, 195104), + "CJK Unified Ideographs Extension G": range(196608, 201552), + "CJK Unified Ideographs Extension H": range(201552, 205744), + "Tags": range(917504, 917632), + "Variation Selectors Supplement": range(917760, 918000), + "Supplementary Private Use Area-A": range(983040, 1048576), + "Supplementary Private Use Area-B": range(1048576, 1114112), +} + + +UNICODE_SECONDARY_RANGE_KEYWORD: list[str] = [ + "Supplement", + "Extended", + "Extensions", + "Modifier", + "Marks", + "Punctuation", + "Symbols", + "Forms", + "Operators", + "Miscellaneous", + "Drawing", + "Block", + "Shapes", + "Supplemental", + "Tags", +] + +RE_POSSIBLE_ENCODING_INDICATION = re_compile( + r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)", + IGNORECASE, +) + +IANA_NO_ALIASES = [ + "cp720", + "cp737", + "cp856", + "cp874", + "cp875", + "cp1006", + "koi8_r", + "koi8_t", + "koi8_u", +] + +IANA_SUPPORTED: list[str] = sorted( + filter( + lambda x: x.endswith("_codec") is False + and x not in {"rot_13", "tactis", "mbcs"}, + list(set(aliases.values())) + IANA_NO_ALIASES, + ) +) + +IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED) + +# pre-computed code page that are similar using the function cp_similarity. +IANA_SUPPORTED_SIMILAR: dict[str, list[str]] = { + "cp037": ["cp1026", "cp1140", "cp273", "cp500"], + "cp1026": ["cp037", "cp1140", "cp273", "cp500"], + "cp1125": ["cp866"], + "cp1140": ["cp037", "cp1026", "cp273", "cp500"], + "cp1250": ["iso8859_2"], + "cp1251": ["kz1048", "ptcp154"], + "cp1252": ["iso8859_15", "iso8859_9", "latin_1"], + "cp1253": ["iso8859_7"], + "cp1254": ["iso8859_15", "iso8859_9", "latin_1"], + "cp1257": ["iso8859_13"], + "cp273": ["cp037", "cp1026", "cp1140", "cp500"], + "cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"], + "cp500": ["cp037", "cp1026", "cp1140", "cp273"], + "cp850": ["cp437", "cp857", "cp858", "cp865"], + "cp857": ["cp850", "cp858", "cp865"], + "cp858": ["cp437", "cp850", "cp857", "cp865"], + "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"], + "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"], + "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"], + "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"], + "cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"], + "cp866": ["cp1125"], + "iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"], + "iso8859_11": ["tis_620"], + "iso8859_13": ["cp1257"], + "iso8859_14": [ + "iso8859_10", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_15": [ + "cp1252", + "cp1254", + "iso8859_10", + "iso8859_14", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_16": [ + "iso8859_14", + "iso8859_15", + "iso8859_2", + "iso8859_3", + "iso8859_9", + "latin_1", + ], + "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"], + "iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"], + "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"], + "iso8859_7": ["cp1253"], + "iso8859_9": [ + "cp1252", + "cp1254", + "cp1258", + "iso8859_10", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_4", + "latin_1", + ], + "kz1048": ["cp1251", "ptcp154"], + "latin_1": [ + "cp1252", + "cp1254", + "cp1258", + "iso8859_10", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_4", + "iso8859_9", + ], + "mac_iceland": ["mac_roman", "mac_turkish"], + "mac_roman": ["mac_iceland", "mac_turkish"], + "mac_turkish": ["mac_iceland", "mac_roman"], + "ptcp154": ["cp1251", "kz1048"], + "tis_620": ["iso8859_11"], +} + + +CHARDET_CORRESPONDENCE: dict[str, str] = { + "iso2022_kr": "ISO-2022-KR", + "iso2022_jp": "ISO-2022-JP", + "euc_kr": "EUC-KR", + "tis_620": "TIS-620", + "utf_32": "UTF-32", + "euc_jp": "EUC-JP", + "koi8_r": "KOI8-R", + "iso8859_1": "ISO-8859-1", + "iso8859_2": "ISO-8859-2", + "iso8859_5": "ISO-8859-5", + "iso8859_6": "ISO-8859-6", + "iso8859_7": "ISO-8859-7", + "iso8859_8": "ISO-8859-8", + "utf_16": "UTF-16", + "cp855": "IBM855", + "mac_cyrillic": "MacCyrillic", + "gb2312": "GB2312", + "gb18030": "GB18030", + "cp932": "CP932", + "cp866": "IBM866", + "utf_8": "utf-8", + "utf_8_sig": "UTF-8-SIG", + "shift_jis": "SHIFT_JIS", + "big5": "Big5", + "cp1250": "windows-1250", + "cp1251": "windows-1251", + "cp1252": "Windows-1252", + "cp1253": "windows-1253", + "cp1255": "windows-1255", + "cp1256": "windows-1256", + "cp1254": "Windows-1254", + "cp949": "CP949", +} + + +COMMON_SAFE_ASCII_CHARACTERS: set[str] = { + "<", + ">", + "=", + ":", + "/", + "&", + ";", + "{", + "}", + "[", + "]", + ",", + "|", + '"', + "-", + "(", + ")", +} + +# Sample character sets — replace with full lists if needed +COMMON_CHINESE_CHARACTERS = "的一是在不了有和人这中大为上个国我以要他时来用们生到作地于出就分对成会可主发年动同工也能下过子说产种面而方后多定行学法所民得经十三之进着等部度家电力里如水化高自二理起小物现实加量都两体制机当使点从业本去把性好应开它合还因由其些然前外天政四日那社义事平形相全表间样与关各重新线内数正心反你明看原又么利比或但质气第向道命此变条只没结解问意建月公无系军很情者最立代想已通并提直题党程展五果料象员革位入常文总次品式活设及管特件长求老头基资边流路级少图山统接知较将组见计别她手角期根论运农指几九区强放决西被干做必战先回则任取据处队南给色光门即保治北造百规热领七海口东导器压志世金增争济阶油思术极交受联什认六共权收证改清己美再采转更单风切打白教速花带安场身车例真务具万每目至达走积示议声报斗完类八离华名确才科张信马节话米整空元况今集温传土许步群广石记需段研界拉林律叫且究观越织装影算低持音众书布复容儿须际商非验连断深难近矿千周委素技备半办青省列习响约支般史感劳便团往酸历市克何除消构府太准精值号率族维划选标写存候毛亲快效斯院查江型眼王按格养易置派层片始却专状育厂京识适属圆包火住调满县局照参红细引听该铁价严龙飞" + +COMMON_JAPANESE_CHARACTERS = "日一国年大十二本中長出三時行見月分後前生五間上東四今金九入学高円子外八六下来気小七山話女北午百書先名川千水半男西電校語土木聞食車何南万毎白天母火右読友左休父雨" + +COMMON_KOREAN_CHARACTERS = "一二三四五六七八九十百千萬上下左右中人女子大小山川日月火水木金土父母天地國名年時文校學生" + +# Combine all into a set +COMMON_CJK_CHARACTERS = set( + "".join( + [ + COMMON_CHINESE_CHARACTERS, + COMMON_JAPANESE_CHARACTERS, + COMMON_KOREAN_CHARACTERS, + ] + ) +) + +KO_NAMES: set[str] = {"johab", "cp949", "euc_kr"} +ZH_NAMES: set[str] = {"big5", "cp950", "big5hkscs", "hz"} + +# Logging LEVEL below DEBUG +TRACE: int = 5 + + +# Language label that contain the em dash "—" +# character are to be considered alternative seq to origin +FREQUENCIES: dict[str, list[str]] = { + "English": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "u", + "m", + "f", + "p", + "g", + "w", + "y", + "b", + "v", + "k", + "x", + "j", + "z", + "q", + ], + "English—": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "m", + "u", + "f", + "p", + "g", + "w", + "b", + "y", + "v", + "k", + "j", + "x", + "z", + "q", + ], + "German": [ + "e", + "n", + "i", + "r", + "s", + "t", + "a", + "d", + "h", + "u", + "l", + "g", + "o", + "c", + "m", + "b", + "f", + "k", + "w", + "z", + "p", + "v", + "ü", + "ä", + "ö", + "j", + ], + "French": [ + "e", + "a", + "s", + "n", + "i", + "t", + "r", + "l", + "u", + "o", + "d", + "c", + "p", + "m", + "é", + "v", + "g", + "f", + "b", + "h", + "q", + "à", + "x", + "è", + "y", + "j", + ], + "Dutch": [ + "e", + "n", + "a", + "i", + "r", + "t", + "o", + "d", + "s", + "l", + "g", + "h", + "v", + "m", + "u", + "k", + "c", + "p", + "b", + "w", + "j", + "z", + "f", + "y", + "x", + "ë", + ], + "Italian": [ + "e", + "i", + "a", + "o", + "n", + "l", + "t", + "r", + "s", + "c", + "d", + "u", + "p", + "m", + "g", + "v", + "f", + "b", + "z", + "h", + "q", + "è", + "à", + "k", + "y", + "ò", + ], + "Polish": [ + "a", + "i", + "o", + "e", + "n", + "r", + "z", + "w", + "s", + "c", + "t", + "k", + "y", + "d", + "p", + "m", + "u", + "l", + "j", + "ł", + "g", + "b", + "h", + "ą", + "ę", + "ó", + ], + "Spanish": [ + "e", + "a", + "o", + "n", + "s", + "r", + "i", + "l", + "d", + "t", + "c", + "u", + "m", + "p", + "b", + "g", + "v", + "f", + "y", + "ó", + "h", + "q", + "í", + "j", + "z", + "á", + ], + "Russian": [ + "о", + "а", + "е", + "и", + "н", + "с", + "т", + "р", + "в", + "л", + "к", + "м", + "д", + "п", + "у", + "г", + "я", + "ы", + "з", + "б", + "й", + "ь", + "ч", + "х", + "ж", + "ц", + ], + # Jap-Kanji + "Japanese": [ + "人", + "一", + "大", + "亅", + "丁", + "丨", + "竹", + "笑", + "口", + "日", + "今", + "二", + "彳", + "行", + "十", + "土", + "丶", + "寸", + "寺", + "時", + "乙", + "丿", + "乂", + "气", + "気", + "冂", + "巾", + "亠", + "市", + "目", + "儿", + "見", + "八", + "小", + "凵", + "県", + "月", + "彐", + "門", + "間", + "木", + "東", + "山", + "出", + "本", + "中", + "刀", + "分", + "耳", + "又", + "取", + "最", + "言", + "田", + "心", + "思", + "刂", + "前", + "京", + "尹", + "事", + "生", + "厶", + "云", + "会", + "未", + "来", + "白", + "冫", + "楽", + "灬", + "馬", + "尸", + "尺", + "駅", + "明", + "耂", + "者", + "了", + "阝", + "都", + "高", + "卜", + "占", + "厂", + "广", + "店", + "子", + "申", + "奄", + "亻", + "俺", + "上", + "方", + "冖", + "学", + "衣", + "艮", + "食", + "自", + ], + # Jap-Katakana + "Japanese—": [ + "ー", + "ン", + "ス", + "・", + "ル", + "ト", + "リ", + "イ", + "ア", + "ラ", + "ッ", + "ク", + "ド", + "シ", + "レ", + "ジ", + "タ", + "フ", + "ロ", + "カ", + "テ", + "マ", + "ィ", + "グ", + "バ", + "ム", + "プ", + "オ", + "コ", + "デ", + "ニ", + "ウ", + "メ", + "サ", + "ビ", + "ナ", + "ブ", + "ャ", + "エ", + "ュ", + "チ", + "キ", + "ズ", + "ダ", + "パ", + "ミ", + "ェ", + "ョ", + "ハ", + "セ", + "ベ", + "ガ", + "モ", + "ツ", + "ネ", + "ボ", + "ソ", + "ノ", + "ァ", + "ヴ", + "ワ", + "ポ", + "ペ", + "ピ", + "ケ", + "ゴ", + "ギ", + "ザ", + "ホ", + "ゲ", + "ォ", + "ヤ", + "ヒ", + "ユ", + "ヨ", + "ヘ", + "ゼ", + "ヌ", + "ゥ", + "ゾ", + "ヶ", + "ヂ", + "ヲ", + "ヅ", + "ヵ", + "ヱ", + "ヰ", + "ヮ", + "ヽ", + "゠", + "ヾ", + "ヷ", + "ヿ", + "ヸ", + "ヹ", + "ヺ", + ], + # Jap-Hiragana + "Japanese——": [ + "の", + "に", + "る", + "た", + "と", + "は", + "し", + "い", + "を", + "で", + "て", + "が", + "な", + "れ", + "か", + "ら", + "さ", + "っ", + "り", + "す", + "あ", + "も", + "こ", + "ま", + "う", + "く", + "よ", + "き", + "ん", + "め", + "お", + "け", + "そ", + "つ", + "だ", + "や", + "え", + "ど", + "わ", + "ち", + "み", + "せ", + "じ", + "ば", + "へ", + "び", + "ず", + "ろ", + "ほ", + "げ", + "む", + "べ", + "ひ", + "ょ", + "ゆ", + "ぶ", + "ご", + "ゃ", + "ね", + "ふ", + "ぐ", + "ぎ", + "ぼ", + "ゅ", + "づ", + "ざ", + "ぞ", + "ぬ", + "ぜ", + "ぱ", + "ぽ", + "ぷ", + "ぴ", + "ぃ", + "ぁ", + "ぇ", + "ぺ", + "ゞ", + "ぢ", + "ぉ", + "ぅ", + "ゐ", + "ゝ", + "ゑ", + "゛", + "゜", + "ゎ", + "ゔ", + "゚", + "ゟ", + "゙", + "ゕ", + "ゖ", + ], + "Portuguese": [ + "a", + "e", + "o", + "s", + "i", + "r", + "d", + "n", + "t", + "m", + "u", + "c", + "l", + "p", + "g", + "v", + "b", + "f", + "h", + "ã", + "q", + "é", + "ç", + "á", + "z", + "í", + ], + "Swedish": [ + "e", + "a", + "n", + "r", + "t", + "s", + "i", + "l", + "d", + "o", + "m", + "k", + "g", + "v", + "h", + "f", + "u", + "p", + "ä", + "c", + "b", + "ö", + "å", + "y", + "j", + "x", + ], + "Chinese": [ + "的", + "一", + "是", + "不", + "了", + "在", + "人", + "有", + "我", + "他", + "这", + "个", + "们", + "中", + "来", + "上", + "大", + "为", + "和", + "国", + "地", + "到", + "以", + "说", + "时", + "要", + "就", + "出", + "会", + "可", + "也", + "你", + "对", + "生", + "能", + "而", + "子", + "那", + "得", + "于", + "着", + "下", + "自", + "之", + "年", + "过", + "发", + "后", + "作", + "里", + "用", + "道", + "行", + "所", + "然", + "家", + "种", + "事", + "成", + "方", + "多", + "经", + "么", + "去", + "法", + "学", + "如", + "都", + "同", + "现", + "当", + "没", + "动", + "面", + "起", + "看", + "定", + "天", + "分", + "还", + "进", + "好", + "小", + "部", + "其", + "些", + "主", + "样", + "理", + "心", + "她", + "本", + "前", + "开", + "但", + "因", + "只", + "从", + "想", + "实", + ], + "Ukrainian": [ + "о", + "а", + "н", + "і", + "и", + "р", + "в", + "т", + "е", + "с", + "к", + "л", + "у", + "д", + "м", + "п", + "з", + "я", + "ь", + "б", + "г", + "й", + "ч", + "х", + "ц", + "ї", + ], + "Norwegian": [ + "e", + "r", + "n", + "t", + "a", + "s", + "i", + "o", + "l", + "d", + "g", + "k", + "m", + "v", + "f", + "p", + "u", + "b", + "h", + "å", + "y", + "j", + "ø", + "c", + "æ", + "w", + ], + "Finnish": [ + "a", + "i", + "n", + "t", + "e", + "s", + "l", + "o", + "u", + "k", + "ä", + "m", + "r", + "v", + "j", + "h", + "p", + "y", + "d", + "ö", + "g", + "c", + "b", + "f", + "w", + "z", + ], + "Vietnamese": [ + "n", + "h", + "t", + "i", + "c", + "g", + "a", + "o", + "u", + "m", + "l", + "r", + "à", + "đ", + "s", + "e", + "v", + "p", + "b", + "y", + "ư", + "d", + "á", + "k", + "ộ", + "ế", + ], + "Czech": [ + "o", + "e", + "a", + "n", + "t", + "s", + "i", + "l", + "v", + "r", + "k", + "d", + "u", + "m", + "p", + "í", + "c", + "h", + "z", + "á", + "y", + "j", + "b", + "ě", + "é", + "ř", + ], + "Hungarian": [ + "e", + "a", + "t", + "l", + "s", + "n", + "k", + "r", + "i", + "o", + "z", + "á", + "é", + "g", + "m", + "b", + "y", + "v", + "d", + "h", + "u", + "p", + "j", + "ö", + "f", + "c", + ], + "Korean": [ + "이", + "다", + "에", + "의", + "는", + "로", + "하", + "을", + "가", + "고", + "지", + "서", + "한", + "은", + "기", + "으", + "년", + "대", + "사", + "시", + "를", + "리", + "도", + "인", + "스", + "일", + ], + "Indonesian": [ + "a", + "n", + "e", + "i", + "r", + "t", + "u", + "s", + "d", + "k", + "m", + "l", + "g", + "p", + "b", + "o", + "h", + "y", + "j", + "c", + "w", + "f", + "v", + "z", + "x", + "q", + ], + "Turkish": [ + "a", + "e", + "i", + "n", + "r", + "l", + "ı", + "k", + "d", + "t", + "s", + "m", + "y", + "u", + "o", + "b", + "ü", + "ş", + "v", + "g", + "z", + "h", + "c", + "p", + "ç", + "ğ", + ], + "Romanian": [ + "e", + "i", + "a", + "r", + "n", + "t", + "u", + "l", + "o", + "c", + "s", + "d", + "p", + "m", + "ă", + "f", + "v", + "î", + "g", + "b", + "ș", + "ț", + "z", + "h", + "â", + "j", + ], + "Farsi": [ + "ا", + "ی", + "ر", + "د", + "ن", + "ه", + "و", + "م", + "ت", + "ب", + "س", + "ل", + "ک", + "ش", + "ز", + "ف", + "گ", + "ع", + "خ", + "ق", + "ج", + "آ", + "پ", + "ح", + "ط", + "ص", + ], + "Arabic": [ + "ا", + "ل", + "ي", + "م", + "و", + "ن", + "ر", + "ت", + "ب", + "ة", + "ع", + "د", + "س", + "ف", + "ه", + "ك", + "ق", + "أ", + "ح", + "ج", + "ش", + "ط", + "ص", + "ى", + "خ", + "إ", + ], + "Danish": [ + "e", + "r", + "n", + "t", + "a", + "i", + "s", + "d", + "l", + "o", + "g", + "m", + "k", + "f", + "v", + "u", + "b", + "h", + "p", + "å", + "y", + "ø", + "æ", + "c", + "j", + "w", + ], + "Serbian": [ + "а", + "и", + "о", + "е", + "н", + "р", + "с", + "у", + "т", + "к", + "ј", + "в", + "д", + "м", + "п", + "л", + "г", + "з", + "б", + "a", + "i", + "e", + "o", + "n", + "ц", + "ш", + ], + "Lithuanian": [ + "i", + "a", + "s", + "o", + "r", + "e", + "t", + "n", + "u", + "k", + "m", + "l", + "p", + "v", + "d", + "j", + "g", + "ė", + "b", + "y", + "ų", + "š", + "ž", + "c", + "ą", + "į", + ], + "Slovene": [ + "e", + "a", + "i", + "o", + "n", + "r", + "s", + "l", + "t", + "j", + "v", + "k", + "d", + "p", + "m", + "u", + "z", + "b", + "g", + "h", + "č", + "c", + "š", + "ž", + "f", + "y", + ], + "Slovak": [ + "o", + "a", + "e", + "n", + "i", + "r", + "v", + "t", + "s", + "l", + "k", + "d", + "m", + "p", + "u", + "c", + "h", + "j", + "b", + "z", + "á", + "y", + "ý", + "í", + "č", + "é", + ], + "Hebrew": [ + "י", + "ו", + "ה", + "ל", + "ר", + "ב", + "ת", + "מ", + "א", + "ש", + "נ", + "ע", + "ם", + "ד", + "ק", + "ח", + "פ", + "ס", + "כ", + "ג", + "ט", + "צ", + "ן", + "ז", + "ך", + ], + "Bulgarian": [ + "а", + "и", + "о", + "е", + "н", + "т", + "р", + "с", + "в", + "л", + "к", + "д", + "п", + "м", + "з", + "г", + "я", + "ъ", + "у", + "б", + "ч", + "ц", + "й", + "ж", + "щ", + "х", + ], + "Croatian": [ + "a", + "i", + "o", + "e", + "n", + "r", + "j", + "s", + "t", + "u", + "k", + "l", + "v", + "d", + "m", + "p", + "g", + "z", + "b", + "c", + "č", + "h", + "š", + "ž", + "ć", + "f", + ], + "Hindi": [ + "क", + "र", + "स", + "न", + "त", + "म", + "ह", + "प", + "य", + "ल", + "व", + "ज", + "द", + "ग", + "ब", + "श", + "ट", + "अ", + "ए", + "थ", + "भ", + "ड", + "च", + "ध", + "ष", + "इ", + ], + "Estonian": [ + "a", + "i", + "e", + "s", + "t", + "l", + "u", + "n", + "o", + "k", + "r", + "d", + "m", + "v", + "g", + "p", + "j", + "h", + "ä", + "b", + "õ", + "ü", + "f", + "c", + "ö", + "y", + ], + "Thai": [ + "า", + "น", + "ร", + "อ", + "ก", + "เ", + "ง", + "ม", + "ย", + "ล", + "ว", + "ด", + "ท", + "ส", + "ต", + "ะ", + "ป", + "บ", + "ค", + "ห", + "แ", + "จ", + "พ", + "ช", + "ข", + "ใ", + ], + "Greek": [ + "α", + "τ", + "ο", + "ι", + "ε", + "ν", + "ρ", + "σ", + "κ", + "η", + "π", + "ς", + "υ", + "μ", + "λ", + "ί", + "ό", + "ά", + "γ", + "έ", + "δ", + "ή", + "ω", + "χ", + "θ", + "ύ", + ], + "Tamil": [ + "க", + "த", + "ப", + "ட", + "ர", + "ம", + "ல", + "ன", + "வ", + "ற", + "ய", + "ள", + "ச", + "ந", + "இ", + "ண", + "அ", + "ஆ", + "ழ", + "ங", + "எ", + "உ", + "ஒ", + "ஸ", + ], + "Kazakh": [ + "а", + "ы", + "е", + "н", + "т", + "р", + "л", + "і", + "д", + "с", + "м", + "қ", + "к", + "о", + "б", + "и", + "у", + "ғ", + "ж", + "ң", + "з", + "ш", + "й", + "п", + "г", + "ө", + ], +} + +LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES) diff --git a/phivenv/Lib/site-packages/charset_normalizer/legacy.py b/phivenv/Lib/site-packages/charset_normalizer/legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..0c283bcd4e028853726310724ac939730e2bbf49 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/legacy.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from warnings import warn + +from .api import from_bytes +from .constant import CHARDET_CORRESPONDENCE, TOO_SMALL_SEQUENCE + +# TODO: remove this check when dropping Python 3.7 support +if TYPE_CHECKING: + from typing_extensions import TypedDict + + class ResultDict(TypedDict): + encoding: str | None + language: str + confidence: float | None + + +def detect( + byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any +) -> ResultDict: + """ + chardet legacy method + Detect the encoding of the given byte string. It should be mostly backward-compatible. + Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it) + This function is deprecated and should be used to migrate your project easily, consult the documentation for + further information. Not planned for removal. + + :param byte_str: The byte sequence to examine. + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + """ + if len(kwargs): + warn( + f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()" + ) + + if not isinstance(byte_str, (bytearray, bytes)): + raise TypeError( # pragma: nocover + f"Expected object of type bytes or bytearray, got: {type(byte_str)}" + ) + + if isinstance(byte_str, bytearray): + byte_str = bytes(byte_str) + + r = from_bytes(byte_str).best() + + encoding = r.encoding if r is not None else None + language = r.language if r is not None and r.language != "Unknown" else "" + confidence = 1.0 - r.chaos if r is not None else None + + # automatically lower confidence + # on small bytes samples. + # https://github.com/jawah/charset_normalizer/issues/391 + if ( + confidence is not None + and confidence >= 0.9 + and encoding + not in { + "utf_8", + "ascii", + } + and r.bom is False # type: ignore[union-attr] + and len(byte_str) < TOO_SMALL_SEQUENCE + ): + confidence -= 0.2 + + # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process + # but chardet does return 'utf-8-sig' and it is a valid codec name. + if r is not None and encoding == "utf_8" and r.bom: + encoding += "_sig" + + if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE: + encoding = CHARDET_CORRESPONDENCE[encoding] + + return { + "encoding": encoding, + "language": language, + "confidence": confidence, + } diff --git a/phivenv/Lib/site-packages/charset_normalizer/md.cp39-win_amd64.pyd b/phivenv/Lib/site-packages/charset_normalizer/md.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..2c7f67c21f8d04549af5b547f9245cbfa90a75ae Binary files /dev/null and b/phivenv/Lib/site-packages/charset_normalizer/md.cp39-win_amd64.pyd differ diff --git a/phivenv/Lib/site-packages/charset_normalizer/md.py b/phivenv/Lib/site-packages/charset_normalizer/md.py new file mode 100644 index 0000000000000000000000000000000000000000..4f3eaa6245d98ae99144cba8a15e14557e0ae542 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/md.py @@ -0,0 +1,635 @@ +from __future__ import annotations + +from functools import lru_cache +from logging import getLogger + +from .constant import ( + COMMON_SAFE_ASCII_CHARACTERS, + TRACE, + UNICODE_SECONDARY_RANGE_KEYWORD, +) +from .utils import ( + is_accentuated, + is_arabic, + is_arabic_isolated_form, + is_case_variable, + is_cjk, + is_emoticon, + is_hangul, + is_hiragana, + is_katakana, + is_latin, + is_punctuation, + is_separator, + is_symbol, + is_thai, + is_unprintable, + remove_accent, + unicode_range, + is_cjk_uncommon, +) + + +class MessDetectorPlugin: + """ + Base abstract class used for mess detection plugins. + All detectors MUST extend and implement given methods. + """ + + def eligible(self, character: str) -> bool: + """ + Determine if given character should be fed in. + """ + raise NotImplementedError # pragma: nocover + + def feed(self, character: str) -> None: + """ + The main routine to be executed upon character. + Insert the logic in witch the text would be considered chaotic. + """ + raise NotImplementedError # pragma: nocover + + def reset(self) -> None: # pragma: no cover + """ + Permit to reset the plugin to the initial state. + """ + raise NotImplementedError + + @property + def ratio(self) -> float: + """ + Compute the chaos ratio based on what your feed() has seen. + Must NOT be lower than 0.; No restriction gt 0. + """ + raise NotImplementedError # pragma: nocover + + +class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._punctuation_count: int = 0 + self._symbol_count: int = 0 + self._character_count: int = 0 + + self._last_printable_char: str | None = None + self._frenzy_symbol_in_word: bool = False + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character != self._last_printable_char + and character not in COMMON_SAFE_ASCII_CHARACTERS + ): + if is_punctuation(character): + self._punctuation_count += 1 + elif ( + character.isdigit() is False + and is_symbol(character) + and is_emoticon(character) is False + ): + self._symbol_count += 2 + + self._last_printable_char = character + + def reset(self) -> None: # Abstract + self._punctuation_count = 0 + self._character_count = 0 + self._symbol_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + ratio_of_punctuation: float = ( + self._punctuation_count + self._symbol_count + ) / self._character_count + + return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0 + + +class TooManyAccentuatedPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._character_count: int = 0 + self._accentuated_count: int = 0 + + def eligible(self, character: str) -> bool: + return character.isalpha() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_accentuated(character): + self._accentuated_count += 1 + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._accentuated_count = 0 + + @property + def ratio(self) -> float: + if self._character_count < 8: + return 0.0 + + ratio_of_accentuation: float = self._accentuated_count / self._character_count + return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 + + +class UnprintablePlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._unprintable_count: int = 0 + self._character_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if is_unprintable(character): + self._unprintable_count += 1 + self._character_count += 1 + + def reset(self) -> None: # Abstract + self._unprintable_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._unprintable_count * 8) / self._character_count + + +class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._successive_count: int = 0 + self._character_count: int = 0 + + self._last_latin_character: str | None = None + + def eligible(self, character: str) -> bool: + return character.isalpha() and is_latin(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + if ( + self._last_latin_character is not None + and is_accentuated(character) + and is_accentuated(self._last_latin_character) + ): + if character.isupper() and self._last_latin_character.isupper(): + self._successive_count += 1 + # Worse if its the same char duplicated with different accent. + if remove_accent(character) == remove_accent(self._last_latin_character): + self._successive_count += 1 + self._last_latin_character = character + + def reset(self) -> None: # Abstract + self._successive_count = 0 + self._character_count = 0 + self._last_latin_character = None + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._successive_count * 2) / self._character_count + + +class SuspiciousRange(MessDetectorPlugin): + def __init__(self) -> None: + self._suspicious_successive_range_count: int = 0 + self._character_count: int = 0 + self._last_printable_seen: str | None = None + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character.isspace() + or is_punctuation(character) + or character in COMMON_SAFE_ASCII_CHARACTERS + ): + self._last_printable_seen = None + return + + if self._last_printable_seen is None: + self._last_printable_seen = character + return + + unicode_range_a: str | None = unicode_range(self._last_printable_seen) + unicode_range_b: str | None = unicode_range(character) + + if is_suspiciously_successive_range(unicode_range_a, unicode_range_b): + self._suspicious_successive_range_count += 1 + + self._last_printable_seen = character + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._suspicious_successive_range_count = 0 + self._last_printable_seen = None + + @property + def ratio(self) -> float: + if self._character_count <= 13: + return 0.0 + + ratio_of_suspicious_range_usage: float = ( + self._suspicious_successive_range_count * 2 + ) / self._character_count + + return ratio_of_suspicious_range_usage + + +class SuperWeirdWordPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._word_count: int = 0 + self._bad_word_count: int = 0 + self._foreign_long_count: int = 0 + + self._is_current_word_bad: bool = False + self._foreign_long_watch: bool = False + + self._character_count: int = 0 + self._bad_character_count: int = 0 + + self._buffer: str = "" + self._buffer_accent_count: int = 0 + self._buffer_glyph_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if character.isalpha(): + self._buffer += character + if is_accentuated(character): + self._buffer_accent_count += 1 + if ( + self._foreign_long_watch is False + and (is_latin(character) is False or is_accentuated(character)) + and is_cjk(character) is False + and is_hangul(character) is False + and is_katakana(character) is False + and is_hiragana(character) is False + and is_thai(character) is False + ): + self._foreign_long_watch = True + if ( + is_cjk(character) + or is_hangul(character) + or is_katakana(character) + or is_hiragana(character) + or is_thai(character) + ): + self._buffer_glyph_count += 1 + return + if not self._buffer: + return + if ( + character.isspace() or is_punctuation(character) or is_separator(character) + ) and self._buffer: + self._word_count += 1 + buffer_length: int = len(self._buffer) + + self._character_count += buffer_length + + if buffer_length >= 4: + if self._buffer_accent_count / buffer_length >= 0.5: + self._is_current_word_bad = True + # Word/Buffer ending with an upper case accentuated letter are so rare, + # that we will consider them all as suspicious. Same weight as foreign_long suspicious. + elif ( + is_accentuated(self._buffer[-1]) + and self._buffer[-1].isupper() + and all(_.isupper() for _ in self._buffer) is False + ): + self._foreign_long_count += 1 + self._is_current_word_bad = True + elif self._buffer_glyph_count == 1: + self._is_current_word_bad = True + self._foreign_long_count += 1 + if buffer_length >= 24 and self._foreign_long_watch: + camel_case_dst = [ + i + for c, i in zip(self._buffer, range(0, buffer_length)) + if c.isupper() + ] + probable_camel_cased: bool = False + + if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3): + probable_camel_cased = True + + if not probable_camel_cased: + self._foreign_long_count += 1 + self._is_current_word_bad = True + + if self._is_current_word_bad: + self._bad_word_count += 1 + self._bad_character_count += len(self._buffer) + self._is_current_word_bad = False + + self._foreign_long_watch = False + self._buffer = "" + self._buffer_accent_count = 0 + self._buffer_glyph_count = 0 + elif ( + character not in {"<", ">", "-", "=", "~", "|", "_"} + and character.isdigit() is False + and is_symbol(character) + ): + self._is_current_word_bad = True + self._buffer += character + + def reset(self) -> None: # Abstract + self._buffer = "" + self._is_current_word_bad = False + self._foreign_long_watch = False + self._bad_word_count = 0 + self._word_count = 0 + self._character_count = 0 + self._bad_character_count = 0 + self._foreign_long_count = 0 + + @property + def ratio(self) -> float: + if self._word_count <= 10 and self._foreign_long_count == 0: + return 0.0 + + return self._bad_character_count / self._character_count + + +class CjkUncommonPlugin(MessDetectorPlugin): + """ + Detect messy CJK text that probably means nothing. + """ + + def __init__(self) -> None: + self._character_count: int = 0 + self._uncommon_count: int = 0 + + def eligible(self, character: str) -> bool: + return is_cjk(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_cjk_uncommon(character): + self._uncommon_count += 1 + return + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._uncommon_count = 0 + + @property + def ratio(self) -> float: + if self._character_count < 8: + return 0.0 + + uncommon_form_usage: float = self._uncommon_count / self._character_count + + # we can be pretty sure it's garbage when uncommon characters are widely + # used. otherwise it could just be traditional chinese for example. + return uncommon_form_usage / 10 if uncommon_form_usage > 0.5 else 0.0 + + +class ArchaicUpperLowerPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._buf: bool = False + + self._character_count_since_last_sep: int = 0 + + self._successive_upper_lower_count: int = 0 + self._successive_upper_lower_count_final: int = 0 + + self._character_count: int = 0 + + self._last_alpha_seen: str | None = None + self._current_ascii_only: bool = True + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + is_concerned = character.isalpha() and is_case_variable(character) + chunk_sep = is_concerned is False + + if chunk_sep and self._character_count_since_last_sep > 0: + if ( + self._character_count_since_last_sep <= 64 + and character.isdigit() is False + and self._current_ascii_only is False + ): + self._successive_upper_lower_count_final += ( + self._successive_upper_lower_count + ) + + self._successive_upper_lower_count = 0 + self._character_count_since_last_sep = 0 + self._last_alpha_seen = None + self._buf = False + self._character_count += 1 + self._current_ascii_only = True + + return + + if self._current_ascii_only is True and character.isascii() is False: + self._current_ascii_only = False + + if self._last_alpha_seen is not None: + if (character.isupper() and self._last_alpha_seen.islower()) or ( + character.islower() and self._last_alpha_seen.isupper() + ): + if self._buf is True: + self._successive_upper_lower_count += 2 + self._buf = False + else: + self._buf = True + else: + self._buf = False + + self._character_count += 1 + self._character_count_since_last_sep += 1 + self._last_alpha_seen = character + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._character_count_since_last_sep = 0 + self._successive_upper_lower_count = 0 + self._successive_upper_lower_count_final = 0 + self._last_alpha_seen = None + self._buf = False + self._current_ascii_only = True + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return self._successive_upper_lower_count_final / self._character_count + + +class ArabicIsolatedFormPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._character_count: int = 0 + self._isolated_form_count: int = 0 + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._isolated_form_count = 0 + + def eligible(self, character: str) -> bool: + return is_arabic(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_arabic_isolated_form(character): + self._isolated_form_count += 1 + + @property + def ratio(self) -> float: + if self._character_count < 8: + return 0.0 + + isolated_form_usage: float = self._isolated_form_count / self._character_count + + return isolated_form_usage + + +@lru_cache(maxsize=1024) +def is_suspiciously_successive_range( + unicode_range_a: str | None, unicode_range_b: str | None +) -> bool: + """ + Determine if two Unicode range seen next to each other can be considered as suspicious. + """ + if unicode_range_a is None or unicode_range_b is None: + return True + + if unicode_range_a == unicode_range_b: + return False + + if "Latin" in unicode_range_a and "Latin" in unicode_range_b: + return False + + if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b: + return False + + # Latin characters can be accompanied with a combining diacritical mark + # eg. Vietnamese. + if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and ( + "Combining" in unicode_range_a or "Combining" in unicode_range_b + ): + return False + + keywords_range_a, keywords_range_b = ( + unicode_range_a.split(" "), + unicode_range_b.split(" "), + ) + + for el in keywords_range_a: + if el in UNICODE_SECONDARY_RANGE_KEYWORD: + continue + if el in keywords_range_b: + return False + + # Japanese Exception + range_a_jp_chars, range_b_jp_chars = ( + unicode_range_a + in ( + "Hiragana", + "Katakana", + ), + unicode_range_b in ("Hiragana", "Katakana"), + ) + if (range_a_jp_chars or range_b_jp_chars) and ( + "CJK" in unicode_range_a or "CJK" in unicode_range_b + ): + return False + if range_a_jp_chars and range_b_jp_chars: + return False + + if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b: + if "CJK" in unicode_range_a or "CJK" in unicode_range_b: + return False + if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin": + return False + + # Chinese/Japanese use dedicated range for punctuation and/or separators. + if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or ( + unicode_range_a in ["Katakana", "Hiragana"] + and unicode_range_b in ["Katakana", "Hiragana"] + ): + if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b: + return False + if "Forms" in unicode_range_a or "Forms" in unicode_range_b: + return False + if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin": + return False + + return True + + +@lru_cache(maxsize=2048) +def mess_ratio( + decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False +) -> float: + """ + Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. + """ + + detectors: list[MessDetectorPlugin] = [ + md_class() for md_class in MessDetectorPlugin.__subclasses__() + ] + + length: int = len(decoded_sequence) + 1 + + mean_mess_ratio: float = 0.0 + + if length < 512: + intermediary_mean_mess_ratio_calc: int = 32 + elif length <= 1024: + intermediary_mean_mess_ratio_calc = 64 + else: + intermediary_mean_mess_ratio_calc = 128 + + for character, index in zip(decoded_sequence + "\n", range(length)): + for detector in detectors: + if detector.eligible(character): + detector.feed(character) + + if ( + index > 0 and index % intermediary_mean_mess_ratio_calc == 0 + ) or index == length - 1: + mean_mess_ratio = sum(dt.ratio for dt in detectors) + + if mean_mess_ratio >= maximum_threshold: + break + + if debug: + logger = getLogger("charset_normalizer") + + logger.log( + TRACE, + "Mess-detector extended-analysis start. " + f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} " + f"maximum_threshold={maximum_threshold}", + ) + + if len(decoded_sequence) > 16: + logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}") + logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}") + + for dt in detectors: + logger.log(TRACE, f"{dt.__class__}: {dt.ratio}") + + return round(mean_mess_ratio, 3) diff --git a/phivenv/Lib/site-packages/charset_normalizer/models.py b/phivenv/Lib/site-packages/charset_normalizer/models.py new file mode 100644 index 0000000000000000000000000000000000000000..ffb84722a85a469355ef0be3ad7ab850e7429f57 --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/models.py @@ -0,0 +1,360 @@ +from __future__ import annotations + +from encodings.aliases import aliases +from hashlib import sha256 +from json import dumps +from re import sub +from typing import Any, Iterator, List, Tuple + +from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE +from .utils import iana_name, is_multi_byte_encoding, unicode_range + + +class CharsetMatch: + def __init__( + self, + payload: bytes, + guessed_encoding: str, + mean_mess_ratio: float, + has_sig_or_bom: bool, + languages: CoherenceMatches, + decoded_payload: str | None = None, + preemptive_declaration: str | None = None, + ): + self._payload: bytes = payload + + self._encoding: str = guessed_encoding + self._mean_mess_ratio: float = mean_mess_ratio + self._languages: CoherenceMatches = languages + self._has_sig_or_bom: bool = has_sig_or_bom + self._unicode_ranges: list[str] | None = None + + self._leaves: list[CharsetMatch] = [] + self._mean_coherence_ratio: float = 0.0 + + self._output_payload: bytes | None = None + self._output_encoding: str | None = None + + self._string: str | None = decoded_payload + + self._preemptive_declaration: str | None = preemptive_declaration + + def __eq__(self, other: object) -> bool: + if not isinstance(other, CharsetMatch): + if isinstance(other, str): + return iana_name(other) == self.encoding + return False + return self.encoding == other.encoding and self.fingerprint == other.fingerprint + + def __lt__(self, other: object) -> bool: + """ + Implemented to make sorted available upon CharsetMatches items. + """ + if not isinstance(other, CharsetMatch): + raise ValueError + + chaos_difference: float = abs(self.chaos - other.chaos) + coherence_difference: float = abs(self.coherence - other.coherence) + + # Below 1% difference --> Use Coherence + if chaos_difference < 0.01 and coherence_difference > 0.02: + return self.coherence > other.coherence + elif chaos_difference < 0.01 and coherence_difference <= 0.02: + # When having a difficult decision, use the result that decoded as many multi-byte as possible. + # preserve RAM usage! + if len(self._payload) >= TOO_BIG_SEQUENCE: + return self.chaos < other.chaos + return self.multi_byte_usage > other.multi_byte_usage + + return self.chaos < other.chaos + + @property + def multi_byte_usage(self) -> float: + return 1.0 - (len(str(self)) / len(self.raw)) + + def __str__(self) -> str: + # Lazy Str Loading + if self._string is None: + self._string = str(self._payload, self._encoding, "strict") + return self._string + + def __repr__(self) -> str: + return f"" + + def add_submatch(self, other: CharsetMatch) -> None: + if not isinstance(other, CharsetMatch) or other == self: + raise ValueError( + "Unable to add instance <{}> as a submatch of a CharsetMatch".format( + other.__class__ + ) + ) + + other._string = None # Unload RAM usage; dirty trick. + self._leaves.append(other) + + @property + def encoding(self) -> str: + return self._encoding + + @property + def encoding_aliases(self) -> list[str]: + """ + Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. + """ + also_known_as: list[str] = [] + for u, p in aliases.items(): + if self.encoding == u: + also_known_as.append(p) + elif self.encoding == p: + also_known_as.append(u) + return also_known_as + + @property + def bom(self) -> bool: + return self._has_sig_or_bom + + @property + def byte_order_mark(self) -> bool: + return self._has_sig_or_bom + + @property + def languages(self) -> list[str]: + """ + Return the complete list of possible languages found in decoded sequence. + Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'. + """ + return [e[0] for e in self._languages] + + @property + def language(self) -> str: + """ + Most probable language found in decoded sequence. If none were detected or inferred, the property will return + "Unknown". + """ + if not self._languages: + # Trying to infer the language based on the given encoding + # Its either English or we should not pronounce ourselves in certain cases. + if "ascii" in self.could_be_from_charset: + return "English" + + # doing it there to avoid circular import + from charset_normalizer.cd import encoding_languages, mb_encoding_languages + + languages = ( + mb_encoding_languages(self.encoding) + if is_multi_byte_encoding(self.encoding) + else encoding_languages(self.encoding) + ) + + if len(languages) == 0 or "Latin Based" in languages: + return "Unknown" + + return languages[0] + + return self._languages[0][0] + + @property + def chaos(self) -> float: + return self._mean_mess_ratio + + @property + def coherence(self) -> float: + if not self._languages: + return 0.0 + return self._languages[0][1] + + @property + def percent_chaos(self) -> float: + return round(self.chaos * 100, ndigits=3) + + @property + def percent_coherence(self) -> float: + return round(self.coherence * 100, ndigits=3) + + @property + def raw(self) -> bytes: + """ + Original untouched bytes. + """ + return self._payload + + @property + def submatch(self) -> list[CharsetMatch]: + return self._leaves + + @property + def has_submatch(self) -> bool: + return len(self._leaves) > 0 + + @property + def alphabets(self) -> list[str]: + if self._unicode_ranges is not None: + return self._unicode_ranges + # list detected ranges + detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)] + # filter and sort + self._unicode_ranges = sorted(list({r for r in detected_ranges if r})) + return self._unicode_ranges + + @property + def could_be_from_charset(self) -> list[str]: + """ + The complete list of encoding that output the exact SAME str result and therefore could be the originating + encoding. + This list does include the encoding available in property 'encoding'. + """ + return [self._encoding] + [m.encoding for m in self._leaves] + + def output(self, encoding: str = "utf_8") -> bytes: + """ + Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. + Any errors will be simply ignored by the encoder NOT replaced. + """ + if self._output_encoding is None or self._output_encoding != encoding: + self._output_encoding = encoding + decoded_string = str(self) + if ( + self._preemptive_declaration is not None + and self._preemptive_declaration.lower() + not in ["utf-8", "utf8", "utf_8"] + ): + patched_header = sub( + RE_POSSIBLE_ENCODING_INDICATION, + lambda m: m.string[m.span()[0] : m.span()[1]].replace( + m.groups()[0], + iana_name(self._output_encoding).replace("_", "-"), # type: ignore[arg-type] + ), + decoded_string[:8192], + count=1, + ) + + decoded_string = patched_header + decoded_string[8192:] + + self._output_payload = decoded_string.encode(encoding, "replace") + + return self._output_payload # type: ignore + + @property + def fingerprint(self) -> str: + """ + Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one. + """ + return sha256(self.output()).hexdigest() + + +class CharsetMatches: + """ + Container with every CharsetMatch items ordered by default from most probable to the less one. + Act like a list(iterable) but does not implements all related methods. + """ + + def __init__(self, results: list[CharsetMatch] | None = None): + self._results: list[CharsetMatch] = sorted(results) if results else [] + + def __iter__(self) -> Iterator[CharsetMatch]: + yield from self._results + + def __getitem__(self, item: int | str) -> CharsetMatch: + """ + Retrieve a single item either by its position or encoding name (alias may be used here). + Raise KeyError upon invalid index or encoding not present in results. + """ + if isinstance(item, int): + return self._results[item] + if isinstance(item, str): + item = iana_name(item, False) + for result in self._results: + if item in result.could_be_from_charset: + return result + raise KeyError + + def __len__(self) -> int: + return len(self._results) + + def __bool__(self) -> bool: + return len(self._results) > 0 + + def append(self, item: CharsetMatch) -> None: + """ + Insert a single match. Will be inserted accordingly to preserve sort. + Can be inserted as a submatch. + """ + if not isinstance(item, CharsetMatch): + raise ValueError( + "Cannot append instance '{}' to CharsetMatches".format( + str(item.__class__) + ) + ) + # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage) + if len(item.raw) < TOO_BIG_SEQUENCE: + for match in self._results: + if match.fingerprint == item.fingerprint and match.chaos == item.chaos: + match.add_submatch(item) + return + self._results.append(item) + self._results = sorted(self._results) + + def best(self) -> CharsetMatch | None: + """ + Simply return the first match. Strict equivalent to matches[0]. + """ + if not self._results: + return None + return self._results[0] + + def first(self) -> CharsetMatch | None: + """ + Redundant method, call the method best(). Kept for BC reasons. + """ + return self.best() + + +CoherenceMatch = Tuple[str, float] +CoherenceMatches = List[CoherenceMatch] + + +class CliDetectionResult: + def __init__( + self, + path: str, + encoding: str | None, + encoding_aliases: list[str], + alternative_encodings: list[str], + language: str, + alphabets: list[str], + has_sig_or_bom: bool, + chaos: float, + coherence: float, + unicode_path: str | None, + is_preferred: bool, + ): + self.path: str = path + self.unicode_path: str | None = unicode_path + self.encoding: str | None = encoding + self.encoding_aliases: list[str] = encoding_aliases + self.alternative_encodings: list[str] = alternative_encodings + self.language: str = language + self.alphabets: list[str] = alphabets + self.has_sig_or_bom: bool = has_sig_or_bom + self.chaos: float = chaos + self.coherence: float = coherence + self.is_preferred: bool = is_preferred + + @property + def __dict__(self) -> dict[str, Any]: # type: ignore + return { + "path": self.path, + "encoding": self.encoding, + "encoding_aliases": self.encoding_aliases, + "alternative_encodings": self.alternative_encodings, + "language": self.language, + "alphabets": self.alphabets, + "has_sig_or_bom": self.has_sig_or_bom, + "chaos": self.chaos, + "coherence": self.coherence, + "unicode_path": self.unicode_path, + "is_preferred": self.is_preferred, + } + + def to_json(self) -> str: + return dumps(self.__dict__, ensure_ascii=True, indent=4) diff --git a/phivenv/Lib/site-packages/charset_normalizer/py.typed b/phivenv/Lib/site-packages/charset_normalizer/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/phivenv/Lib/site-packages/charset_normalizer/utils.py b/phivenv/Lib/site-packages/charset_normalizer/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3cf09201e2eca60a85aa6fcc5fd265482d5420ca --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/utils.py @@ -0,0 +1,414 @@ +from __future__ import annotations + +import importlib +import logging +import unicodedata +from codecs import IncrementalDecoder +from encodings.aliases import aliases +from functools import lru_cache +from re import findall +from typing import Generator + +from _multibytecodec import ( # type: ignore[import-not-found,import] + MultibyteIncrementalDecoder, +) + +from .constant import ( + ENCODING_MARKS, + IANA_SUPPORTED_SIMILAR, + RE_POSSIBLE_ENCODING_INDICATION, + UNICODE_RANGES_COMBINED, + UNICODE_SECONDARY_RANGE_KEYWORD, + UTF8_MAXIMAL_ALLOCATION, + COMMON_CJK_CHARACTERS, +) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_accentuated(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + return ( + "WITH GRAVE" in description + or "WITH ACUTE" in description + or "WITH CEDILLA" in description + or "WITH DIAERESIS" in description + or "WITH CIRCUMFLEX" in description + or "WITH TILDE" in description + or "WITH MACRON" in description + or "WITH RING ABOVE" in description + ) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def remove_accent(character: str) -> str: + decomposed: str = unicodedata.decomposition(character) + if not decomposed: + return character + + codes: list[str] = decomposed.split(" ") + + return chr(int(codes[0], 16)) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def unicode_range(character: str) -> str | None: + """ + Retrieve the Unicode range official name from a single character. + """ + character_ord: int = ord(character) + + for range_name, ord_range in UNICODE_RANGES_COMBINED.items(): + if character_ord in ord_range: + return range_name + + return None + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_latin(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + return "LATIN" in description + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_punctuation(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "P" in character_category: + return True + + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Punctuation" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_symbol(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "S" in character_category or "N" in character_category: + return True + + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Forms" in character_range and character_category != "Lo" + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_emoticon(character: str) -> bool: + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Emoticons" in character_range or "Pictographs" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_separator(character: str) -> bool: + if character.isspace() or character in {"|", "+", "<", ">"}: + return True + + character_category: str = unicodedata.category(character) + + return "Z" in character_category or character_category in {"Po", "Pd", "Pc"} + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_case_variable(character: str) -> bool: + return character.islower() != character.isupper() + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_cjk(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "CJK" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hiragana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "HIRAGANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_katakana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "KATAKANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hangul(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "HANGUL" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_thai(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "THAI" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_arabic(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "ARABIC" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_arabic_isolated_form(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "ARABIC" in character_name and "ISOLATED FORM" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_cjk_uncommon(character: str) -> bool: + return character not in COMMON_CJK_CHARACTERS + + +@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED)) +def is_unicode_range_secondary(range_name: str) -> bool: + return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_unprintable(character: str) -> bool: + return ( + character.isspace() is False # includes \n \t \r \v + and character.isprintable() is False + and character != "\x1a" # Why? Its the ASCII substitute character. + and character != "\ufeff" # bug discovered in Python, + # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space. + ) + + +def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None: + """ + Extract using ASCII-only decoder any specified encoding in the first n-bytes. + """ + if not isinstance(sequence, bytes): + raise TypeError + + seq_len: int = len(sequence) + + results: list[str] = findall( + RE_POSSIBLE_ENCODING_INDICATION, + sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"), + ) + + if len(results) == 0: + return None + + for specified_encoding in results: + specified_encoding = specified_encoding.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if encoding_alias == specified_encoding: + return encoding_iana + if encoding_iana == specified_encoding: + return encoding_iana + + return None + + +@lru_cache(maxsize=128) +def is_multi_byte_encoding(name: str) -> bool: + """ + Verify is a specific encoding is a multi byte one based on it IANA name + """ + return name in { + "utf_8", + "utf_8_sig", + "utf_16", + "utf_16_be", + "utf_16_le", + "utf_32", + "utf_32_le", + "utf_32_be", + "utf_7", + } or issubclass( + importlib.import_module(f"encodings.{name}").IncrementalDecoder, + MultibyteIncrementalDecoder, + ) + + +def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]: + """ + Identify and extract SIG/BOM in given sequence. + """ + + for iana_encoding in ENCODING_MARKS: + marks: bytes | list[bytes] = ENCODING_MARKS[iana_encoding] + + if isinstance(marks, bytes): + marks = [marks] + + for mark in marks: + if sequence.startswith(mark): + return iana_encoding, mark + + return None, b"" + + +def should_strip_sig_or_bom(iana_encoding: str) -> bool: + return iana_encoding not in {"utf_16", "utf_32"} + + +def iana_name(cp_name: str, strict: bool = True) -> str: + """Returns the Python normalized encoding name (Not the IANA official name).""" + cp_name = cp_name.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if cp_name in [encoding_alias, encoding_iana]: + return encoding_iana + + if strict: + raise ValueError(f"Unable to retrieve IANA for '{cp_name}'") + + return cp_name + + +def cp_similarity(iana_name_a: str, iana_name_b: str) -> float: + if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b): + return 0.0 + + decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder + decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder + + id_a: IncrementalDecoder = decoder_a(errors="ignore") + id_b: IncrementalDecoder = decoder_b(errors="ignore") + + character_match_count: int = 0 + + for i in range(255): + to_be_decoded: bytes = bytes([i]) + if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded): + character_match_count += 1 + + return character_match_count / 254 + + +def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool: + """ + Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using + the function cp_similarity. + """ + return ( + iana_name_a in IANA_SUPPORTED_SIMILAR + and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a] + ) + + +def set_logging_handler( + name: str = "charset_normalizer", + level: int = logging.INFO, + format_string: str = "%(asctime)s | %(levelname)s | %(message)s", +) -> None: + logger = logging.getLogger(name) + logger.setLevel(level) + + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter(format_string)) + logger.addHandler(handler) + + +def cut_sequence_chunks( + sequences: bytes, + encoding_iana: str, + offsets: range, + chunk_size: int, + bom_or_sig_available: bool, + strip_sig_or_bom: bool, + sig_payload: bytes, + is_multi_byte_decoder: bool, + decoded_payload: str | None = None, +) -> Generator[str, None, None]: + if decoded_payload and is_multi_byte_decoder is False: + for i in offsets: + chunk = decoded_payload[i : i + chunk_size] + if not chunk: + break + yield chunk + else: + for i in offsets: + chunk_end = i + chunk_size + if chunk_end > len(sequences) + 8: + continue + + cut_sequence = sequences[i : i + chunk_size] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode( + encoding_iana, + errors="ignore" if is_multi_byte_decoder else "strict", + ) + + # multi-byte bad cutting detector and adjustment + # not the cleanest way to perform that fix but clever enough for now. + if is_multi_byte_decoder and i > 0: + chunk_partial_size_chk: int = min(chunk_size, 16) + + if ( + decoded_payload + and chunk[:chunk_partial_size_chk] not in decoded_payload + ): + for j in range(i, i - 4, -1): + cut_sequence = sequences[j:chunk_end] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode(encoding_iana, errors="ignore") + + if chunk[:chunk_partial_size_chk] in decoded_payload: + break + + yield chunk diff --git a/phivenv/Lib/site-packages/charset_normalizer/version.py b/phivenv/Lib/site-packages/charset_normalizer/version.py new file mode 100644 index 0000000000000000000000000000000000000000..54e6558c8e839012cd6a4fea4011d3b497f3355e --- /dev/null +++ b/phivenv/Lib/site-packages/charset_normalizer/version.py @@ -0,0 +1,8 @@ +""" +Expose version +""" + +from __future__ import annotations + +__version__ = "3.4.3" +VERSION = __version__.split(".") diff --git a/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/METADATA b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..a1b5c57543005b310ac2efb02243a6b6ab6d8932 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/METADATA @@ -0,0 +1,441 @@ +Metadata-Version: 2.1 +Name: colorama +Version: 0.4.6 +Summary: Cross-platform colored terminal text. +Project-URL: Homepage, https://github.com/tartley/colorama +Author-email: Jonathan Hartley +License-File: LICENSE.txt +Keywords: ansi,color,colour,crossplatform,terminal,text,windows,xplatform +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Terminals +Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7 +Description-Content-Type: text/x-rst + +.. image:: https://img.shields.io/pypi/v/colorama.svg + :target: https://pypi.org/project/colorama/ + :alt: Latest Version + +.. image:: https://img.shields.io/pypi/pyversions/colorama.svg + :target: https://pypi.org/project/colorama/ + :alt: Supported Python versions + +.. image:: https://github.com/tartley/colorama/actions/workflows/test.yml/badge.svg + :target: https://github.com/tartley/colorama/actions/workflows/test.yml + :alt: Build Status + +Colorama +======== + +Makes ANSI escape character sequences (for producing colored terminal text and +cursor positioning) work under MS Windows. + +.. |donate| image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif + :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=2MZ9D2GMLYCUJ&item_name=Colorama¤cy_code=USD + :alt: Donate with Paypal + +`PyPI for releases `_ | +`Github for source `_ | +`Colorama for enterprise on Tidelift `_ + +If you find Colorama useful, please |donate| to the authors. Thank you! + +Installation +------------ + +Tested on CPython 2.7, 3.7, 3.8, 3.9 and 3.10 and Pypy 2.7 and 3.8. + +No requirements other than the standard library. + +.. code-block:: bash + + pip install colorama + # or + conda install -c anaconda colorama + +Description +----------- + +ANSI escape character sequences have long been used to produce colored terminal +text and cursor positioning on Unix and Macs. Colorama makes this work on +Windows, too, by wrapping ``stdout``, stripping ANSI sequences it finds (which +would appear as gobbledygook in the output), and converting them into the +appropriate win32 calls to modify the state of the terminal. On other platforms, +Colorama does nothing. + +This has the upshot of providing a simple cross-platform API for printing +colored terminal text from Python, and has the happy side-effect that existing +applications or libraries which use ANSI sequences to produce colored output on +Linux or Macs can now also work on Windows, simply by calling +``colorama.just_fix_windows_console()`` (since v0.4.6) or ``colorama.init()`` +(all versions, but may have other side-effects – see below). + +An alternative approach is to install ``ansi.sys`` on Windows machines, which +provides the same behaviour for all applications running in terminals. Colorama +is intended for situations where that isn't easy (e.g., maybe your app doesn't +have an installer.) + +Demo scripts in the source code repository print some colored text using +ANSI sequences. Compare their output under Gnome-terminal's built in ANSI +handling, versus on Windows Command-Prompt using Colorama: + +.. image:: https://github.com/tartley/colorama/raw/master/screenshots/ubuntu-demo.png + :width: 661 + :height: 357 + :alt: ANSI sequences on Ubuntu under gnome-terminal. + +.. image:: https://github.com/tartley/colorama/raw/master/screenshots/windows-demo.png + :width: 668 + :height: 325 + :alt: Same ANSI sequences on Windows, using Colorama. + +These screenshots show that, on Windows, Colorama does not support ANSI 'dim +text'; it looks the same as 'normal text'. + +Usage +----- + +Initialisation +.............. + +If the only thing you want from Colorama is to get ANSI escapes to work on +Windows, then run: + +.. code-block:: python + + from colorama import just_fix_windows_console + just_fix_windows_console() + +If you're on a recent version of Windows 10 or better, and your stdout/stderr +are pointing to a Windows console, then this will flip the magic configuration +switch to enable Windows' built-in ANSI support. + +If you're on an older version of Windows, and your stdout/stderr are pointing to +a Windows console, then this will wrap ``sys.stdout`` and/or ``sys.stderr`` in a +magic file object that intercepts ANSI escape sequences and issues the +appropriate Win32 calls to emulate them. + +In all other circumstances, it does nothing whatsoever. Basically the idea is +that this makes Windows act like Unix with respect to ANSI escape handling. + +It's safe to call this function multiple times. It's safe to call this function +on non-Windows platforms, but it won't do anything. It's safe to call this +function when one or both of your stdout/stderr are redirected to a file – it +won't do anything to those streams. + +Alternatively, you can use the older interface with more features (but also more +potential footguns): + +.. code-block:: python + + from colorama import init + init() + +This does the same thing as ``just_fix_windows_console``, except for the +following differences: + +- It's not safe to call ``init`` multiple times; you can end up with multiple + layers of wrapping and broken ANSI support. + +- Colorama will apply a heuristic to guess whether stdout/stderr support ANSI, + and if it thinks they don't, then it will wrap ``sys.stdout`` and + ``sys.stderr`` in a magic file object that strips out ANSI escape sequences + before printing them. This happens on all platforms, and can be convenient if + you want to write your code to emit ANSI escape sequences unconditionally, and + let Colorama decide whether they should actually be output. But note that + Colorama's heuristic is not particularly clever. + +- ``init`` also accepts explicit keyword args to enable/disable various + functionality – see below. + +To stop using Colorama before your program exits, simply call ``deinit()``. +This will restore ``stdout`` and ``stderr`` to their original values, so that +Colorama is disabled. To resume using Colorama again, call ``reinit()``; it is +cheaper than calling ``init()`` again (but does the same thing). + +Most users should depend on ``colorama >= 0.4.6``, and use +``just_fix_windows_console``. The old ``init`` interface will be supported +indefinitely for backwards compatibility, but we don't plan to fix any issues +with it, also for backwards compatibility. + +Colored Output +.............. + +Cross-platform printing of colored text can then be done using Colorama's +constant shorthand for ANSI escape sequences. These are deliberately +rudimentary, see below. + +.. code-block:: python + + from colorama import Fore, Back, Style + print(Fore.RED + 'some red text') + print(Back.GREEN + 'and with a green background') + print(Style.DIM + 'and in dim text') + print(Style.RESET_ALL) + print('back to normal now') + +...or simply by manually printing ANSI sequences from your own code: + +.. code-block:: python + + print('\033[31m' + 'some red text') + print('\033[39m') # and reset to default color + +...or, Colorama can be used in conjunction with existing ANSI libraries +such as the venerable `Termcolor `_ +the fabulous `Blessings `_, +or the incredible `_Rich `_. + +If you wish Colorama's Fore, Back and Style constants were more capable, +then consider using one of the above highly capable libraries to generate +colors, etc, and use Colorama just for its primary purpose: to convert +those ANSI sequences to also work on Windows: + +SIMILARLY, do not send PRs adding the generation of new ANSI types to Colorama. +We are only interested in converting ANSI codes to win32 API calls, not +shortcuts like the above to generate ANSI characters. + +.. code-block:: python + + from colorama import just_fix_windows_console + from termcolor import colored + + # use Colorama to make Termcolor work on Windows too + just_fix_windows_console() + + # then use Termcolor for all colored text output + print(colored('Hello, World!', 'green', 'on_red')) + +Available formatting constants are:: + + Fore: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET. + Back: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET. + Style: DIM, NORMAL, BRIGHT, RESET_ALL + +``Style.RESET_ALL`` resets foreground, background, and brightness. Colorama will +perform this reset automatically on program exit. + +These are fairly well supported, but not part of the standard:: + + Fore: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX + Back: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX + +Cursor Positioning +.................. + +ANSI codes to reposition the cursor are supported. See ``demos/demo06.py`` for +an example of how to generate them. + +Init Keyword Args +................. + +``init()`` accepts some ``**kwargs`` to override default behaviour. + +init(autoreset=False): + If you find yourself repeatedly sending reset sequences to turn off color + changes at the end of every print, then ``init(autoreset=True)`` will + automate that: + + .. code-block:: python + + from colorama import init + init(autoreset=True) + print(Fore.RED + 'some red text') + print('automatically back to default color again') + +init(strip=None): + Pass ``True`` or ``False`` to override whether ANSI codes should be + stripped from the output. The default behaviour is to strip if on Windows + or if output is redirected (not a tty). + +init(convert=None): + Pass ``True`` or ``False`` to override whether to convert ANSI codes in the + output into win32 calls. The default behaviour is to convert if on Windows + and output is to a tty (terminal). + +init(wrap=True): + On Windows, Colorama works by replacing ``sys.stdout`` and ``sys.stderr`` + with proxy objects, which override the ``.write()`` method to do their work. + If this wrapping causes you problems, then this can be disabled by passing + ``init(wrap=False)``. The default behaviour is to wrap if ``autoreset`` or + ``strip`` or ``convert`` are True. + + When wrapping is disabled, colored printing on non-Windows platforms will + continue to work as normal. To do cross-platform colored output, you can + use Colorama's ``AnsiToWin32`` proxy directly: + + .. code-block:: python + + import sys + from colorama import init, AnsiToWin32 + init(wrap=False) + stream = AnsiToWin32(sys.stderr).stream + + # Python 2 + print >>stream, Fore.BLUE + 'blue text on stderr' + + # Python 3 + print(Fore.BLUE + 'blue text on stderr', file=stream) + +Recognised ANSI Sequences +......................... + +ANSI sequences generally take the form:: + + ESC [ ; ... + +Where ```` is an integer, and ```` is a single letter. Zero or +more params are passed to a ````. If no params are passed, it is +generally synonymous with passing a single zero. No spaces exist in the +sequence; they have been inserted here simply to read more easily. + +The only ANSI sequences that Colorama converts into win32 calls are:: + + ESC [ 0 m # reset all (colors and brightness) + ESC [ 1 m # bright + ESC [ 2 m # dim (looks same as normal brightness) + ESC [ 22 m # normal brightness + + # FOREGROUND: + ESC [ 30 m # black + ESC [ 31 m # red + ESC [ 32 m # green + ESC [ 33 m # yellow + ESC [ 34 m # blue + ESC [ 35 m # magenta + ESC [ 36 m # cyan + ESC [ 37 m # white + ESC [ 39 m # reset + + # BACKGROUND + ESC [ 40 m # black + ESC [ 41 m # red + ESC [ 42 m # green + ESC [ 43 m # yellow + ESC [ 44 m # blue + ESC [ 45 m # magenta + ESC [ 46 m # cyan + ESC [ 47 m # white + ESC [ 49 m # reset + + # cursor positioning + ESC [ y;x H # position cursor at x across, y down + ESC [ y;x f # position cursor at x across, y down + ESC [ n A # move cursor n lines up + ESC [ n B # move cursor n lines down + ESC [ n C # move cursor n characters forward + ESC [ n D # move cursor n characters backward + + # clear the screen + ESC [ mode J # clear the screen + + # clear the line + ESC [ mode K # clear the line + +Multiple numeric params to the ``'m'`` command can be combined into a single +sequence:: + + ESC [ 36 ; 45 ; 1 m # bright cyan text on magenta background + +All other ANSI sequences of the form ``ESC [ ; ... `` +are silently stripped from the output on Windows. + +Any other form of ANSI sequence, such as single-character codes or alternative +initial characters, are not recognised or stripped. It would be cool to add +them though. Let me know if it would be useful for you, via the Issues on +GitHub. + +Status & Known Problems +----------------------- + +I've personally only tested it on Windows XP (CMD, Console2), Ubuntu +(gnome-terminal, xterm), and OS X. + +Some valid ANSI sequences aren't recognised. + +If you're hacking on the code, see `README-hacking.md`_. ESPECIALLY, see the +explanation there of why we do not want PRs that allow Colorama to generate new +types of ANSI codes. + +See outstanding issues and wish-list: +https://github.com/tartley/colorama/issues + +If anything doesn't work for you, or doesn't do what you expected or hoped for, +I'd love to hear about it on that issues list, would be delighted by patches, +and would be happy to grant commit access to anyone who submits a working patch +or two. + +.. _README-hacking.md: README-hacking.md + +License +------- + +Copyright Jonathan Hartley & Arnon Yaari, 2013-2020. BSD 3-Clause license; see +LICENSE file. + +Professional support +-------------------- + +.. |tideliftlogo| image:: https://cdn2.hubspot.net/hubfs/4008838/website/logos/logos_for_download/Tidelift_primary-shorthand-logo.png + :alt: Tidelift + :target: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme + +.. list-table:: + :widths: 10 100 + + * - |tideliftlogo| + - Professional support for colorama is available as part of the + `Tidelift Subscription`_. + Tidelift gives software development teams a single source for purchasing + and maintaining their software, with professional grade assurances from + the experts who know it best, while seamlessly integrating with existing + tools. + +.. _Tidelift Subscription: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme + +Thanks +------ + +See the CHANGELOG for more thanks! + +* Marc Schlaich (schlamar) for a ``setup.py`` fix for Python2.5. +* Marc Abramowitz, reported & fixed a crash on exit with closed ``stdout``, + providing a solution to issue #7's setuptools/distutils debate, + and other fixes. +* User 'eryksun', for guidance on correctly instantiating ``ctypes.windll``. +* Matthew McCormick for politely pointing out a longstanding crash on non-Win. +* Ben Hoyt, for a magnificent fix under 64-bit Windows. +* Jesse at Empty Square for submitting a fix for examples in the README. +* User 'jamessp', an observant documentation fix for cursor positioning. +* User 'vaal1239', Dave Mckee & Lackner Kristof for a tiny but much-needed Win7 + fix. +* Julien Stuyck, for wisely suggesting Python3 compatible updates to README. +* Daniel Griffith for multiple fabulous patches. +* Oscar Lesta for a valuable fix to stop ANSI chars being sent to non-tty + output. +* Roger Binns, for many suggestions, valuable feedback, & bug reports. +* Tim Golden for thought and much appreciated feedback on the initial idea. +* User 'Zearin' for updates to the README file. +* John Szakmeister for adding support for light colors +* Charles Merriam for adding documentation to demos +* Jurko for a fix on 64-bit Windows CPython2.5 w/o ctypes +* Florian Bruhin for a fix when stdout or stderr are None +* Thomas Weininger for fixing ValueError on Windows +* Remi Rampin for better Github integration and fixes to the README file +* Simeon Visser for closing a file handle using 'with' and updating classifiers + to include Python 3.3 and 3.4 +* Andy Neff for fixing RESET of LIGHT_EX colors. +* Jonathan Hartley for the initial idea and implementation. diff --git a/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/RECORD b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..b0868a837c95f789a35c4c7abed0b1f47fffc9d4 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/RECORD @@ -0,0 +1,31 @@ +colorama-0.4.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +colorama-0.4.6.dist-info/METADATA,sha256=e67SnrUMOym9sz_4TjF3vxvAV4T3aF7NyqRHHH3YEMw,17158 +colorama-0.4.6.dist-info/RECORD,, +colorama-0.4.6.dist-info/WHEEL,sha256=cdcF4Fbd0FPtw2EMIOwH-3rSOTUdTCeOSXRMD1iLUb8,105 +colorama-0.4.6.dist-info/licenses/LICENSE.txt,sha256=ysNcAmhuXQSlpxQL-zs25zrtSWZW6JEQLkKIhteTAxg,1491 +colorama/__init__.py,sha256=wePQA4U20tKgYARySLEC047ucNX-g8pRLpYBuiHlLb8,266 +colorama/__pycache__/__init__.cpython-39.pyc,, +colorama/__pycache__/ansi.cpython-39.pyc,, +colorama/__pycache__/ansitowin32.cpython-39.pyc,, +colorama/__pycache__/initialise.cpython-39.pyc,, +colorama/__pycache__/win32.cpython-39.pyc,, +colorama/__pycache__/winterm.cpython-39.pyc,, +colorama/ansi.py,sha256=Top4EeEuaQdBWdteKMEcGOTeKeF19Q-Wo_6_Cj5kOzQ,2522 +colorama/ansitowin32.py,sha256=vPNYa3OZbxjbuFyaVo0Tmhmy1FZ1lKMWCnT7odXpItk,11128 +colorama/initialise.py,sha256=-hIny86ClXo39ixh5iSCfUIa2f_h_bgKRDW7gqs-KLU,3325 +colorama/tests/__init__.py,sha256=MkgPAEzGQd-Rq0w0PZXSX2LadRWhUECcisJY8lSrm4Q,75 +colorama/tests/__pycache__/__init__.cpython-39.pyc,, +colorama/tests/__pycache__/ansi_test.cpython-39.pyc,, +colorama/tests/__pycache__/ansitowin32_test.cpython-39.pyc,, +colorama/tests/__pycache__/initialise_test.cpython-39.pyc,, +colorama/tests/__pycache__/isatty_test.cpython-39.pyc,, +colorama/tests/__pycache__/utils.cpython-39.pyc,, +colorama/tests/__pycache__/winterm_test.cpython-39.pyc,, +colorama/tests/ansi_test.py,sha256=FeViDrUINIZcr505PAxvU4AjXz1asEiALs9GXMhwRaE,2839 +colorama/tests/ansitowin32_test.py,sha256=RN7AIhMJ5EqDsYaCjVo-o4u8JzDD4ukJbmevWKS70rY,10678 +colorama/tests/initialise_test.py,sha256=BbPy-XfyHwJ6zKozuQOvNvQZzsx9vdb_0bYXn7hsBTc,6741 +colorama/tests/isatty_test.py,sha256=Pg26LRpv0yQDB5Ac-sxgVXG7hsA1NYvapFgApZfYzZg,1866 +colorama/tests/utils.py,sha256=1IIRylG39z5-dzq09R_ngufxyPZxgldNbrxKxUGwGKE,1079 +colorama/tests/winterm_test.py,sha256=qoWFPEjym5gm2RuMwpf3pOis3a5r_PJZFCzK254JL8A,3709 +colorama/win32.py,sha256=YQOKwMTwtGBbsY4dL5HYTvwTeP9wIQra5MvPNddpxZs,6181 +colorama/winterm.py,sha256=XCQFDHjPi6AHYNdZwy0tA02H-Jh48Jp-HvCjeLeLp3U,7134 diff --git a/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..d79189fda3251187de18c3998f23ae6fec11b20f --- /dev/null +++ b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.11.1 +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any diff --git a/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/licenses/LICENSE.txt b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/licenses/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..3105888ec149d10cad51c11d332779e94b548661 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama-0.4.6.dist-info/licenses/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2010 Jonathan Hartley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holders, nor those of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/phivenv/Lib/site-packages/colorama/__init__.py b/phivenv/Lib/site-packages/colorama/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..383101cdb38706c305449674044e9288b92b7d75 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/__init__.py @@ -0,0 +1,7 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console +from .ansi import Fore, Back, Style, Cursor +from .ansitowin32 import AnsiToWin32 + +__version__ = '0.4.6' + diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/__init__.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19914c4901e53fb445ef5605f688c733ce8d30b8 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/__init__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/ansi.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/ansi.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e8ad4b0b731233618dbe2f6fbc5596303adda829 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/ansi.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/ansitowin32.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/ansitowin32.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db1dfe6c0fe40acd3fbbc2c81e786fa28c7ed54d Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/ansitowin32.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/initialise.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/initialise.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..caa3205ec124bc5913193e94d05b8196b9003a86 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/initialise.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/win32.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/win32.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..536651ec2c23840e196f3988daeba93ace6eebaa Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/win32.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/__pycache__/winterm.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/__pycache__/winterm.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..521e7b74213e44a8533ffda5819d0840edad5b9e Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/__pycache__/winterm.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/ansi.py b/phivenv/Lib/site-packages/colorama/ansi.py new file mode 100644 index 0000000000000000000000000000000000000000..11ec695ff79627463a0282d25079527562de9e42 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/ansi.py @@ -0,0 +1,102 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +''' +This module generates ANSI character codes to printing colors to terminals. +See: http://en.wikipedia.org/wiki/ANSI_escape_code +''' + +CSI = '\033[' +OSC = '\033]' +BEL = '\a' + + +def code_to_chars(code): + return CSI + str(code) + 'm' + +def set_title(title): + return OSC + '2;' + title + BEL + +def clear_screen(mode=2): + return CSI + str(mode) + 'J' + +def clear_line(mode=2): + return CSI + str(mode) + 'K' + + +class AnsiCodes(object): + def __init__(self): + # the subclasses declare class attributes which are numbers. + # Upon instantiation we define instance attributes, which are the same + # as the class attributes but wrapped with the ANSI escape sequence + for name in dir(self): + if not name.startswith('_'): + value = getattr(self, name) + setattr(self, name, code_to_chars(value)) + + +class AnsiCursor(object): + def UP(self, n=1): + return CSI + str(n) + 'A' + def DOWN(self, n=1): + return CSI + str(n) + 'B' + def FORWARD(self, n=1): + return CSI + str(n) + 'C' + def BACK(self, n=1): + return CSI + str(n) + 'D' + def POS(self, x=1, y=1): + return CSI + str(y) + ';' + str(x) + 'H' + + +class AnsiFore(AnsiCodes): + BLACK = 30 + RED = 31 + GREEN = 32 + YELLOW = 33 + BLUE = 34 + MAGENTA = 35 + CYAN = 36 + WHITE = 37 + RESET = 39 + + # These are fairly well supported, but not part of the standard. + LIGHTBLACK_EX = 90 + LIGHTRED_EX = 91 + LIGHTGREEN_EX = 92 + LIGHTYELLOW_EX = 93 + LIGHTBLUE_EX = 94 + LIGHTMAGENTA_EX = 95 + LIGHTCYAN_EX = 96 + LIGHTWHITE_EX = 97 + + +class AnsiBack(AnsiCodes): + BLACK = 40 + RED = 41 + GREEN = 42 + YELLOW = 43 + BLUE = 44 + MAGENTA = 45 + CYAN = 46 + WHITE = 47 + RESET = 49 + + # These are fairly well supported, but not part of the standard. + LIGHTBLACK_EX = 100 + LIGHTRED_EX = 101 + LIGHTGREEN_EX = 102 + LIGHTYELLOW_EX = 103 + LIGHTBLUE_EX = 104 + LIGHTMAGENTA_EX = 105 + LIGHTCYAN_EX = 106 + LIGHTWHITE_EX = 107 + + +class AnsiStyle(AnsiCodes): + BRIGHT = 1 + DIM = 2 + NORMAL = 22 + RESET_ALL = 0 + +Fore = AnsiFore() +Back = AnsiBack() +Style = AnsiStyle() +Cursor = AnsiCursor() diff --git a/phivenv/Lib/site-packages/colorama/ansitowin32.py b/phivenv/Lib/site-packages/colorama/ansitowin32.py new file mode 100644 index 0000000000000000000000000000000000000000..abf209e60c7c4a9b1ae57452e36b383969848c2e --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/ansitowin32.py @@ -0,0 +1,277 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import re +import sys +import os + +from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL +from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle +from .win32 import windll, winapi_test + + +winterm = None +if windll is not None: + winterm = WinTerm() + + +class StreamWrapper(object): + ''' + Wraps a stream (such as stdout), acting as a transparent proxy for all + attribute access apart from method 'write()', which is delegated to our + Converter instance. + ''' + def __init__(self, wrapped, converter): + # double-underscore everything to prevent clashes with names of + # attributes on the wrapped stream object. + self.__wrapped = wrapped + self.__convertor = converter + + def __getattr__(self, name): + return getattr(self.__wrapped, name) + + def __enter__(self, *args, **kwargs): + # special method lookup bypasses __getattr__/__getattribute__, see + # https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit + # thus, contextlib magic methods are not proxied via __getattr__ + return self.__wrapped.__enter__(*args, **kwargs) + + def __exit__(self, *args, **kwargs): + return self.__wrapped.__exit__(*args, **kwargs) + + def __setstate__(self, state): + self.__dict__ = state + + def __getstate__(self): + return self.__dict__ + + def write(self, text): + self.__convertor.write(text) + + def isatty(self): + stream = self.__wrapped + if 'PYCHARM_HOSTED' in os.environ: + if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__): + return True + try: + stream_isatty = stream.isatty + except AttributeError: + return False + else: + return stream_isatty() + + @property + def closed(self): + stream = self.__wrapped + try: + return stream.closed + # AttributeError in the case that the stream doesn't support being closed + # ValueError for the case that the stream has already been detached when atexit runs + except (AttributeError, ValueError): + return True + + +class AnsiToWin32(object): + ''' + Implements a 'write()' method which, on Windows, will strip ANSI character + sequences from the text, and if outputting to a tty, will convert them into + win32 function calls. + ''' + ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer + ANSI_OSC_RE = re.compile('\001?\033\\]([^\a]*)(\a)\002?') # Operating System Command + + def __init__(self, wrapped, convert=None, strip=None, autoreset=False): + # The wrapped stream (normally sys.stdout or sys.stderr) + self.wrapped = wrapped + + # should we reset colors to defaults after every .write() + self.autoreset = autoreset + + # create the proxy wrapping our output stream + self.stream = StreamWrapper(wrapped, self) + + on_windows = os.name == 'nt' + # We test if the WinAPI works, because even if we are on Windows + # we may be using a terminal that doesn't support the WinAPI + # (e.g. Cygwin Terminal). In this case it's up to the terminal + # to support the ANSI codes. + conversion_supported = on_windows and winapi_test() + try: + fd = wrapped.fileno() + except Exception: + fd = -1 + system_has_native_ansi = not on_windows or enable_vt_processing(fd) + have_tty = not self.stream.closed and self.stream.isatty() + need_conversion = conversion_supported and not system_has_native_ansi + + # should we strip ANSI sequences from our output? + if strip is None: + strip = need_conversion or not have_tty + self.strip = strip + + # should we should convert ANSI sequences into win32 calls? + if convert is None: + convert = need_conversion and have_tty + self.convert = convert + + # dict of ansi codes to win32 functions and parameters + self.win32_calls = self.get_win32_calls() + + # are we wrapping stderr? + self.on_stderr = self.wrapped is sys.stderr + + def should_wrap(self): + ''' + True if this class is actually needed. If false, then the output + stream will not be affected, nor will win32 calls be issued, so + wrapping stdout is not actually required. This will generally be + False on non-Windows platforms, unless optional functionality like + autoreset has been requested using kwargs to init() + ''' + return self.convert or self.strip or self.autoreset + + def get_win32_calls(self): + if self.convert and winterm: + return { + AnsiStyle.RESET_ALL: (winterm.reset_all, ), + AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT), + AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL), + AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL), + AnsiFore.BLACK: (winterm.fore, WinColor.BLACK), + AnsiFore.RED: (winterm.fore, WinColor.RED), + AnsiFore.GREEN: (winterm.fore, WinColor.GREEN), + AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW), + AnsiFore.BLUE: (winterm.fore, WinColor.BLUE), + AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA), + AnsiFore.CYAN: (winterm.fore, WinColor.CYAN), + AnsiFore.WHITE: (winterm.fore, WinColor.GREY), + AnsiFore.RESET: (winterm.fore, ), + AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True), + AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True), + AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True), + AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True), + AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True), + AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True), + AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True), + AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True), + AnsiBack.BLACK: (winterm.back, WinColor.BLACK), + AnsiBack.RED: (winterm.back, WinColor.RED), + AnsiBack.GREEN: (winterm.back, WinColor.GREEN), + AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW), + AnsiBack.BLUE: (winterm.back, WinColor.BLUE), + AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA), + AnsiBack.CYAN: (winterm.back, WinColor.CYAN), + AnsiBack.WHITE: (winterm.back, WinColor.GREY), + AnsiBack.RESET: (winterm.back, ), + AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True), + AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True), + AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True), + AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True), + AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True), + AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True), + AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True), + AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True), + } + return dict() + + def write(self, text): + if self.strip or self.convert: + self.write_and_convert(text) + else: + self.wrapped.write(text) + self.wrapped.flush() + if self.autoreset: + self.reset_all() + + + def reset_all(self): + if self.convert: + self.call_win32('m', (0,)) + elif not self.strip and not self.stream.closed: + self.wrapped.write(Style.RESET_ALL) + + + def write_and_convert(self, text): + ''' + Write the given text to our wrapped stream, stripping any ANSI + sequences from the text, and optionally converting them into win32 + calls. + ''' + cursor = 0 + text = self.convert_osc(text) + for match in self.ANSI_CSI_RE.finditer(text): + start, end = match.span() + self.write_plain_text(text, cursor, start) + self.convert_ansi(*match.groups()) + cursor = end + self.write_plain_text(text, cursor, len(text)) + + + def write_plain_text(self, text, start, end): + if start < end: + self.wrapped.write(text[start:end]) + self.wrapped.flush() + + + def convert_ansi(self, paramstring, command): + if self.convert: + params = self.extract_params(command, paramstring) + self.call_win32(command, params) + + + def extract_params(self, command, paramstring): + if command in 'Hf': + params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';')) + while len(params) < 2: + # defaults: + params = params + (1,) + else: + params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0) + if len(params) == 0: + # defaults: + if command in 'JKm': + params = (0,) + elif command in 'ABCD': + params = (1,) + + return params + + + def call_win32(self, command, params): + if command == 'm': + for param in params: + if param in self.win32_calls: + func_args = self.win32_calls[param] + func = func_args[0] + args = func_args[1:] + kwargs = dict(on_stderr=self.on_stderr) + func(*args, **kwargs) + elif command in 'J': + winterm.erase_screen(params[0], on_stderr=self.on_stderr) + elif command in 'K': + winterm.erase_line(params[0], on_stderr=self.on_stderr) + elif command in 'Hf': # cursor position - absolute + winterm.set_cursor_position(params, on_stderr=self.on_stderr) + elif command in 'ABCD': # cursor position - relative + n = params[0] + # A - up, B - down, C - forward, D - back + x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command] + winterm.cursor_adjust(x, y, on_stderr=self.on_stderr) + + + def convert_osc(self, text): + for match in self.ANSI_OSC_RE.finditer(text): + start, end = match.span() + text = text[:start] + text[end:] + paramstring, command = match.groups() + if command == BEL: + if paramstring.count(";") == 1: + params = paramstring.split(";") + # 0 - change title and icon (we will only change title) + # 1 - change icon (we don't support this) + # 2 - change title + if params[0] in '02': + winterm.set_title(params[1]) + return text + + + def flush(self): + self.wrapped.flush() diff --git a/phivenv/Lib/site-packages/colorama/initialise.py b/phivenv/Lib/site-packages/colorama/initialise.py new file mode 100644 index 0000000000000000000000000000000000000000..d5fd4b71fed1bb4871717f978f0c470280f099c1 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/initialise.py @@ -0,0 +1,121 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import atexit +import contextlib +import sys + +from .ansitowin32 import AnsiToWin32 + + +def _wipe_internal_state_for_tests(): + global orig_stdout, orig_stderr + orig_stdout = None + orig_stderr = None + + global wrapped_stdout, wrapped_stderr + wrapped_stdout = None + wrapped_stderr = None + + global atexit_done + atexit_done = False + + global fixed_windows_console + fixed_windows_console = False + + try: + # no-op if it wasn't registered + atexit.unregister(reset_all) + except AttributeError: + # python 2: no atexit.unregister. Oh well, we did our best. + pass + + +def reset_all(): + if AnsiToWin32 is not None: # Issue #74: objects might become None at exit + AnsiToWin32(orig_stdout).reset_all() + + +def init(autoreset=False, convert=None, strip=None, wrap=True): + + if not wrap and any([autoreset, convert, strip]): + raise ValueError('wrap=False conflicts with any other arg=True') + + global wrapped_stdout, wrapped_stderr + global orig_stdout, orig_stderr + + orig_stdout = sys.stdout + orig_stderr = sys.stderr + + if sys.stdout is None: + wrapped_stdout = None + else: + sys.stdout = wrapped_stdout = \ + wrap_stream(orig_stdout, convert, strip, autoreset, wrap) + if sys.stderr is None: + wrapped_stderr = None + else: + sys.stderr = wrapped_stderr = \ + wrap_stream(orig_stderr, convert, strip, autoreset, wrap) + + global atexit_done + if not atexit_done: + atexit.register(reset_all) + atexit_done = True + + +def deinit(): + if orig_stdout is not None: + sys.stdout = orig_stdout + if orig_stderr is not None: + sys.stderr = orig_stderr + + +def just_fix_windows_console(): + global fixed_windows_console + + if sys.platform != "win32": + return + if fixed_windows_console: + return + if wrapped_stdout is not None or wrapped_stderr is not None: + # Someone already ran init() and it did stuff, so we won't second-guess them + return + + # On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the + # native ANSI support in the console as a side-effect. We only need to actually + # replace sys.stdout/stderr if we're in the old-style conversion mode. + new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False) + if new_stdout.convert: + sys.stdout = new_stdout + new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False) + if new_stderr.convert: + sys.stderr = new_stderr + + fixed_windows_console = True + +@contextlib.contextmanager +def colorama_text(*args, **kwargs): + init(*args, **kwargs) + try: + yield + finally: + deinit() + + +def reinit(): + if wrapped_stdout is not None: + sys.stdout = wrapped_stdout + if wrapped_stderr is not None: + sys.stderr = wrapped_stderr + + +def wrap_stream(stream, convert, strip, autoreset, wrap): + if wrap: + wrapper = AnsiToWin32(stream, + convert=convert, strip=strip, autoreset=autoreset) + if wrapper.should_wrap(): + stream = wrapper.stream + return stream + + +# Use this for initial setup as well, to reduce code duplication +_wipe_internal_state_for_tests() diff --git a/phivenv/Lib/site-packages/colorama/tests/__init__.py b/phivenv/Lib/site-packages/colorama/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8c5661e93a205bf4fb22404d4fc50f902cc31369 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/__init__.py @@ -0,0 +1 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/__init__.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8c257ad625d997106af43048f7cfed80d49c51d Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/__init__.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansi_test.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansi_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27b6bfbadfb3f54a6c18b3555d1b5fbd4d6be287 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansi_test.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansitowin32_test.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansitowin32_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..591312258821a534da83e556f292793cde535e90 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/ansitowin32_test.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/initialise_test.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/initialise_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..286413bdbe8e1f30642228df8908232841405065 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/initialise_test.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/isatty_test.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/isatty_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a093a75bca678e4b21fa8c0d7ff110769e5cad8c Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/isatty_test.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/utils.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67c001cca9aa50eafee78ad33818948b5c51a486 Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/utils.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/__pycache__/winterm_test.cpython-39.pyc b/phivenv/Lib/site-packages/colorama/tests/__pycache__/winterm_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73b12967194ef5bb8333ed5ec26b125841e03beb Binary files /dev/null and b/phivenv/Lib/site-packages/colorama/tests/__pycache__/winterm_test.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/colorama/tests/ansi_test.py b/phivenv/Lib/site-packages/colorama/tests/ansi_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0a20c80f882066e0e1323b0c7f61e22913c32e35 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/ansi_test.py @@ -0,0 +1,76 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main + +from ..ansi import Back, Fore, Style +from ..ansitowin32 import AnsiToWin32 + +stdout_orig = sys.stdout +stderr_orig = sys.stderr + + +class AnsiTest(TestCase): + + def setUp(self): + # sanity check: stdout should be a file or StringIO object. + # It will only be AnsiToWin32 if init() has previously wrapped it + self.assertNotEqual(type(sys.stdout), AnsiToWin32) + self.assertNotEqual(type(sys.stderr), AnsiToWin32) + + def tearDown(self): + sys.stdout = stdout_orig + sys.stderr = stderr_orig + + + def testForeAttributes(self): + self.assertEqual(Fore.BLACK, '\033[30m') + self.assertEqual(Fore.RED, '\033[31m') + self.assertEqual(Fore.GREEN, '\033[32m') + self.assertEqual(Fore.YELLOW, '\033[33m') + self.assertEqual(Fore.BLUE, '\033[34m') + self.assertEqual(Fore.MAGENTA, '\033[35m') + self.assertEqual(Fore.CYAN, '\033[36m') + self.assertEqual(Fore.WHITE, '\033[37m') + self.assertEqual(Fore.RESET, '\033[39m') + + # Check the light, extended versions. + self.assertEqual(Fore.LIGHTBLACK_EX, '\033[90m') + self.assertEqual(Fore.LIGHTRED_EX, '\033[91m') + self.assertEqual(Fore.LIGHTGREEN_EX, '\033[92m') + self.assertEqual(Fore.LIGHTYELLOW_EX, '\033[93m') + self.assertEqual(Fore.LIGHTBLUE_EX, '\033[94m') + self.assertEqual(Fore.LIGHTMAGENTA_EX, '\033[95m') + self.assertEqual(Fore.LIGHTCYAN_EX, '\033[96m') + self.assertEqual(Fore.LIGHTWHITE_EX, '\033[97m') + + + def testBackAttributes(self): + self.assertEqual(Back.BLACK, '\033[40m') + self.assertEqual(Back.RED, '\033[41m') + self.assertEqual(Back.GREEN, '\033[42m') + self.assertEqual(Back.YELLOW, '\033[43m') + self.assertEqual(Back.BLUE, '\033[44m') + self.assertEqual(Back.MAGENTA, '\033[45m') + self.assertEqual(Back.CYAN, '\033[46m') + self.assertEqual(Back.WHITE, '\033[47m') + self.assertEqual(Back.RESET, '\033[49m') + + # Check the light, extended versions. + self.assertEqual(Back.LIGHTBLACK_EX, '\033[100m') + self.assertEqual(Back.LIGHTRED_EX, '\033[101m') + self.assertEqual(Back.LIGHTGREEN_EX, '\033[102m') + self.assertEqual(Back.LIGHTYELLOW_EX, '\033[103m') + self.assertEqual(Back.LIGHTBLUE_EX, '\033[104m') + self.assertEqual(Back.LIGHTMAGENTA_EX, '\033[105m') + self.assertEqual(Back.LIGHTCYAN_EX, '\033[106m') + self.assertEqual(Back.LIGHTWHITE_EX, '\033[107m') + + + def testStyleAttributes(self): + self.assertEqual(Style.DIM, '\033[2m') + self.assertEqual(Style.NORMAL, '\033[22m') + self.assertEqual(Style.BRIGHT, '\033[1m') + + +if __name__ == '__main__': + main() diff --git a/phivenv/Lib/site-packages/colorama/tests/ansitowin32_test.py b/phivenv/Lib/site-packages/colorama/tests/ansitowin32_test.py new file mode 100644 index 0000000000000000000000000000000000000000..91ca551f97b4576c680711e826a1855fb944c872 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/ansitowin32_test.py @@ -0,0 +1,294 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from io import StringIO, TextIOWrapper +from unittest import TestCase, main +try: + from contextlib import ExitStack +except ImportError: + # python 2 + from contextlib2 import ExitStack + +try: + from unittest.mock import MagicMock, Mock, patch +except ImportError: + from mock import MagicMock, Mock, patch + +from ..ansitowin32 import AnsiToWin32, StreamWrapper +from ..win32 import ENABLE_VIRTUAL_TERMINAL_PROCESSING +from .utils import osname + + +class StreamWrapperTest(TestCase): + + def testIsAProxy(self): + mockStream = Mock() + wrapper = StreamWrapper(mockStream, None) + self.assertTrue( wrapper.random_attr is mockStream.random_attr ) + + def testDelegatesWrite(self): + mockStream = Mock() + mockConverter = Mock() + wrapper = StreamWrapper(mockStream, mockConverter) + wrapper.write('hello') + self.assertTrue(mockConverter.write.call_args, (('hello',), {})) + + def testDelegatesContext(self): + mockConverter = Mock() + s = StringIO() + with StreamWrapper(s, mockConverter) as fp: + fp.write(u'hello') + self.assertTrue(s.closed) + + def testProxyNoContextManager(self): + mockStream = MagicMock() + mockStream.__enter__.side_effect = AttributeError() + mockConverter = Mock() + with self.assertRaises(AttributeError) as excinfo: + with StreamWrapper(mockStream, mockConverter) as wrapper: + wrapper.write('hello') + + def test_closed_shouldnt_raise_on_closed_stream(self): + stream = StringIO() + stream.close() + wrapper = StreamWrapper(stream, None) + self.assertEqual(wrapper.closed, True) + + def test_closed_shouldnt_raise_on_detached_stream(self): + stream = TextIOWrapper(StringIO()) + stream.detach() + wrapper = StreamWrapper(stream, None) + self.assertEqual(wrapper.closed, True) + +class AnsiToWin32Test(TestCase): + + def testInit(self): + mockStdout = Mock() + auto = Mock() + stream = AnsiToWin32(mockStdout, autoreset=auto) + self.assertEqual(stream.wrapped, mockStdout) + self.assertEqual(stream.autoreset, auto) + + @patch('colorama.ansitowin32.winterm', None) + @patch('colorama.ansitowin32.winapi_test', lambda *_: True) + def testStripIsTrueOnWindows(self): + with osname('nt'): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + self.assertTrue(stream.strip) + + def testStripIsFalseOffWindows(self): + with osname('posix'): + mockStdout = Mock(closed=False) + stream = AnsiToWin32(mockStdout) + self.assertFalse(stream.strip) + + def testWriteStripsAnsi(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + stream.wrapped = Mock() + stream.write_and_convert = Mock() + stream.strip = True + + stream.write('abc') + + self.assertFalse(stream.wrapped.write.called) + self.assertEqual(stream.write_and_convert.call_args, (('abc',), {})) + + def testWriteDoesNotStripAnsi(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + stream.wrapped = Mock() + stream.write_and_convert = Mock() + stream.strip = False + stream.convert = False + + stream.write('abc') + + self.assertFalse(stream.write_and_convert.called) + self.assertEqual(stream.wrapped.write.call_args, (('abc',), {})) + + def assert_autoresets(self, convert, autoreset=True): + stream = AnsiToWin32(Mock()) + stream.convert = convert + stream.reset_all = Mock() + stream.autoreset = autoreset + stream.winterm = Mock() + + stream.write('abc') + + self.assertEqual(stream.reset_all.called, autoreset) + + def testWriteAutoresets(self): + self.assert_autoresets(convert=True) + self.assert_autoresets(convert=False) + self.assert_autoresets(convert=True, autoreset=False) + self.assert_autoresets(convert=False, autoreset=False) + + def testWriteAndConvertWritesPlainText(self): + stream = AnsiToWin32(Mock()) + stream.write_and_convert( 'abc' ) + self.assertEqual( stream.wrapped.write.call_args, (('abc',), {}) ) + + def testWriteAndConvertStripsAllValidAnsi(self): + stream = AnsiToWin32(Mock()) + stream.call_win32 = Mock() + data = [ + 'abc\033[mdef', + 'abc\033[0mdef', + 'abc\033[2mdef', + 'abc\033[02mdef', + 'abc\033[002mdef', + 'abc\033[40mdef', + 'abc\033[040mdef', + 'abc\033[0;1mdef', + 'abc\033[40;50mdef', + 'abc\033[50;30;40mdef', + 'abc\033[Adef', + 'abc\033[0Gdef', + 'abc\033[1;20;128Hdef', + ] + for datum in data: + stream.wrapped.write.reset_mock() + stream.write_and_convert( datum ) + self.assertEqual( + [args[0] for args in stream.wrapped.write.call_args_list], + [ ('abc',), ('def',) ] + ) + + def testWriteAndConvertSkipsEmptySnippets(self): + stream = AnsiToWin32(Mock()) + stream.call_win32 = Mock() + stream.write_and_convert( '\033[40m\033[41m' ) + self.assertFalse( stream.wrapped.write.called ) + + def testWriteAndConvertCallsWin32WithParamsAndCommand(self): + stream = AnsiToWin32(Mock()) + stream.convert = True + stream.call_win32 = Mock() + stream.extract_params = Mock(return_value='params') + data = { + 'abc\033[adef': ('a', 'params'), + 'abc\033[;;bdef': ('b', 'params'), + 'abc\033[0cdef': ('c', 'params'), + 'abc\033[;;0;;Gdef': ('G', 'params'), + 'abc\033[1;20;128Hdef': ('H', 'params'), + } + for datum, expected in data.items(): + stream.call_win32.reset_mock() + stream.write_and_convert( datum ) + self.assertEqual( stream.call_win32.call_args[0], expected ) + + def test_reset_all_shouldnt_raise_on_closed_orig_stdout(self): + stream = StringIO() + converter = AnsiToWin32(stream) + stream.close() + + converter.reset_all() + + def test_wrap_shouldnt_raise_on_closed_orig_stdout(self): + stream = StringIO() + stream.close() + with \ + patch("colorama.ansitowin32.os.name", "nt"), \ + patch("colorama.ansitowin32.winapi_test", lambda: True): + converter = AnsiToWin32(stream) + self.assertTrue(converter.strip) + self.assertFalse(converter.convert) + + def test_wrap_shouldnt_raise_on_missing_closed_attr(self): + with \ + patch("colorama.ansitowin32.os.name", "nt"), \ + patch("colorama.ansitowin32.winapi_test", lambda: True): + converter = AnsiToWin32(object()) + self.assertTrue(converter.strip) + self.assertFalse(converter.convert) + + def testExtractParams(self): + stream = AnsiToWin32(Mock()) + data = { + '': (0,), + ';;': (0,), + '2': (2,), + ';;002;;': (2,), + '0;1': (0, 1), + ';;003;;456;;': (3, 456), + '11;22;33;44;55': (11, 22, 33, 44, 55), + } + for datum, expected in data.items(): + self.assertEqual(stream.extract_params('m', datum), expected) + + def testCallWin32UsesLookup(self): + listener = Mock() + stream = AnsiToWin32(listener) + stream.win32_calls = { + 1: (lambda *_, **__: listener(11),), + 2: (lambda *_, **__: listener(22),), + 3: (lambda *_, **__: listener(33),), + } + stream.call_win32('m', (3, 1, 99, 2)) + self.assertEqual( + [a[0][0] for a in listener.call_args_list], + [33, 11, 22] ) + + def test_osc_codes(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout, convert=True) + with patch('colorama.ansitowin32.winterm') as winterm: + data = [ + '\033]0\x07', # missing arguments + '\033]0;foo\x08', # wrong OSC command + '\033]0;colorama_test_title\x07', # should work + '\033]1;colorama_test_title\x07', # wrong set command + '\033]2;colorama_test_title\x07', # should work + '\033]' + ';' * 64 + '\x08', # see issue #247 + ] + for code in data: + stream.write(code) + self.assertEqual(winterm.set_title.call_count, 2) + + def test_native_windows_ansi(self): + with ExitStack() as stack: + def p(a, b): + stack.enter_context(patch(a, b, create=True)) + # Pretend to be on Windows + p("colorama.ansitowin32.os.name", "nt") + p("colorama.ansitowin32.winapi_test", lambda: True) + p("colorama.win32.winapi_test", lambda: True) + p("colorama.winterm.win32.windll", "non-None") + p("colorama.winterm.get_osfhandle", lambda _: 1234) + + # Pretend that our mock stream has native ANSI support + p( + "colorama.winterm.win32.GetConsoleMode", + lambda _: ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + SetConsoleMode = Mock() + p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode) + + stdout = Mock() + stdout.closed = False + stdout.isatty.return_value = True + stdout.fileno.return_value = 1 + + # Our fake console says it has native vt support, so AnsiToWin32 should + # enable that support and do nothing else. + stream = AnsiToWin32(stdout) + SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING) + self.assertFalse(stream.strip) + self.assertFalse(stream.convert) + self.assertFalse(stream.should_wrap()) + + # Now let's pretend we're on an old Windows console, that doesn't have + # native ANSI support. + p("colorama.winterm.win32.GetConsoleMode", lambda _: 0) + SetConsoleMode = Mock() + p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode) + + stream = AnsiToWin32(stdout) + SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING) + self.assertTrue(stream.strip) + self.assertTrue(stream.convert) + self.assertTrue(stream.should_wrap()) + + +if __name__ == '__main__': + main() diff --git a/phivenv/Lib/site-packages/colorama/tests/initialise_test.py b/phivenv/Lib/site-packages/colorama/tests/initialise_test.py new file mode 100644 index 0000000000000000000000000000000000000000..89f9b07511c8fee74686d9cc434bf66345a46d6d --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/initialise_test.py @@ -0,0 +1,189 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main, skipUnless + +try: + from unittest.mock import patch, Mock +except ImportError: + from mock import patch, Mock + +from ..ansitowin32 import StreamWrapper +from ..initialise import init, just_fix_windows_console, _wipe_internal_state_for_tests +from .utils import osname, replace_by + +orig_stdout = sys.stdout +orig_stderr = sys.stderr + + +class InitTest(TestCase): + + @skipUnless(sys.stdout.isatty(), "sys.stdout is not a tty") + def setUp(self): + # sanity check + self.assertNotWrapped() + + def tearDown(self): + _wipe_internal_state_for_tests() + sys.stdout = orig_stdout + sys.stderr = orig_stderr + + def assertWrapped(self): + self.assertIsNot(sys.stdout, orig_stdout, 'stdout should be wrapped') + self.assertIsNot(sys.stderr, orig_stderr, 'stderr should be wrapped') + self.assertTrue(isinstance(sys.stdout, StreamWrapper), + 'bad stdout wrapper') + self.assertTrue(isinstance(sys.stderr, StreamWrapper), + 'bad stderr wrapper') + + def assertNotWrapped(self): + self.assertIs(sys.stdout, orig_stdout, 'stdout should not be wrapped') + self.assertIs(sys.stderr, orig_stderr, 'stderr should not be wrapped') + + @patch('colorama.initialise.reset_all') + @patch('colorama.ansitowin32.winapi_test', lambda *_: True) + @patch('colorama.ansitowin32.enable_vt_processing', lambda *_: False) + def testInitWrapsOnWindows(self, _): + with osname("nt"): + init() + self.assertWrapped() + + @patch('colorama.initialise.reset_all') + @patch('colorama.ansitowin32.winapi_test', lambda *_: False) + def testInitDoesntWrapOnEmulatedWindows(self, _): + with osname("nt"): + init() + self.assertNotWrapped() + + def testInitDoesntWrapOnNonWindows(self): + with osname("posix"): + init() + self.assertNotWrapped() + + def testInitDoesntWrapIfNone(self): + with replace_by(None): + init() + # We can't use assertNotWrapped here because replace_by(None) + # changes stdout/stderr already. + self.assertIsNone(sys.stdout) + self.assertIsNone(sys.stderr) + + def testInitAutoresetOnWrapsOnAllPlatforms(self): + with osname("posix"): + init(autoreset=True) + self.assertWrapped() + + def testInitWrapOffDoesntWrapOnWindows(self): + with osname("nt"): + init(wrap=False) + self.assertNotWrapped() + + def testInitWrapOffIncompatibleWithAutoresetOn(self): + self.assertRaises(ValueError, lambda: init(autoreset=True, wrap=False)) + + @patch('colorama.win32.SetConsoleTextAttribute') + @patch('colorama.initialise.AnsiToWin32') + def testAutoResetPassedOn(self, mockATW32, _): + with osname("nt"): + init(autoreset=True) + self.assertEqual(len(mockATW32.call_args_list), 2) + self.assertEqual(mockATW32.call_args_list[1][1]['autoreset'], True) + self.assertEqual(mockATW32.call_args_list[0][1]['autoreset'], True) + + @patch('colorama.initialise.AnsiToWin32') + def testAutoResetChangeable(self, mockATW32): + with osname("nt"): + init() + + init(autoreset=True) + self.assertEqual(len(mockATW32.call_args_list), 4) + self.assertEqual(mockATW32.call_args_list[2][1]['autoreset'], True) + self.assertEqual(mockATW32.call_args_list[3][1]['autoreset'], True) + + init() + self.assertEqual(len(mockATW32.call_args_list), 6) + self.assertEqual( + mockATW32.call_args_list[4][1]['autoreset'], False) + self.assertEqual( + mockATW32.call_args_list[5][1]['autoreset'], False) + + + @patch('colorama.initialise.atexit.register') + def testAtexitRegisteredOnlyOnce(self, mockRegister): + init() + self.assertTrue(mockRegister.called) + mockRegister.reset_mock() + init() + self.assertFalse(mockRegister.called) + + +class JustFixWindowsConsoleTest(TestCase): + def _reset(self): + _wipe_internal_state_for_tests() + sys.stdout = orig_stdout + sys.stderr = orig_stderr + + def tearDown(self): + self._reset() + + @patch("colorama.ansitowin32.winapi_test", lambda: True) + def testJustFixWindowsConsole(self): + if sys.platform != "win32": + # just_fix_windows_console should be a no-op + just_fix_windows_console() + self.assertIs(sys.stdout, orig_stdout) + self.assertIs(sys.stderr, orig_stderr) + else: + def fake_std(): + # Emulate stdout=not a tty, stderr=tty + # to check that we handle both cases correctly + stdout = Mock() + stdout.closed = False + stdout.isatty.return_value = False + stdout.fileno.return_value = 1 + sys.stdout = stdout + + stderr = Mock() + stderr.closed = False + stderr.isatty.return_value = True + stderr.fileno.return_value = 2 + sys.stderr = stderr + + for native_ansi in [False, True]: + with patch( + 'colorama.ansitowin32.enable_vt_processing', + lambda *_: native_ansi + ): + self._reset() + fake_std() + + # Regular single-call test + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(sys.stdout, prev_stdout) + if native_ansi: + self.assertIs(sys.stderr, prev_stderr) + else: + self.assertIsNot(sys.stderr, prev_stderr) + + # second call without resetting is always a no-op + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(sys.stdout, prev_stdout) + self.assertIs(sys.stderr, prev_stderr) + + self._reset() + fake_std() + + # If init() runs first, just_fix_windows_console should be a no-op + init() + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(prev_stdout, sys.stdout) + self.assertIs(prev_stderr, sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/phivenv/Lib/site-packages/colorama/tests/isatty_test.py b/phivenv/Lib/site-packages/colorama/tests/isatty_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0f84e4befe550d4386d24264648abf1323e682ff --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/isatty_test.py @@ -0,0 +1,57 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main + +from ..ansitowin32 import StreamWrapper, AnsiToWin32 +from .utils import pycharm, replace_by, replace_original_by, StreamTTY, StreamNonTTY + + +def is_a_tty(stream): + return StreamWrapper(stream, None).isatty() + +class IsattyTest(TestCase): + + def test_TTY(self): + tty = StreamTTY() + self.assertTrue(is_a_tty(tty)) + with pycharm(): + self.assertTrue(is_a_tty(tty)) + + def test_nonTTY(self): + non_tty = StreamNonTTY() + self.assertFalse(is_a_tty(non_tty)) + with pycharm(): + self.assertFalse(is_a_tty(non_tty)) + + def test_withPycharm(self): + with pycharm(): + self.assertTrue(is_a_tty(sys.stderr)) + self.assertTrue(is_a_tty(sys.stdout)) + + def test_withPycharmTTYOverride(self): + tty = StreamTTY() + with pycharm(), replace_by(tty): + self.assertTrue(is_a_tty(tty)) + + def test_withPycharmNonTTYOverride(self): + non_tty = StreamNonTTY() + with pycharm(), replace_by(non_tty): + self.assertFalse(is_a_tty(non_tty)) + + def test_withPycharmNoneOverride(self): + with pycharm(): + with replace_by(None), replace_original_by(None): + self.assertFalse(is_a_tty(None)) + self.assertFalse(is_a_tty(StreamNonTTY())) + self.assertTrue(is_a_tty(StreamTTY())) + + def test_withPycharmStreamWrapped(self): + with pycharm(): + self.assertTrue(AnsiToWin32(StreamTTY()).stream.isatty()) + self.assertFalse(AnsiToWin32(StreamNonTTY()).stream.isatty()) + self.assertTrue(AnsiToWin32(sys.stdout).stream.isatty()) + self.assertTrue(AnsiToWin32(sys.stderr).stream.isatty()) + + +if __name__ == '__main__': + main() diff --git a/phivenv/Lib/site-packages/colorama/tests/utils.py b/phivenv/Lib/site-packages/colorama/tests/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..472fafb4403efb9673d5cc724dafd9cf764aac5b --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/utils.py @@ -0,0 +1,49 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from contextlib import contextmanager +from io import StringIO +import sys +import os + + +class StreamTTY(StringIO): + def isatty(self): + return True + +class StreamNonTTY(StringIO): + def isatty(self): + return False + +@contextmanager +def osname(name): + orig = os.name + os.name = name + yield + os.name = orig + +@contextmanager +def replace_by(stream): + orig_stdout = sys.stdout + orig_stderr = sys.stderr + sys.stdout = stream + sys.stderr = stream + yield + sys.stdout = orig_stdout + sys.stderr = orig_stderr + +@contextmanager +def replace_original_by(stream): + orig_stdout = sys.__stdout__ + orig_stderr = sys.__stderr__ + sys.__stdout__ = stream + sys.__stderr__ = stream + yield + sys.__stdout__ = orig_stdout + sys.__stderr__ = orig_stderr + +@contextmanager +def pycharm(): + os.environ["PYCHARM_HOSTED"] = "1" + non_tty = StreamNonTTY() + with replace_by(non_tty), replace_original_by(non_tty): + yield + del os.environ["PYCHARM_HOSTED"] diff --git a/phivenv/Lib/site-packages/colorama/tests/winterm_test.py b/phivenv/Lib/site-packages/colorama/tests/winterm_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d0955f9e608377940f0d548576964f2fcf3caf48 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/tests/winterm_test.py @@ -0,0 +1,131 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main, skipUnless + +try: + from unittest.mock import Mock, patch +except ImportError: + from mock import Mock, patch + +from ..winterm import WinColor, WinStyle, WinTerm + + +class WinTermTest(TestCase): + + @patch('colorama.winterm.win32') + def testInit(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 7 + 6 * 16 + 8 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + self.assertEqual(term._fore, 7) + self.assertEqual(term._back, 6) + self.assertEqual(term._style, 8) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testGetAttrs(self): + term = WinTerm() + + term._fore = 0 + term._back = 0 + term._style = 0 + self.assertEqual(term.get_attrs(), 0) + + term._fore = WinColor.YELLOW + self.assertEqual(term.get_attrs(), WinColor.YELLOW) + + term._back = WinColor.MAGENTA + self.assertEqual( + term.get_attrs(), + WinColor.YELLOW + WinColor.MAGENTA * 16) + + term._style = WinStyle.BRIGHT + self.assertEqual( + term.get_attrs(), + WinColor.YELLOW + WinColor.MAGENTA * 16 + WinStyle.BRIGHT) + + @patch('colorama.winterm.win32') + def testResetAll(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 1 + 2 * 16 + 8 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + + term.set_console = Mock() + term._fore = -1 + term._back = -1 + term._style = -1 + + term.reset_all() + + self.assertEqual(term._fore, 1) + self.assertEqual(term._back, 2) + self.assertEqual(term._style, 8) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testFore(self): + term = WinTerm() + term.set_console = Mock() + term._fore = 0 + + term.fore(5) + + self.assertEqual(term._fore, 5) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testBack(self): + term = WinTerm() + term.set_console = Mock() + term._back = 0 + + term.back(5) + + self.assertEqual(term._back, 5) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testStyle(self): + term = WinTerm() + term.set_console = Mock() + term._style = 0 + + term.style(22) + + self.assertEqual(term._style, 22) + self.assertEqual(term.set_console.called, True) + + @patch('colorama.winterm.win32') + def testSetConsole(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 0 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + term.windll = Mock() + + term.set_console() + + self.assertEqual( + mockWin32.SetConsoleTextAttribute.call_args, + ((mockWin32.STDOUT, term.get_attrs()), {}) + ) + + @patch('colorama.winterm.win32') + def testSetConsoleOnStderr(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 0 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + term.windll = Mock() + + term.set_console(on_stderr=True) + + self.assertEqual( + mockWin32.SetConsoleTextAttribute.call_args, + ((mockWin32.STDERR, term.get_attrs()), {}) + ) + + +if __name__ == '__main__': + main() diff --git a/phivenv/Lib/site-packages/colorama/win32.py b/phivenv/Lib/site-packages/colorama/win32.py new file mode 100644 index 0000000000000000000000000000000000000000..841b0e270a381cdfaca544a9be976d7276d83b1e --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/win32.py @@ -0,0 +1,180 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. + +# from winbase.h +STDOUT = -11 +STDERR = -12 + +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + +try: + import ctypes + from ctypes import LibraryLoader + windll = LibraryLoader(ctypes.WinDLL) + from ctypes import wintypes +except (AttributeError, ImportError): + windll = None + SetConsoleTextAttribute = lambda *_: None + winapi_test = lambda *_: None +else: + from ctypes import byref, Structure, c_char, POINTER + + COORD = wintypes._COORD + + class CONSOLE_SCREEN_BUFFER_INFO(Structure): + """struct in wincon.h.""" + _fields_ = [ + ("dwSize", COORD), + ("dwCursorPosition", COORD), + ("wAttributes", wintypes.WORD), + ("srWindow", wintypes.SMALL_RECT), + ("dwMaximumWindowSize", COORD), + ] + def __str__(self): + return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % ( + self.dwSize.Y, self.dwSize.X + , self.dwCursorPosition.Y, self.dwCursorPosition.X + , self.wAttributes + , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right + , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X + ) + + _GetStdHandle = windll.kernel32.GetStdHandle + _GetStdHandle.argtypes = [ + wintypes.DWORD, + ] + _GetStdHandle.restype = wintypes.HANDLE + + _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo + _GetConsoleScreenBufferInfo.argtypes = [ + wintypes.HANDLE, + POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + _GetConsoleScreenBufferInfo.restype = wintypes.BOOL + + _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute + _SetConsoleTextAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + ] + _SetConsoleTextAttribute.restype = wintypes.BOOL + + _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition + _SetConsoleCursorPosition.argtypes = [ + wintypes.HANDLE, + COORD, + ] + _SetConsoleCursorPosition.restype = wintypes.BOOL + + _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA + _FillConsoleOutputCharacterA.argtypes = [ + wintypes.HANDLE, + c_char, + wintypes.DWORD, + COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputCharacterA.restype = wintypes.BOOL + + _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute + _FillConsoleOutputAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + wintypes.DWORD, + COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputAttribute.restype = wintypes.BOOL + + _SetConsoleTitleW = windll.kernel32.SetConsoleTitleW + _SetConsoleTitleW.argtypes = [ + wintypes.LPCWSTR + ] + _SetConsoleTitleW.restype = wintypes.BOOL + + _GetConsoleMode = windll.kernel32.GetConsoleMode + _GetConsoleMode.argtypes = [ + wintypes.HANDLE, + POINTER(wintypes.DWORD) + ] + _GetConsoleMode.restype = wintypes.BOOL + + _SetConsoleMode = windll.kernel32.SetConsoleMode + _SetConsoleMode.argtypes = [ + wintypes.HANDLE, + wintypes.DWORD + ] + _SetConsoleMode.restype = wintypes.BOOL + + def _winapi_test(handle): + csbi = CONSOLE_SCREEN_BUFFER_INFO() + success = _GetConsoleScreenBufferInfo( + handle, byref(csbi)) + return bool(success) + + def winapi_test(): + return any(_winapi_test(h) for h in + (_GetStdHandle(STDOUT), _GetStdHandle(STDERR))) + + def GetConsoleScreenBufferInfo(stream_id=STDOUT): + handle = _GetStdHandle(stream_id) + csbi = CONSOLE_SCREEN_BUFFER_INFO() + success = _GetConsoleScreenBufferInfo( + handle, byref(csbi)) + return csbi + + def SetConsoleTextAttribute(stream_id, attrs): + handle = _GetStdHandle(stream_id) + return _SetConsoleTextAttribute(handle, attrs) + + def SetConsoleCursorPosition(stream_id, position, adjust=True): + position = COORD(*position) + # If the position is out of range, do nothing. + if position.Y <= 0 or position.X <= 0: + return + # Adjust for Windows' SetConsoleCursorPosition: + # 1. being 0-based, while ANSI is 1-based. + # 2. expecting (x,y), while ANSI uses (y,x). + adjusted_position = COORD(position.Y - 1, position.X - 1) + if adjust: + # Adjust for viewport's scroll position + sr = GetConsoleScreenBufferInfo(STDOUT).srWindow + adjusted_position.Y += sr.Top + adjusted_position.X += sr.Left + # Resume normal processing + handle = _GetStdHandle(stream_id) + return _SetConsoleCursorPosition(handle, adjusted_position) + + def FillConsoleOutputCharacter(stream_id, char, length, start): + handle = _GetStdHandle(stream_id) + char = c_char(char.encode()) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + success = _FillConsoleOutputCharacterA( + handle, char, length, start, byref(num_written)) + return num_written.value + + def FillConsoleOutputAttribute(stream_id, attr, length, start): + ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' + handle = _GetStdHandle(stream_id) + attribute = wintypes.WORD(attr) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + return _FillConsoleOutputAttribute( + handle, attribute, length, start, byref(num_written)) + + def SetConsoleTitle(title): + return _SetConsoleTitleW(title) + + def GetConsoleMode(handle): + mode = wintypes.DWORD() + success = _GetConsoleMode(handle, byref(mode)) + if not success: + raise ctypes.WinError() + return mode.value + + def SetConsoleMode(handle, mode): + success = _SetConsoleMode(handle, mode) + if not success: + raise ctypes.WinError() diff --git a/phivenv/Lib/site-packages/colorama/winterm.py b/phivenv/Lib/site-packages/colorama/winterm.py new file mode 100644 index 0000000000000000000000000000000000000000..aad867e8c80b826bf6a060116f17fa08a8eb0765 --- /dev/null +++ b/phivenv/Lib/site-packages/colorama/winterm.py @@ -0,0 +1,195 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +try: + from msvcrt import get_osfhandle +except ImportError: + def get_osfhandle(_): + raise OSError("This isn't windows!") + + +from . import win32 + +# from wincon.h +class WinColor(object): + BLACK = 0 + BLUE = 1 + GREEN = 2 + CYAN = 3 + RED = 4 + MAGENTA = 5 + YELLOW = 6 + GREY = 7 + +# from wincon.h +class WinStyle(object): + NORMAL = 0x00 # dim text, dim background + BRIGHT = 0x08 # bright text, dim background + BRIGHT_BACKGROUND = 0x80 # dim text, bright background + +class WinTerm(object): + + def __init__(self): + self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes + self.set_attrs(self._default) + self._default_fore = self._fore + self._default_back = self._back + self._default_style = self._style + # In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style. + # So that LIGHT_EX colors and BRIGHT style do not clobber each other, + # we track them separately, since LIGHT_EX is overwritten by Fore/Back + # and BRIGHT is overwritten by Style codes. + self._light = 0 + + def get_attrs(self): + return self._fore + self._back * 16 + (self._style | self._light) + + def set_attrs(self, value): + self._fore = value & 7 + self._back = (value >> 4) & 7 + self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND) + + def reset_all(self, on_stderr=None): + self.set_attrs(self._default) + self.set_console(attrs=self._default) + self._light = 0 + + def fore(self, fore=None, light=False, on_stderr=False): + if fore is None: + fore = self._default_fore + self._fore = fore + # Emulate LIGHT_EX with BRIGHT Style + if light: + self._light |= WinStyle.BRIGHT + else: + self._light &= ~WinStyle.BRIGHT + self.set_console(on_stderr=on_stderr) + + def back(self, back=None, light=False, on_stderr=False): + if back is None: + back = self._default_back + self._back = back + # Emulate LIGHT_EX with BRIGHT_BACKGROUND Style + if light: + self._light |= WinStyle.BRIGHT_BACKGROUND + else: + self._light &= ~WinStyle.BRIGHT_BACKGROUND + self.set_console(on_stderr=on_stderr) + + def style(self, style=None, on_stderr=False): + if style is None: + style = self._default_style + self._style = style + self.set_console(on_stderr=on_stderr) + + def set_console(self, attrs=None, on_stderr=False): + if attrs is None: + attrs = self.get_attrs() + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleTextAttribute(handle, attrs) + + def get_position(self, handle): + position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition + # Because Windows coordinates are 0-based, + # and win32.SetConsoleCursorPosition expects 1-based. + position.X += 1 + position.Y += 1 + return position + + def set_cursor_position(self, position=None, on_stderr=False): + if position is None: + # I'm not currently tracking the position, so there is no default. + # position = self.get_position() + return + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleCursorPosition(handle, position) + + def cursor_adjust(self, x, y, on_stderr=False): + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + position = self.get_position(handle) + adjusted_position = (position.Y + y, position.X + x) + win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False) + + def erase_screen(self, mode=0, on_stderr=False): + # 0 should clear from the cursor to the end of the screen. + # 1 should clear from the cursor to the beginning of the screen. + # 2 should clear the entire screen, and move cursor to (1,1) + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + csbi = win32.GetConsoleScreenBufferInfo(handle) + # get the number of character cells in the current buffer + cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y + # get number of character cells before current cursor position + cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X + if mode == 0: + from_coord = csbi.dwCursorPosition + cells_to_erase = cells_in_screen - cells_before_cursor + elif mode == 1: + from_coord = win32.COORD(0, 0) + cells_to_erase = cells_before_cursor + elif mode == 2: + from_coord = win32.COORD(0, 0) + cells_to_erase = cells_in_screen + else: + # invalid mode + return + # fill the entire screen with blanks + win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) + # now set the buffer's attributes accordingly + win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) + if mode == 2: + # put the cursor where needed + win32.SetConsoleCursorPosition(handle, (1, 1)) + + def erase_line(self, mode=0, on_stderr=False): + # 0 should clear from the cursor to the end of the line. + # 1 should clear from the cursor to the beginning of the line. + # 2 should clear the entire line. + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + csbi = win32.GetConsoleScreenBufferInfo(handle) + if mode == 0: + from_coord = csbi.dwCursorPosition + cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X + elif mode == 1: + from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) + cells_to_erase = csbi.dwCursorPosition.X + elif mode == 2: + from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) + cells_to_erase = csbi.dwSize.X + else: + # invalid mode + return + # fill the entire screen with blanks + win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) + # now set the buffer's attributes accordingly + win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) + + def set_title(self, title): + win32.SetConsoleTitle(title) + + +def enable_vt_processing(fd): + if win32.windll is None or not win32.winapi_test(): + return False + + try: + handle = get_osfhandle(fd) + mode = win32.GetConsoleMode(handle) + win32.SetConsoleMode( + handle, + mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + + mode = win32.GetConsoleMode(handle) + if mode & win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING: + return True + # Can get TypeError in testsuite where 'fd' is a Mock() + except (OSError, TypeError): + return False diff --git a/phivenv/Lib/site-packages/filelock/__init__.py b/phivenv/Lib/site-packages/filelock/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9d8c5b8ebe565a652b3671b3dfa066f7346af45 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/__init__.py @@ -0,0 +1,70 @@ +""" +A platform independent file lock that supports the with-statement. + +.. autodata:: filelock.__version__ + :no-value: + +""" + +from __future__ import annotations + +import sys +import warnings +from typing import TYPE_CHECKING + +from ._api import AcquireReturnProxy, BaseFileLock +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock, has_fcntl +from ._windows import WindowsFileLock +from .asyncio import ( + AsyncAcquireReturnProxy, + AsyncSoftFileLock, + AsyncUnixFileLock, + AsyncWindowsFileLock, + BaseAsyncFileLock, +) +from .version import version + +#: version of the project as a string +__version__: str = version + + +if sys.platform == "win32": # pragma: win32 cover + _FileLock: type[BaseFileLock] = WindowsFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock +else: # pragma: win32 no cover # noqa: PLR5501 + if has_fcntl: + _FileLock: type[BaseFileLock] = UnixFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock + else: + _FileLock = SoftFileLock + _AsyncFileLock = AsyncSoftFileLock + if warnings is not None: + warnings.warn("only soft file lock is available", stacklevel=2) + +if TYPE_CHECKING: + FileLock = SoftFileLock + AsyncFileLock = AsyncSoftFileLock +else: + #: Alias for the lock, which should be used for the current platform. + FileLock = _FileLock + AsyncFileLock = _AsyncFileLock + + +__all__ = [ + "AcquireReturnProxy", + "AsyncAcquireReturnProxy", + "AsyncFileLock", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", + "BaseFileLock", + "FileLock", + "SoftFileLock", + "Timeout", + "UnixFileLock", + "WindowsFileLock", + "__version__", +] diff --git a/phivenv/Lib/site-packages/filelock/__pycache__/asyncio.cpython-39.pyc b/phivenv/Lib/site-packages/filelock/__pycache__/asyncio.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4f596083c9c90bc040c52b95e0c056f2094385f Binary files /dev/null and b/phivenv/Lib/site-packages/filelock/__pycache__/asyncio.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/filelock/__pycache__/version.cpython-39.pyc b/phivenv/Lib/site-packages/filelock/__pycache__/version.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44952c4427416981868f2bd8fd2731e110cbb3aa Binary files /dev/null and b/phivenv/Lib/site-packages/filelock/__pycache__/version.cpython-39.pyc differ diff --git a/phivenv/Lib/site-packages/filelock/_api.py b/phivenv/Lib/site-packages/filelock/_api.py new file mode 100644 index 0000000000000000000000000000000000000000..8fde69a0fef7badcc123d17735cd784a99baed52 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_api.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import contextlib +import inspect +import logging +import os +import time +import warnings +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from threading import local +from typing import TYPE_CHECKING, Any, cast +from weakref import WeakValueDictionary + +from ._error import Timeout + +if TYPE_CHECKING: + import sys + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: + self.lock = lock + + def __enter__(self) -> BaseFileLock: + return self.lock + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + self.lock.release() + + +@dataclass +class FileLockContext: + """A dataclass which holds the context for a ``BaseFileLock`` object.""" + + # The context is held in a separate class to allow optional use of thread local storage via the + # ThreadLocalFileContext class. + + #: The path to the lock file. + lock_file: str + + #: The default timeout value. + timeout: float + + #: The mode for the lock files + mode: int + + #: Whether the lock should be blocking or not + blocking: bool + + #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held + lock_file_fd: int | None = None + + #: The lock counter is used for implementing the nested locking mechanism. + lock_counter: int = 0 # When the lock is acquired is increased and the lock is only released, when this value is 0 + + +class ThreadLocalFileContext(FileLockContext, local): + """A thread local version of the ``FileLockContext`` class.""" + + +class FileLockMeta(ABCMeta): + def __call__( # noqa: PLR0913 + cls, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + **kwargs: Any, # capture remaining kwargs for subclasses # noqa: ANN401 + ) -> BaseFileLock: + if is_singleton: + instance = cls._instances.get(str(lock_file)) # type: ignore[attr-defined] + if instance: + params_to_check = { + "thread_local": (thread_local, instance.is_thread_local()), + "timeout": (timeout, instance.timeout), + "mode": (mode, instance.mode), + "blocking": (blocking, instance.blocking), + } + + non_matching_params = { + name: (passed_param, set_param) + for name, (passed_param, set_param) in params_to_check.items() + if passed_param != set_param + } + if not non_matching_params: + return cast("BaseFileLock", instance) + + # parameters do not match; raise error + msg = "Singleton lock instances cannot be initialized with differing arguments" + msg += "\nNon-matching arguments: " + for param_name, (passed_param, set_param) in non_matching_params.items(): + msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)" + raise ValueError(msg) + + # Workaround to make `__init__`'s params optional in subclasses + # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant + # (https://github.com/tox-dev/filelock/pull/340) + + all_params = { + "timeout": timeout, + "mode": mode, + "thread_local": thread_local, + "blocking": blocking, + "is_singleton": is_singleton, + **kwargs, + } + + present_params = inspect.signature(cls.__init__).parameters # type: ignore[misc] + init_params = {key: value for key, value in all_params.items() if key in present_params} + + instance = super().__call__(lock_file, **init_params) + + if is_singleton: + cls._instances[str(lock_file)] = instance # type: ignore[attr-defined] + + return cast("BaseFileLock", instance) + + +class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta): + """Abstract base class for a file lock object.""" + + _instances: WeakValueDictionary[str, BaseFileLock] + + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + """Setup unique state for lock subclasses.""" + super().__init_subclass__(**kwargs) + cls._instances = WeakValueDictionary() + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + } + self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs) + + def is_thread_local(self) -> bool: + """:return: a flag indicating if this lock is thread local or not""" + return self._is_thread_local + + @property + def is_singleton(self) -> bool: + """:return: a flag indicating if this lock is singleton or not""" + return self._is_singleton + + @property + def lock_file(self) -> str: + """:return: path to the lock file""" + return self._context.lock_file + + @property + def timeout(self) -> float: + """ + :return: the default timeout value, in seconds + + .. versionadded:: 2.0.0 + """ + return self._context.timeout + + @timeout.setter + def timeout(self, value: float | str) -> None: + """ + Change the default timeout value. + + :param value: the new value, in seconds + + """ + self._context.timeout = float(value) + + @property + def blocking(self) -> bool: + """:return: whether the locking is blocking or not""" + return self._context.blocking + + @blocking.setter + def blocking(self, value: bool) -> None: + """ + Change the default blocking value. + + :param value: the new value as bool + + """ + self._context.blocking = value + + @property + def mode(self) -> int: + """:return: the file permissions for the lockfile""" + return self._context.mode + + @abstractmethod + def _acquire(self) -> None: + """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file.""" + raise NotImplementedError + + @abstractmethod + def _release(self) -> None: + """Releases the lock and sets self._context.lock_file_fd to None.""" + raise NotImplementedError + + @property + def is_locked(self) -> bool: + """ + + :return: A boolean indicating if the lock file is holding the lock currently. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._context.lock_file_fd is not None + + @property + def lock_counter(self) -> int: + """:return: The number of times this lock has been acquired (but not yet released).""" + return self._context.lock_counter + + def acquire( + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + poll_intervall: float | None = None, + blocking: bool | None = None, + ) -> AcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and + if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + if poll_intervall is not None: + msg = "use poll_interval instead of poll_intervall" + warnings.warn(msg, DeprecationWarning, stacklevel=2) + poll_interval = poll_intervall + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + self._acquire() + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + time.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AcquireReturnProxy(lock=self) + + def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + self._release() + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + def __enter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + self.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + self.release(force=True) + + +__all__ = [ + "AcquireReturnProxy", + "BaseFileLock", +] diff --git a/phivenv/Lib/site-packages/filelock/_error.py b/phivenv/Lib/site-packages/filelock/_error.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ff08c0f508ad7077eb6ed1990898840c952b3a --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_error.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Any + + +class Timeout(TimeoutError): # noqa: N818 + """Raised when the lock could not be acquired in *timeout* seconds.""" + + def __init__(self, lock_file: str) -> None: + super().__init__() + self._lock_file = lock_file + + def __reduce__(self) -> str | tuple[Any, ...]: + return self.__class__, (self._lock_file,) # Properly pickle the exception + + def __str__(self) -> str: + return f"The file lock '{self._lock_file}' could not be acquired." + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.lock_file!r})" + + @property + def lock_file(self) -> str: + """:return: The path of the file lock.""" + return self._lock_file + + +__all__ = [ + "Timeout", +] diff --git a/phivenv/Lib/site-packages/filelock/_soft.py b/phivenv/Lib/site-packages/filelock/_soft.py new file mode 100644 index 0000000000000000000000000000000000000000..28c67f74cc82b8f55e47afd6a71972cc1fb95eb6 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_soft.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES, EEXIST +from pathlib import Path + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + + +class SoftFileLock(BaseFileLock): + """Simply watches the existence of the lock file.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + # first check for exists and read-only mode as the open will mask this case as EEXIST + flags = ( + os.O_WRONLY # open for writing only + | os.O_CREAT + | os.O_EXCL # together with above raise EEXIST if the file specified by filename exists + | os.O_TRUNC # truncate the file to zero byte + ) + try: + file_handler = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: # re-raise unless expected exception + if not ( + exception.errno == EEXIST # lock already exist + or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock + ): # pragma: win32 no cover + raise + else: + self._context.lock_file_fd = file_handler + + def _release(self) -> None: + assert self._context.lock_file_fd is not None # noqa: S101 + os.close(self._context.lock_file_fd) # the lock file is definitely not None + self._context.lock_file_fd = None + with suppress(OSError): # the file is already deleted and that's what we want + Path(self.lock_file).unlink() + + +__all__ = [ + "SoftFileLock", +] diff --git a/phivenv/Lib/site-packages/filelock/_unix.py b/phivenv/Lib/site-packages/filelock/_unix.py new file mode 100644 index 0000000000000000000000000000000000000000..b2fd0f33d25d2bdf4a2a883380154771b4a25f9b --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_unix.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import ENOSYS +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists + +#: a flag to indicate if the fcntl API is available +has_fcntl = False +if sys.platform == "win32": # pragma: win32 cover + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + +else: # pragma: win32 no cover + try: + import fcntl + + _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN) + except (ImportError, AttributeError): + pass + else: + has_fcntl = True + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + ensure_directory_exists(self.lock_file) + open_flags = os.O_RDWR | os.O_TRUNC + if not Path(self.lock_file).exists(): + open_flags |= os.O_CREAT + fd = os.open(self.lock_file, open_flags, self._context.mode) + with suppress(PermissionError): # This locked is not owned by this UID + os.fchmod(fd, self._context.mode) + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError as exception: + os.close(fd) + if exception.errno == ENOSYS: # NotImplemented error + msg = "FileSystem does not appear to support flock; use SoftFileLock instead" + raise NotImplementedError(msg) from exception + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + # Do not remove the lockfile: + # https://github.com/tox-dev/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + +__all__ = [ + "UnixFileLock", + "has_fcntl", +] diff --git a/phivenv/Lib/site-packages/filelock/_util.py b/phivenv/Lib/site-packages/filelock/_util.py new file mode 100644 index 0000000000000000000000000000000000000000..c671e8533873948f0e1b5575ff952c722019f067 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_util.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import os +import stat +import sys +from errno import EACCES, EISDIR +from pathlib import Path + + +def raise_on_not_writable_file(filename: str) -> None: + """ + Raise an exception if attempting to open the file for writing would fail. + + This is done so files that will never be writable can be separated from files that are writable but currently + locked. + + :param filename: file to check + :raises OSError: as if the file was opened for writing. + + """ + try: # use stat to do exists + can write to check without race condition + file_stat = os.stat(filename) # noqa: PTH116 + except OSError: + return # swallow does not exist or other errors + + if file_stat.st_mtime != 0: # if os.stat returns but modification is zero that's an invalid os.stat - ignore it + if not (file_stat.st_mode & stat.S_IWUSR): + raise PermissionError(EACCES, "Permission denied", filename) + + if stat.S_ISDIR(file_stat.st_mode): + if sys.platform == "win32": # pragma: win32 cover + # On Windows, this is PermissionError + raise PermissionError(EACCES, "Permission denied", filename) + else: # pragma: win32 no cover # noqa: RET506 + # On linux / macOS, this is IsADirectoryError + raise IsADirectoryError(EISDIR, "Is a directory", filename) + + +def ensure_directory_exists(filename: Path | str) -> None: + """ + Ensure the directory containing the file exists (create it if necessary). + + :param filename: file. + + """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) + + +__all__ = [ + "ensure_directory_exists", + "raise_on_not_writable_file", +] diff --git a/phivenv/Lib/site-packages/filelock/_windows.py b/phivenv/Lib/site-packages/filelock/_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..348251d1067c28c55a6a267f8d11337abfae837f --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/_windows.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + +if sys.platform == "win32": # pragma: win32 cover + import msvcrt + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + flags = ( + os.O_RDWR # open for read and write + | os.O_CREAT # create file if not exists + | os.O_TRUNC # truncate file if not empty + ) + try: + fd = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: + if exception.errno != EACCES: # has no access to this lock + raise + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except OSError as exception: + os.close(fd) # close file first + if exception.errno != EACCES: # file is already locked + raise + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + with suppress(OSError): # Probably another instance of the application hat acquired the file lock. + Path(self.lock_file).unlink() + +else: # pragma: win32 no cover + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + + +__all__ = [ + "WindowsFileLock", +] diff --git a/phivenv/Lib/site-packages/filelock/asyncio.py b/phivenv/Lib/site-packages/filelock/asyncio.py new file mode 100644 index 0000000000000000000000000000000000000000..22ae122d79054c101aec8a47f6cbc219703a1f96 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/asyncio.py @@ -0,0 +1,343 @@ +"""An asyncio-based implementation of the file lock.""" + +from __future__ import annotations + +import asyncio +import contextlib +import logging +import os +import time +from dataclasses import dataclass +from inspect import iscoroutinefunction +from threading import local +from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast + +from ._api import BaseFileLock, FileLockContext, FileLockMeta +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock +from ._windows import WindowsFileLock + +if TYPE_CHECKING: + import sys + from concurrent import futures + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: # noqa: D107 + self.lock = lock + + async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105 + return self.lock + + async def __aexit__( # noqa: D105 + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + await self.lock.release() + + +class AsyncFileLockMeta(FileLockMeta): + def __call__( # type: ignore[override] # noqa: PLR0913 + cls, # noqa: N805 + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> BaseAsyncFileLock: + if thread_local and run_in_executor: + msg = "run_in_executor is not supported when thread_local is True" + raise ValueError(msg) + instance = super().__call__( + lock_file=lock_file, + timeout=timeout, + mode=mode, + thread_local=thread_local, + blocking=blocking, + is_singleton=is_singleton, + loop=loop, + run_in_executor=run_in_executor, + executor=executor, + ) + return cast("BaseAsyncFileLock", instance) + + +class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta): + """Base class for asynchronous file locks.""" + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + :param loop: The event loop to use. If not specified, the running event loop will be used. + :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor. + :param executor: The executor to use. If not specified, the default executor will be used. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + "loop": loop, + "run_in_executor": run_in_executor, + "executor": executor, + } + self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)( + **kwargs + ) + + @property + def run_in_executor(self) -> bool: + """::return: whether run in executor.""" + return self._context.run_in_executor + + @property + def executor(self) -> futures.Executor | None: + """::return: the executor.""" + return self._context.executor + + @executor.setter + def executor(self, value: futures.Executor | None) -> None: # pragma: no cover + """ + Change the executor. + + :param value: the new executor or ``None`` + :type value: futures.Executor | None + + """ + self._context.executor = value + + @property + def loop(self) -> asyncio.AbstractEventLoop | None: + """::return: the event loop.""" + return self._context.loop + + async def acquire( # type: ignore[override] + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + blocking: bool | None = None, + ) -> AsyncAcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default + :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and + this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._acquire) + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + await asyncio.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AsyncAcquireReturnProxy(lock=self) + + async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._release) + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + async def _run_internal_method(self, method: Callable[[], Any]) -> None: + if iscoroutinefunction(method): + await method() + elif self.run_in_executor: + loop = self.loop or asyncio.get_running_loop() + await loop.run_in_executor(self.executor, method) + else: + method() + + def __enter__(self) -> NoReturn: + """ + Replace old __enter__ method to avoid using it. + + NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD. + + :return: none + :rtype: NoReturn + """ + msg = "Do not use `with` for asyncio locks, use `async with` instead." + raise NotImplementedError(msg) + + async def __aenter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + await self.acquire() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + await self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + with contextlib.suppress(RuntimeError): + loop = self.loop or asyncio.get_running_loop() + if not loop.is_running(): # pragma: no cover + loop.run_until_complete(self.release(force=True)) + else: + loop.create_task(self.release(force=True)) + + +class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock): + """Simply watches the existence of the lock file.""" + + +class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + +class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock): + """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems.""" + + +__all__ = [ + "AsyncAcquireReturnProxy", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", +] diff --git a/phivenv/Lib/site-packages/filelock/py.typed b/phivenv/Lib/site-packages/filelock/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/phivenv/Lib/site-packages/filelock/version.py b/phivenv/Lib/site-packages/filelock/version.py new file mode 100644 index 0000000000000000000000000000000000000000..9fdac7217947bcb32634c26736adb9ac219079a0 --- /dev/null +++ b/phivenv/Lib/site-packages/filelock/version.py @@ -0,0 +1,21 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '3.19.1' +__version_tuple__ = version_tuple = (3, 19, 1)