dheeena committed
Commit 7cb8c9d · verified · 1 Parent(s): aabd464

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed (50)
  1. venv/lib/python3.13/site-packages/_yaml/__init__.py +33 -0
  2. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER +1 -0
  3. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA +764 -0
  4. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD +35 -0
  5. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL +7 -0
  6. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt +2 -0
  7. venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt +1 -0
  8. venv/lib/python3.13/site-packages/filelock/__init__.py +70 -0
  9. venv/lib/python3.13/site-packages/filelock/_api.py +403 -0
  10. venv/lib/python3.13/site-packages/filelock/_error.py +30 -0
  11. venv/lib/python3.13/site-packages/filelock/_soft.py +47 -0
  12. venv/lib/python3.13/site-packages/filelock/_unix.py +70 -0
  13. venv/lib/python3.13/site-packages/filelock/_util.py +52 -0
  14. venv/lib/python3.13/site-packages/filelock/_windows.py +65 -0
  15. venv/lib/python3.13/site-packages/filelock/asyncio.py +344 -0
  16. venv/lib/python3.13/site-packages/filelock/py.typed +0 -0
  17. venv/lib/python3.13/site-packages/filelock/version.py +34 -0
  18. venv/lib/python3.13/site-packages/fsspec/__init__.py +71 -0
  19. venv/lib/python3.13/site-packages/fsspec/_version.py +34 -0
  20. venv/lib/python3.13/site-packages/fsspec/caching.py +1004 -0
  21. venv/lib/python3.13/site-packages/fsspec/compression.py +182 -0
  22. venv/lib/python3.13/site-packages/fsspec/config.py +131 -0
  23. venv/lib/python3.13/site-packages/fsspec/conftest.py +125 -0
  24. venv/lib/python3.13/site-packages/fsspec/core.py +743 -0
  25. venv/lib/python3.13/site-packages/fsspec/dircache.py +98 -0
  26. venv/lib/python3.13/site-packages/fsspec/fuse.py +324 -0
  27. venv/lib/python3.13/site-packages/fsspec/generic.py +396 -0
  28. venv/lib/python3.13/site-packages/fsspec/gui.py +417 -0
  29. venv/lib/python3.13/site-packages/fsspec/json.py +117 -0
  30. venv/lib/python3.13/site-packages/fsspec/mapping.py +251 -0
  31. venv/lib/python3.13/site-packages/fsspec/parquet.py +541 -0
  32. venv/lib/python3.13/site-packages/fsspec/registry.py +330 -0
  33. venv/lib/python3.13/site-packages/fsspec/spec.py +2281 -0
  34. venv/lib/python3.13/site-packages/fsspec/transaction.py +90 -0
  35. venv/lib/python3.13/site-packages/hf_xet/__init__.py +5 -0
  36. venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER +1 -0
  37. venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA +209 -0
  38. venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD +22 -0
  39. venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL +4 -0
  40. venv/lib/python3.13/site-packages/packaging/__init__.py +15 -0
  41. venv/lib/python3.13/site-packages/packaging/_elffile.py +109 -0
  42. venv/lib/python3.13/site-packages/packaging/_manylinux.py +262 -0
  43. venv/lib/python3.13/site-packages/packaging/_musllinux.py +85 -0
  44. venv/lib/python3.13/site-packages/packaging/_parser.py +353 -0
  45. venv/lib/python3.13/site-packages/packaging/_structures.py +61 -0
  46. venv/lib/python3.13/site-packages/packaging/_tokenizer.py +195 -0
  47. venv/lib/python3.13/site-packages/packaging/markers.py +362 -0
  48. venv/lib/python3.13/site-packages/packaging/metadata.py +862 -0
  49. venv/lib/python3.13/site-packages/packaging/py.typed +0 -0
  50. venv/lib/python3.13/site-packages/packaging/requirements.py +91 -0
venv/lib/python3.13/site-packages/_yaml/__init__.py ADDED
@@ -0,0 +1,33 @@
+ # This is a stub package designed to roughly emulate the _yaml
+ # extension module, which previously existed as a standalone module
+ # and has been moved into the `yaml` package namespace.
+ # It does not perfectly mimic its old counterpart, but should get
+ # close enough for anyone who's relying on it even when they shouldn't.
+ import yaml
+
+ # In some circumstances, the yaml module we imported may be from a different version, so we need
+ # to tread carefully when poking at it here (it may not have the attributes we expect).
+ if not getattr(yaml, '__with_libyaml__', False):
+     from sys import version_info
+
+     exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
+     raise exc("No module named '_yaml'")
+ else:
+     from yaml._yaml import *
+     import warnings
+     warnings.warn(
+         'The _yaml extension module is now located at yaml._yaml'
+         ' and its location is subject to change. To use the'
+         ' LibYAML-based parser and emitter, import from `yaml`:'
+         ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
+         DeprecationWarning
+     )
+     del warnings
+     # Don't `del yaml` here because yaml is actually an existing
+     # namespace member of _yaml.
+
+ __name__ = '_yaml'
+ # If the module is top-level (i.e. not a part of any specific package)
+ # then the attribute should be set to ''.
+ # https://docs.python.org/3.8/library/types.html
+ __package__ = ''
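The deprecation warning in this stub spells out the supported import path. A minimal sketch of the recommended usage, assuming PyYAML was built with LibYAML (otherwise `CLoader` and `CDumper` do not exist):

```python
import yaml

# Preferred replacement for `import _yaml`: take the C-accelerated classes
# from the public `yaml` namespace, as the deprecation message suggests.
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:  # PyYAML built without LibYAML
    from yaml import SafeLoader as Loader, SafeDumper as Dumper

data = yaml.load("a: 1\nb: [2, 3]\n", Loader=Loader)
print(data)                          # {'a': 1, 'b': [2, 3]}
print(yaml.dump(data, Dumper=Dumper))
```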
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ pip
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/METADATA ADDED
@@ -0,0 +1,764 @@
+ Metadata-Version: 2.4
+ Name: charset-normalizer
+ Version: 3.4.4
+ Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
+ Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+ Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
+ License: MIT
+ Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
+ Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
+ Project-URL: Code, https://github.com/jawah/charset_normalizer
+ Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
+ Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Topic :: Utilities
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: unicode-backport
+ Dynamic: license-file
+
+ <h1 align="center">Charset Detection, for Everyone 👋</h1>
+
+ <p align="center">
+   <sup>The Real First Universal Charset Detector</sup><br>
+   <a href="https://pypi.org/project/charset-normalizer">
+     <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
+   </a>
+   <a href="https://pepy.tech/project/charset-normalizer/">
+     <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
+   </a>
+   <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
+     <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
+   </a>
+ </p>
+ <p align="center">
+   <sup><i>Featured Packages</i></sup><br>
+   <a href="https://github.com/jawah/niquests">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
+   </a>
+   <a href="https://github.com/jawah/wassima">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
+   </a>
+ </p>
+ <p align="center">
+   <sup><i>In other languages (unofficial ports, by the community)</i></sup><br>
+   <a href="https://github.com/nickspring/charset-normalizer-rs">
+     <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
+   </a>
+ </p>
+
+ > A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
+ > I'm trying to resolve the issue by taking a new approach.
+ > All IANA character set names for which the Python core library provides codecs are supported.
+
+ <p align="center">
+   >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
+ </p>
+
+ This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
+
+ | Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
+ |--------------------------------------------------|:---------------------------------------------:|:------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
+ | `Fast`                                           | ❌ | ✅ | ✅ |
+ | `Universal**`                                    | ❌ | ✅ | ❌ |
+ | `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
+ | `Reliable` **with** distinguishable standards    | ✅ | ✅ | ✅ |
+ | `License`                                        | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
+ | `Native Python`                                  | ✅ | ✅ | ❌ |
+ | `Detect spoken language`                         | ❌ | ✅ | N/A |
+ | `UnicodeDecodeError Safety`                      | ❌ | ✅ | ❌ |
+ | `Whl Size (min)`                                 | 193.6 kB | 42 kB | ~200 kB |
+ | `Supported Encoding`                             | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
+
+ <p align="center">
+   <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
+ </p>
+
+ *\*\*: They clearly use code written for specific encodings, even if those cover most of the encodings in common use.*<br>
+
+ ## ⚡ Performance
+
+ This package offers better performance than its counterpart Chardet. Here are some numbers.
+
+ | Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
+ |-----------------------------------------------|:--------:|:------------------:|:------------------:|
+ | [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
+ | charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |
+
+ | Package                                       | 99th percentile | 95th percentile | 50th percentile |
+ |-----------------------------------------------|:---------------:|:---------------:|:---------------:|
+ | [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
+ | charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |
+
+ _updated as of December 2024 using CPython 3.12_
+
+ Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
+
+ > Stats are generated using 400+ files with the default parameters. For details on the files used, see the GHA workflows.
+ > And yes, these results might change at any time. The dataset can be updated to include more files.
+ > The actual delays depend heavily on your CPU capabilities, but the ratios should remain the same.
+ > Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capability
+ > (e.g. supported encodings). Challenge them if you want.
+
+ ## ✨ Installation
+
+ Using pip:
+
+ ```sh
+ pip install charset-normalizer -U
+ ```
+
+ ## 🚀 Basic Usage
+
+ ### CLI
+ This package comes with a CLI.
+
+ ```
+ usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
+                   file [file ...]
+
+ The Real First Universal Charset Detector. Discover originating encoding used
+ on text file. Normalize text to unicode.
+
+ positional arguments:
+   files                 File(s) to be analysed
+
+ optional arguments:
+   -h, --help            show this help message and exit
+   -v, --verbose         Display complementary information about file if any.
+                         Stdout will contain logs about the detection process.
+   -a, --with-alternative
+                         Output complementary possibilities if any. Top-level
+                         JSON WILL be a list.
+   -n, --normalize       Permit to normalize input file. If not set, program
+                         does not write anything.
+   -m, --minimal         Only output the charset detected to STDOUT. Disabling
+                         JSON output.
+   -r, --replace         Replace file when trying to normalize it instead of
+                         creating a new one.
+   -f, --force           Replace file without asking if you are sure, use this
+                         flag with caution.
+   -t THRESHOLD, --threshold THRESHOLD
+                         Define a custom maximum amount of chaos allowed in
+                         decoded content. 0. <= chaos <= 1.
+   --version             Show version information and exit.
+ ```
+
+ ```bash
+ normalizer ./data/sample.1.fr.srt
+ ```
+
+ or
+
+ ```bash
+ python -m charset_normalizer ./data/sample.1.fr.srt
+ ```
+
+ 🎉 Since version 1.4.0 the CLI produces easily usable stdout results in JSON format.
+
+ ```json
+ {
+     "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
+     "encoding": "cp1252",
+     "encoding_aliases": [
+         "1252",
+         "windows_1252"
+     ],
+     "alternative_encodings": [
+         "cp1254",
+         "cp1256",
+         "cp1258",
+         "iso8859_14",
+         "iso8859_15",
+         "iso8859_16",
+         "iso8859_3",
+         "iso8859_9",
+         "latin_1",
+         "mbcs"
+     ],
+     "language": "French",
+     "alphabets": [
+         "Basic Latin",
+         "Latin-1 Supplement"
+     ],
+     "has_sig_or_bom": false,
+     "chaos": 0.149,
+     "coherence": 97.152,
+     "unicode_path": null,
+     "is_preferred": true
+ }
+ ```
+
+ ### Python
+ *Just print out normalized text*
+ ```python
+ from charset_normalizer import from_path
+
+ results = from_path('./my_subtitle.srt')
+
+ print(str(results.best()))
+ ```
+
+ *Upgrade your code without effort*
+ ```python
+ from charset_normalizer import detect
+ ```
+
+ The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
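As a point of reference, a minimal sketch of what that drop-in replacement looks like in practice; the result keys (`encoding`, `language`, `confidence`) mirror chardet's shape, and the exact values shown in the comment are illustrative, not guaranteed:

```python
from charset_normalizer import detect

# Same call shape as chardet.detect(); accepts raw bytes.
payload = "Bonjour, où êtes-vous ?".encode("cp1252")
result = detect(payload)

# A dict in chardet's format, e.g.:
# {'encoding': 'cp1252', 'language': 'French', 'confidence': 0.85}
print(result["encoding"], result["confidence"])
```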
+
+ See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
+
+ ## 😇 Why
+
+ When I started using Chardet, I noticed that it did not suit my expectations, and I wanted to propose a
+ reliable alternative using a completely different method. Also! I never back down from a good challenge!
+
+ I **don't care** about the **originating charset** encoding, because **two different tables** can
+ produce **two identical rendered strings.**
+ What I want is to get readable text, the best I can.
+
+ In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
+
+ Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
+
+ ## 🍰 How
+
+ - Discard every charset encoding table that could not fit the binary content.
+ - Measure the noise, or mess, once the content is opened (in chunks) with a candidate charset encoding.
+ - Extract the matches with the lowest mess detected.
+ - Additionally, measure coherence / probe for a language.
+
+ **Wait a minute**, what are noise/mess and coherence according to **YOU?**
+
+ *Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
+ **I established** some ground rules about **what is obvious** when **it seems like** a mess (i.e., defining noise in rendered text).
+ I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
+ improve or rewrite it.
+
+ *Coherence:* For each language there is on earth, we have computed ranked letter-appearance occurrences (the best we can). I thought
+ that intel is worth something here, so I use those records against decoded text to check if I can detect intelligent design.
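Those two measurements surface on each match, as the JSON output earlier shows (`chaos` and `coherence`). A minimal sketch of inspecting them through the Python API; the attribute names are taken from that output, and the sample string is made up:

```python
from charset_normalizer import from_bytes

payload = "Héllo wörld, ценность".encode("utf-8")
matches = from_bytes(payload)

best = matches.best()
if best is not None:
    # Lower chaos (mess) and higher coherence are better, per the
    # selection rules described above.
    print(best.encoding, best.chaos, best.coherence, best.language)
```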
+
+ ## ⚡ Known limitations
+
+ - Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML with English tags plus Turkish content, both sharing Latin characters).
+ - Every charset detector depends heavily on having sufficient content. In common cases, do not bother running detection on very tiny content.
+
+ ## ⚠️ About Python EOLs
+
+ **If you are running:**
+
+ - Python >=2.7,<3.5: Unsupported
+ - Python 3.5: charset-normalizer < 2.1
+ - Python 3.6: charset-normalizer < 3.1
+ - Python 3.7: charset-normalizer < 4.0
+
+ Upgrade your Python interpreter as soon as possible.
+
+ ## 👤 Contributing
+
+ Contributions, issues and feature requests are very much welcome.<br />
+ Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
+
+ ## 📝 License
+
+ Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
+ This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
+
+ Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
+
+ ## 💼 For Enterprise
+
+ Professional support for charset-normalizer is available as part of the [Tidelift
+ Subscription][1]. Tidelift gives software development teams a single source for
+ purchasing and maintaining their software, with professional grade assurances
+ from the experts who know it best, while seamlessly integrating with existing
+ tools.
+
+ [1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
+
+ [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
+
+ # Changelog
+ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+
+ ## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
+
+ ### Changed
+ - Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
+ - Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2
+
+ ### Removed
+ - `setuptools-scm` as a build dependency.
+
+ ### Misc
+ - Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
+ - Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
+ - Restored `multiple.intoto.jsonl` in GitHub releases in addition to an individual attestation file per wheel.
+
+ ## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
+
+ ### Changed
+ - mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
+ - Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output. (#391)
+
+ ### Added
+ - Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
+ - Support for Python 3.14
+
+ ### Fixed
+ - The sdist archive contained useless directories.
+ - Automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
+
+ ### Misc
+ - SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
+   Each published wheel comes with its SBOM. We chose CycloneDX as the format.
+ - Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
+
+ ## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
+
+ ### Fixed
+ - Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
+ - Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
+
+ ### Changed
+ - Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
+
+ ## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
+
+ ### Changed
+ - Project metadata is now stored using `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
+ - Enforce delayed annotation loading for simpler and consistent types in the project.
+ - Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
+
+ ### Added
+ - pre-commit configuration.
+ - noxfile.
+
+ ### Removed
+ - `build-requirements.txt` as per using `pyproject.toml` native build configuration.
+ - `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
+ - `setup.cfg` in favor of `pyproject.toml` metadata configuration.
+ - Unused `utils.range_scan` function.
+
+ ### Fixed
+ - Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
+ - Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
+
+ ## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
+
+ ### Added
+ - Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
+ - Support for Python 3.13 (#512)
+
+ ### Fixed
+ - Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
+ - Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
+ - Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
+
+ ## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
+
+ ### Fixed
+ - Unintentional memory usage regression when using a large payload that matches several encodings (#376)
+ - Regression on some detection cases showcased in the documentation (#371)
+
+ ### Added
+ - Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
+
+ ## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
+
+ ### Changed
+ - Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
+ - Improved the general detection reliability based on reports from the community
+
+ ## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
+
+ ### Added
+ - Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
+ - Support for 9 forgotten encodings that are supported by Python but unlisted in `encodings.aliases` as they have no alias (#323)
+
+ ### Removed
+ - (internal) Redundant utils.is_ascii function and unused function is_private_use_only
+ - (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
+
+ ### Changed
+ - (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
+ - Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
+
+ ### Fixed
+ - Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
+
+ ## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
+
+ ### Changed
+ - Typehint for function `from_path` no longer enforces `PathLike` as its first argument
+ - Minor improvement over the global detection reliability
+
+ ### Added
+ - Introduce function `is_binary` that relies on main capabilities, and is optimized to detect binaries
+ - Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
+ - Explicit support for Python 3.12
+
+ ### Fixed
+ - Edge case detection failure where a file would contain a 'very-long' camel-cased word (Issue #289)
+
+ ## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
+
+ ### Added
+ - Argument `should_rename_legacy` for the legacy function `detect`; disregard any new arguments without errors (PR #262)
+
+ ### Removed
+ - Support for Python 3.6 (PR #260)
+
+ ### Changed
+ - Optional speedup provided by mypy/c 1.0.1
+
+ ## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
+
+ ### Fixed
+ - Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
+
+ ### Changed
+ - Speedup provided by mypy/c 0.990 on Python >= 3.7
+
+ ## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
+
+ ### Added
+ - Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log details of the mess-detector results
+ - Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
+ - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+ - `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)
+
+ ### Changed
+ - Build with static metadata using the 'build' frontend
+ - Make the language detection stricter
+ - Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1
+
+ ### Fixed
+ - CLI with opt --normalize failed when using a full path for files
+ - TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
+ - Sphinx warnings when generating the documentation
+
+ ### Removed
+ - Coherence detector no longer returns 'Simple English'; it returns 'English' instead
+ - Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
+ - Breaking: Methods `first()` and `best()` from CharsetMatch
+ - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
+ - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+ - Breaking: Top-level function `normalize`
+ - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+ - Support for the backport `unicodedata2`
+
+ ## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
+
+ ### Added
+ - Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log details of the mess-detector results
+ - Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
+ - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
+
+ ### Changed
+ - Build with static metadata using the 'build' frontend
+ - Make the language detection stricter
+
+ ### Fixed
+ - CLI with opt --normalize failed when using a full path for files
+ - TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
+
+ ### Removed
+ - Coherence detector no longer returns 'Simple English'; it returns 'English' instead
+ - Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
+
+ ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
+
+ ### Added
+ - `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)
+
+ ### Removed
+ - Breaking: Methods `first()` and `best()` from CharsetMatch
+ - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
+
+ ### Fixed
+ - Sphinx warnings when generating the documentation
+
+ ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+
+ ### Changed
+ - Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1
+
+ ### Removed
+ - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+ - Breaking: Top-level function `normalize`
+ - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+ - Support for the backport `unicodedata2`
+
+ ## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
+
+ ### Deprecated
+ - Function `normalize` scheduled for removal in 3.0
+
+ ### Changed
+ - Removed a useless call to decode in fn is_unprintable (#206)
+
+ ### Fixed
+ - Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
+
+ ## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
+
+ ### Added
+ - Output the Unicode table version when running the CLI with `--version` (PR #194)
+
+ ### Changed
+ - Re-use decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel) (PR #175)
+ - Fixing some performance bottlenecks, from [@deedy5](https://github.com/deedy5) (PR #183)
+
+ ### Fixed
+ - Workaround for a potential bug in cpython where Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, is not acknowledged as a space (PR #175)
+ - CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
+
+ ### Removed
+ - Support for Python 3.5 (PR #192)
+
+ ### Deprecated
+ - Use of backport unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)
+
+ ## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
+
+ ### Fixed
+ - ASCII mis-detection in rare cases (PR #170)
+
+ ## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
+
+ ### Added
+ - Explicit support for Python 3.11 (PR #164)
+
+ ### Changed
+ - The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
+
+ ## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
+
+ ### Fixed
+ - Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
+
+ ### Changed
+ - Skipping the language-detection (CD) on ASCII (PR #155)
+
+ ## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
+
+ ### Changed
+ - Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
+
+ ### Fixed
+ - Wrong logging level applied when setting kwarg `explain` to True (PR #146)
+
+ ## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
+ ### Changed
+ - Improvement over Vietnamese detection (PR #126)
+ - MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
+ - Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar) (PR #122)
+ - Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
+ - Code style as refactored by Sourcery-AI (PR #131)
+ - Minor adjustment on the MD around European words (PR #133)
+ - Remove and replace SRTs from assets / tests (PR #139)
+ - Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes) (PR #135)
+ - Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
+
+ ### Fixed
+ - Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
+ - Avoid using too-insignificant chunks (PR #137)
+
+ ### Added
+ - Add and expose function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes) (PR #135)
+ - Add `CHANGELOG.md` entries; format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
+
+ ## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
+ ### Added
+ - Add support for Kazakh (Cyrillic) language detection (PR #109)
+
+ ### Changed
+ - Further improve inferring the language from a given single-byte code page (PR #112)
+ - Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
+ - Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar) (PR #113)
+ - Various detection improvements (MD+CD) (PR #117)
+
+ ### Removed
+ - Remove redundant logging entry about detected language(s) (PR #115)
+
+ ### Fixed
+ - Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
+
+ ## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
+ ### Fixed
+ - Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
+ - Fix CLI crash when using --minimal output in certain cases (PR #103)
+
+ ### Changed
+ - Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
+
+ ## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
+ ### Changed
+ - The project now complies with flake8, mypy, isort and black to ensure better overall quality (PR #81)
+ - The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
+ - The Unicode detection is slightly improved (PR #93)
+ - Add syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)
+
+ ### Removed
+ - The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
+
+ ### Fixed
+ - In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
+ - Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
+ - The MANIFEST.in was not exhaustive (PR #78)
+
+ ## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
+ ### Fixed
+ - The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
+ - Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
+ - The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
+ - Submatch factoring could be wrong in rare edge cases (PR #72)
+ - Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
+ - Fix line endings from CRLF to LF for certain project files (PR #67)
+
+ ### Changed
+ - Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
+ - Allow fallback on a specified encoding if any (PR #71)
+
+ ## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
+ ### Changed
+ - Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
+ - According to the community's wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)
+
+ ## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
+ ### Fixed
+ - Empty/too-small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
+
+ ### Changed
+ - Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)
+
+ ## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
+ ### Fixed
+ - Make it work where there isn't a filesystem available by dropping the assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
+ - Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
+ - One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
+ - Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
+
+ ### Changed
+ - Public function normalize default argument values were not aligned with from_bytes (PR #53)
+
+ ### Added
+ - You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
+
+ ## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
+ ### Changed
+ - 4x to 5x faster than the previous 1.4.0 release. At least 2x faster than Chardet.
+ - Emphasis has been put on UTF-8 detection; it should perform nearly instantaneously.
+ - The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
+ - The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
+ - The program has been rewritten to ease readability and maintainability (now using static typing).
+ - utf_7 detection has been reinstated.
+
+ ### Removed
+ - This package no longer requires anything when used with Python 3.5 (dropped cached_property)
+ - Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
+ - The exception hook on UnicodeDecodeError has been removed.
+
+ ### Deprecated
+ - Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
+
+ ### Fixed
+ - The CLI output used the relative path of the file(s). It should be absolute.
+
+ ## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
+ ### Fixed
+ - Logger configuration/usage no longer conflicts with others (PR #44)
+
+ ## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
+ ### Removed
+ - Using standard logging instead of the loguru package.
+ - Dropping the nose test framework in favor of the maintained pytest.
+ - Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
+ - Require cached_property only for Python 3.5 due to a constraint. Dropping it for every other interpreter version.
+ - Stop supporting UTF-7 that does not contain a SIG.
+ - Dropping PrettyTable, replaced with pure JSON output in the CLI.
+
+ ### Fixed
+ - The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
+ - Not searching properly for the BOM when trying the utf32/16 parent codec.
+
+ ### Changed
+ - Improving the package's final size by compressing frequencies.json.
+ - Huge improvement on the largest payloads.
+
+ ### Added
+ - CLI now produces JSON-consumable output.
+ - Return ASCII if the given sequences fit, given reasonable confidence.
+
+ ## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
+
+ ### Fixed
+ - In some very rare cases, you might end up getting encode/decode errors due to a bad bytes payload (PR #40)
+
+ ## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
+
+ ### Fixed
+ - An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)
+
+ ## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
+
+ ### Fixed
+ - The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)
+
+ ## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
+
+ ### Changed
+ - Amend the previous release to allow prettytable 2.0 (PR #35)
+
+ ## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
+
+ ### Fixed
+ - Fix an error when using the package with a Python pre-release interpreter (PR #33)
+
+ ### Changed
+ - Dependencies refactoring, constraints revised.
+
+ ### Added
+ - Add Python 3.9 and 3.10 to the supported interpreters
+
+ MIT License
+
+ Copyright (c) 2025 TAHRI Ahmed R.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/RECORD ADDED
@@ -0,0 +1,35 @@
+ ../../../bin/normalizer,sha256=0NCCWHGXwNJFGXe9vG0dHrG67nHnzOFp4ZWd0RQ0qoI,225
+ charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+ charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303
+ charset_normalizer-3.4.4.dist-info/RECORD,,
+ charset_normalizer-3.4.4.dist-info/WHEEL,sha256=2iHh9e2o6T3nHtu_NVT7Cs7pebIqF94rZK8zrQfgoJI,190
+ charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
+ charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
+ charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
+ charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
+ charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
+ charset_normalizer/__pycache__/__init__.cpython-313.pyc,,
+ charset_normalizer/__pycache__/__main__.cpython-313.pyc,,
+ charset_normalizer/__pycache__/api.cpython-313.pyc,,
+ charset_normalizer/__pycache__/cd.cpython-313.pyc,,
+ charset_normalizer/__pycache__/constant.cpython-313.pyc,,
+ charset_normalizer/__pycache__/legacy.cpython-313.pyc,,
+ charset_normalizer/__pycache__/md.cpython-313.pyc,,
+ charset_normalizer/__pycache__/models.cpython-313.pyc,,
+ charset_normalizer/__pycache__/utils.cpython-313.pyc,,
+ charset_normalizer/__pycache__/version.cpython-313.pyc,,
+ charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668
+ charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
+ charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
+ charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646
+ charset_normalizer/cli/__pycache__/__init__.cpython-313.pyc,,
+ charset_normalizer/cli/__pycache__/__main__.cpython-313.pyc,,
+ charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713
+ charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731
+ charset_normalizer/md.cpython-313-x86_64-linux-gnu.so,sha256=sZ7umtJLjKfA83NFJ7npkiDyr06zDT8cWtl6uIx2MsM,15912
+ charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145
+ charset_normalizer/md__mypyc.cpython-313-x86_64-linux-gnu.so,sha256=i-yavqPJtZwjTKvP9hBLZ8CLZD88rVtguaSoLHso_Oc,291056
+ charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
+ charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170
+ charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/WHEEL ADDED
@@ -0,0 +1,7 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: false
+ Tag: cp313-cp313-manylinux_2_17_x86_64
+ Tag: cp313-cp313-manylinux2014_x86_64
+ Tag: cp313-cp313-manylinux_2_28_x86_64
+
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ normalizer = charset_normalizer.cli:cli_detect
venv/lib/python3.13/site-packages/charset_normalizer-3.4.4.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ charset_normalizer
venv/lib/python3.13/site-packages/filelock/__init__.py ADDED
@@ -0,0 +1,70 @@
+ """
+ A platform independent file lock that supports the with-statement.
+
+ .. autodata:: filelock.__version__
+    :no-value:
+
+ """
+
+ from __future__ import annotations
+
+ import sys
+ import warnings
+ from typing import TYPE_CHECKING
+
+ from ._api import AcquireReturnProxy, BaseFileLock
+ from ._error import Timeout
+ from ._soft import SoftFileLock
+ from ._unix import UnixFileLock, has_fcntl
+ from ._windows import WindowsFileLock
+ from .asyncio import (
+     AsyncAcquireReturnProxy,
+     AsyncSoftFileLock,
+     AsyncUnixFileLock,
+     AsyncWindowsFileLock,
+     BaseAsyncFileLock,
+ )
+ from .version import version
+
+ #: version of the project as a string
+ __version__: str = version
+
+
+ if sys.platform == "win32":  # pragma: win32 cover
+     _FileLock: type[BaseFileLock] = WindowsFileLock
+     _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock
+ else:  # pragma: win32 no cover  # noqa: PLR5501
+     if has_fcntl:
+         _FileLock: type[BaseFileLock] = UnixFileLock
+         _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock
+     else:
+         _FileLock = SoftFileLock
+         _AsyncFileLock = AsyncSoftFileLock
+         if warnings is not None:
+             warnings.warn("only soft file lock is available", stacklevel=2)
+
+ if TYPE_CHECKING:
+     FileLock = SoftFileLock
+     AsyncFileLock = AsyncSoftFileLock
+ else:
+     #: Alias for the lock, which should be used for the current platform.
+     FileLock = _FileLock
+     AsyncFileLock = _AsyncFileLock
+
+
+ __all__ = [
+     "AcquireReturnProxy",
+     "AsyncAcquireReturnProxy",
+     "AsyncFileLock",
+     "AsyncSoftFileLock",
+     "AsyncUnixFileLock",
+     "AsyncWindowsFileLock",
+     "BaseAsyncFileLock",
+     "BaseFileLock",
+     "FileLock",
+     "SoftFileLock",
+     "Timeout",
+     "UnixFileLock",
+     "WindowsFileLock",
+     "__version__",
+ ]
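The module docstring promises with-statement support; a minimal usage sketch of the exported `FileLock` alias and `Timeout` exception (the file paths here are made up for illustration):

```python
from filelock import FileLock, Timeout

lock = FileLock("/tmp/demo.txt.lock", timeout=5)  # hypothetical lock-file path

try:
    with lock:  # blocks for up to 5 seconds, then raises Timeout
        # Critical section: only one process at a time gets here.
        with open("/tmp/demo.txt", "a") as f:
            f.write("exclusive write\n")
except Timeout:
    print("another process is holding demo.txt.lock")
```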
venv/lib/python3.13/site-packages/filelock/_api.py ADDED
@@ -0,0 +1,403 @@
+ from __future__ import annotations
+
+ import contextlib
+ import inspect
+ import logging
+ import os
+ import time
+ import warnings
+ from abc import ABCMeta, abstractmethod
+ from dataclasses import dataclass
+ from threading import local
+ from typing import TYPE_CHECKING, Any, cast
+ from weakref import WeakValueDictionary
+
+ from ._error import Timeout
+
+ if TYPE_CHECKING:
+     import sys
+     from types import TracebackType
+
+     if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+         from typing import Self
+     else:  # pragma: no cover (<py311)
+         from typing_extensions import Self
+
+
+ _LOGGER = logging.getLogger("filelock")
+
+
+ # This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+ # is not called twice when entering the with statement. If we simply returned *self*, the lock would be acquired
+ # again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
+ class AcquireReturnProxy:
+     """A context-aware object that will release the lock file when exiting."""
+
+     def __init__(self, lock: BaseFileLock) -> None:
+         self.lock = lock
+
+     def __enter__(self) -> BaseFileLock:
+         return self.lock
+
+     def __exit__(
+         self,
+         exc_type: type[BaseException] | None,
+         exc_value: BaseException | None,
+         traceback: TracebackType | None,
+     ) -> None:
+         self.lock.release()
+
+
+ @dataclass
+ class FileLockContext:
+     """A dataclass which holds the context for a ``BaseFileLock`` object."""
+
+     # The context is held in a separate class to allow optional use of thread local storage via the
+     # ThreadLocalFileContext class.
+
+     #: The path to the lock file.
+     lock_file: str
+
+     #: The default timeout value.
+     timeout: float
+
+     #: The mode for the lock files
+     mode: int
+
+     #: Whether the lock should be blocking or not
+     blocking: bool
+
+     #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
+     lock_file_fd: int | None = None
+
+     #: The lock counter is used for implementing the nested locking mechanism.
+     lock_counter: int = 0  # Increased when the lock is acquired; the lock is only released when this value is 0
+
+
+ class ThreadLocalFileContext(FileLockContext, local):
+     """A thread local version of the ``FileLockContext`` class."""
+
+
+ class FileLockMeta(ABCMeta):
+     def __call__(  # noqa: PLR0913
+         cls,
+         lock_file: str | os.PathLike[str],
+         timeout: float = -1,
+         mode: int = 0o644,
+         thread_local: bool = True,  # noqa: FBT001, FBT002
+         *,
+         blocking: bool = True,
+         is_singleton: bool = False,
+         **kwargs: Any,  # capture remaining kwargs for subclasses  # noqa: ANN401
+     ) -> BaseFileLock:
+         if is_singleton:
+             instance = cls._instances.get(str(lock_file))  # type: ignore[attr-defined]
+             if instance:
+                 params_to_check = {
+                     "thread_local": (thread_local, instance.is_thread_local()),
+                     "timeout": (timeout, instance.timeout),
+                     "mode": (mode, instance.mode),
+                     "blocking": (blocking, instance.blocking),
+                 }
+
+                 non_matching_params = {
+                     name: (passed_param, set_param)
+                     for name, (passed_param, set_param) in params_to_check.items()
+                     if passed_param != set_param
+                 }
+                 if not non_matching_params:
+                     return cast("BaseFileLock", instance)
+
+                 # parameters do not match; raise error
+                 msg = "Singleton lock instances cannot be initialized with differing arguments"
+                 msg += "\nNon-matching arguments: "
+                 for param_name, (passed_param, set_param) in non_matching_params.items():
+                     msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)"
+                 raise ValueError(msg)
+
+         # Workaround to make `__init__`'s params optional in subclasses
+         # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant
+         # (https://github.com/tox-dev/filelock/pull/340)
+
+         all_params = {
+             "timeout": timeout,
+             "mode": mode,
+             "thread_local": thread_local,
+             "blocking": blocking,
+             "is_singleton": is_singleton,
+             **kwargs,
+         }
+
+         present_params = inspect.signature(cls.__init__).parameters  # type: ignore[misc]
+         init_params = {key: value for key, value in all_params.items() if key in present_params}
+
+         instance = super().__call__(lock_file, **init_params)
+
+         if is_singleton:
+             cls._instances[str(lock_file)] = instance  # type: ignore[attr-defined]
+
+         return cast("BaseFileLock", instance)
+
+
+ class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta):
+     """Abstract base class for a file lock object."""
+
+     _instances: WeakValueDictionary[str, BaseFileLock]
+
+     def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None:
+         """Set up unique state for lock subclasses."""
+         super().__init_subclass__(**kwargs)
+         cls._instances = WeakValueDictionary()
+
+     def __init__(  # noqa: PLR0913
+         self,
+         lock_file: str | os.PathLike[str],
+         timeout: float = -1,
+         mode: int = 0o644,
+         thread_local: bool = True,  # noqa: FBT001, FBT002
+         *,
+         blocking: bool = True,
+         is_singleton: bool = False,
+     ) -> None:
+         """
+         Create a new lock object.
+
+         :param lock_file: path to the file
+         :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+             the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+             to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
+         :param mode: file permissions for the lockfile
+         :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+             ``False`` then the lock will be reentrant across threads.
+         :param blocking: whether the lock should be blocking or not
+         :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+             per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+             to pass the same object around.
+
+         """
+         self._is_thread_local = thread_local
+         self._is_singleton = is_singleton
+
+         # Create the context. Note that external code should not work with the context directly and should instead use
+         # properties of this class.
+         kwargs: dict[str, Any] = {
+             "lock_file": os.fspath(lock_file),
+             "timeout": timeout,
+             "mode": mode,
+             "blocking": blocking,
+         }
+         self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
+
+     def is_thread_local(self) -> bool:
+         """:return: a flag indicating if this lock is thread local or not"""
+         return self._is_thread_local
+
+     @property
+     def is_singleton(self) -> bool:
+         """:return: a flag indicating if this lock is singleton or not"""
+         return self._is_singleton
+
+     @property
+     def lock_file(self) -> str:
+         """:return: path to the lock file"""
+         return self._context.lock_file
+
+     @property
+     def timeout(self) -> float:
+         """
+         :return: the default timeout value, in seconds
+
+         .. versionadded:: 2.0.0
+         """
+         return self._context.timeout
+
+     @timeout.setter
+     def timeout(self, value: float | str) -> None:
+         """
+         Change the default timeout value.
+
+         :param value: the new value, in seconds
+
+         """
+         self._context.timeout = float(value)
+
224
+ @property
225
+ def blocking(self) -> bool:
226
+ """:return: whether the locking is blocking or not"""
227
+ return self._context.blocking
228
+
229
+ @blocking.setter
230
+ def blocking(self, value: bool) -> None:
231
+ """
232
+ Change the default blocking value.
233
+
234
+ :param value: the new value as bool
235
+
236
+ """
237
+ self._context.blocking = value
238
+
239
+ @property
240
+ def mode(self) -> int:
241
+ """:return: the file permissions for the lockfile"""
242
+ return self._context.mode
243
+
244
+ @abstractmethod
245
+ def _acquire(self) -> None:
246
+ """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
247
+ raise NotImplementedError
248
+
249
+ @abstractmethod
250
+ def _release(self) -> None:
251
+ """Releases the lock and sets self._context.lock_file_fd to None."""
252
+ raise NotImplementedError
253
+
254
+ @property
255
+ def is_locked(self) -> bool:
256
+ """
257
+
258
+ :return: A boolean indicating if the lock file is holding the lock currently.
259
+
260
+ .. versionchanged:: 2.0.0
261
+
262
+ This was previously a method and is now a property.
263
+ """
264
+ return self._context.lock_file_fd is not None
265
+
266
+ @property
267
+ def lock_counter(self) -> int:
268
+ """:return: The number of times this lock has been acquired (but not yet released)."""
269
+ return self._context.lock_counter
270
+
271
+ def acquire(
272
+ self,
273
+ timeout: float | None = None,
274
+ poll_interval: float = 0.05,
275
+ *,
276
+ poll_intervall: float | None = None,
277
+ blocking: bool | None = None,
278
+ ) -> AcquireReturnProxy:
279
+ """
280
+ Try to acquire the file lock.
281
+
282
+ :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout`, and
283
+ if ``timeout < 0``, there is no timeout and this method will block until the lock can be acquired
284
+ :param poll_interval: interval of trying to acquire the lock file
285
+ :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
286
+ :param blocking: defaults to True. If False, the method raises :exc:`Timeout` immediately if it cannot obtain a lock on the
287
+ first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
288
+ :raises Timeout: if fails to acquire lock within the timeout period
289
+ :return: a context object that will unlock the file when the context is exited
290
+
291
+ .. code-block:: python
292
+
293
+ # You can use this method as a context manager (recommended)
294
+ with lock.acquire():
295
+ pass
296
+
297
+ # Or use an equivalent try-finally construct:
298
+ lock.acquire()
299
+ try:
300
+ pass
301
+ finally:
302
+ lock.release()
303
+
304
+ .. versionchanged:: 2.0.0
305
+
306
+ This method now returns a *proxy* object instead of *self*,
307
+ so that it can be used in a with statement without side effects.
308
+
309
+ """
310
+ # Use the default timeout, if no timeout is provided.
311
+ if timeout is None:
312
+ timeout = self._context.timeout
313
+
314
+ if blocking is None:
315
+ blocking = self._context.blocking
316
+
317
+ if poll_intervall is not None:
318
+ msg = "use poll_interval instead of poll_intervall"
319
+ warnings.warn(msg, DeprecationWarning, stacklevel=2)
320
+ poll_interval = poll_intervall
321
+
322
+ # Increment the number right at the beginning. We can still undo it, if something fails.
323
+ self._context.lock_counter += 1
324
+
325
+ lock_id = id(self)
326
+ lock_filename = self.lock_file
327
+ start_time = time.perf_counter()
328
+ try:
329
+ while True:
330
+ if not self.is_locked:
331
+ _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
332
+ self._acquire()
333
+ if self.is_locked:
334
+ _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
335
+ break
336
+ if blocking is False:
337
+ _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
338
+ raise Timeout(lock_filename) # noqa: TRY301
339
+ if 0 <= timeout < time.perf_counter() - start_time:
340
+ _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
341
+ raise Timeout(lock_filename) # noqa: TRY301
342
+ msg = "Lock %s not acquired on %s, waiting %s seconds ..."
343
+ _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
344
+ time.sleep(poll_interval)
345
+ except BaseException: # Something did go wrong, so decrement the counter.
346
+ self._context.lock_counter = max(0, self._context.lock_counter - 1)
347
+ raise
348
+ return AcquireReturnProxy(lock=self)
349
+
350
+ def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002
351
+ """
352
+ Releases the file lock. Note that the lock is only completely released if the lock counter is 0.
353
+ Also note that the lock file itself is not automatically deleted.
354
+
355
+ :param force: If true, the lock counter is ignored and the lock is released in every case.
356
+
357
+ """
358
+ if self.is_locked:
359
+ self._context.lock_counter -= 1
360
+
361
+ if self._context.lock_counter == 0 or force:
362
+ lock_id, lock_filename = id(self), self.lock_file
363
+
364
+ _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
365
+ self._release()
366
+ self._context.lock_counter = 0
367
+ _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
368
+
369
+ def __enter__(self) -> Self:
370
+ """
371
+ Acquire the lock.
372
+
373
+ :return: the lock object
374
+
375
+ """
376
+ self.acquire()
377
+ return self
378
+
379
+ def __exit__(
380
+ self,
381
+ exc_type: type[BaseException] | None,
382
+ exc_value: BaseException | None,
383
+ traceback: TracebackType | None,
384
+ ) -> None:
385
+ """
386
+ Release the lock.
387
+
388
+ :param exc_type: the exception type if raised
389
+ :param exc_value: the exception value if raised
390
+ :param traceback: the exception traceback if raised
391
+
392
+ """
393
+ self.release()
394
+
395
+ def __del__(self) -> None:
396
+ """Called when the lock object is deleted."""
397
+ self.release(force=True)
398
+
399
+
400
+ __all__ = [
401
+ "AcquireReturnProxy",
402
+ "BaseFileLock",
403
+ ]
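
Taken together, `FileLockMeta`, `FileLockContext` and `BaseFileLock` define the behaviour the concrete lock classes inherit. A minimal usage sketch of the public API (the lock-file name is illustrative):

    from filelock import FileLock

    lock = FileLock("counter.txt.lock", timeout=10)  # illustrative path

    # Locks are reentrant: nested acquires only bump lock_counter, and the
    # file is unlocked when the outermost context exits.
    with lock:
        with lock:
            assert lock.is_locked
            assert lock.lock_counter == 2
    assert not lock.is_locked
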
venv/lib/python3.13/site-packages/filelock/_error.py ADDED
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
+ class Timeout(TimeoutError): # noqa: N818
7
+ """Raised when the lock could not be acquired in *timeout* seconds."""
8
+
9
+ def __init__(self, lock_file: str) -> None:
10
+ super().__init__()
11
+ self._lock_file = lock_file
12
+
13
+ def __reduce__(self) -> str | tuple[Any, ...]:
14
+ return self.__class__, (self._lock_file,) # Properly pickle the exception
15
+
16
+ def __str__(self) -> str:
17
+ return f"The file lock '{self._lock_file}' could not be acquired."
18
+
19
+ def __repr__(self) -> str:
20
+ return f"{self.__class__.__name__}({self.lock_file!r})"
21
+
22
+ @property
23
+ def lock_file(self) -> str:
24
+ """:return: The path of the file lock."""
25
+ return self._lock_file
26
+
27
+
28
+ __all__ = [
29
+ "Timeout",
30
+ ]
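
Because `Timeout` subclasses `TimeoutError`, callers can catch either name. A short sketch (path illustrative):

    from filelock import FileLock, Timeout

    lock = FileLock("busy.lock")
    try:
        lock.acquire(timeout=0.1)
    except Timeout:  # or: except TimeoutError
        print(f"could not lock {lock.lock_file}")
    else:
        lock.release()
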
venv/lib/python3.13/site-packages/filelock/_soft.py ADDED
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import EACCES, EEXIST
7
+ from pathlib import Path
8
+
9
+ from ._api import BaseFileLock
10
+ from ._util import ensure_directory_exists, raise_on_not_writable_file
11
+
12
+
13
+ class SoftFileLock(BaseFileLock):
14
+ """Simply watches the existence of the lock file."""
15
+
16
+ def _acquire(self) -> None:
17
+ raise_on_not_writable_file(self.lock_file)
18
+ ensure_directory_exists(self.lock_file)
19
+ # first check for existence and read-only mode, as the open call would mask this case as EEXIST
20
+ flags = (
21
+ os.O_WRONLY # open for writing only
22
+ | os.O_CREAT
23
+ | os.O_EXCL # together with O_CREAT above, raises EEXIST if the file specified by filename exists
24
+ | os.O_TRUNC # truncate the file to zero bytes
25
+ )
26
+ try:
27
+ file_handler = os.open(self.lock_file, flags, self._context.mode)
28
+ except OSError as exception: # re-raise unless expected exception
29
+ if not (
30
+ exception.errno == EEXIST # lock already exists
31
+ or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock
32
+ ): # pragma: win32 no cover
33
+ raise
34
+ else:
35
+ self._context.lock_file_fd = file_handler
36
+
37
+ def _release(self) -> None:
38
+ assert self._context.lock_file_fd is not None # noqa: S101
39
+ os.close(self._context.lock_file_fd) # the lock file is definitely not None
40
+ self._context.lock_file_fd = None
41
+ with suppress(OSError): # the file is already deleted and that's what we want
42
+ Path(self.lock_file).unlink()
43
+
44
+
45
+ __all__ = [
46
+ "SoftFileLock",
47
+ ]
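
`SoftFileLock` depends only on the lock file's existence, so it also works on filesystems without `flock`/`msvcrt` support (for example some network mounts); the trade-off is that a crash between acquire and release leaves a stale lock file behind. A sketch with an illustrative path:

    from filelock import SoftFileLock

    # The lock file exists only while the lock is held; it is unlinked on release.
    with SoftFileLock("/mnt/nfs/shared.lock", timeout=30):
        ...
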
venv/lib/python3.13/site-packages/filelock/_unix.py ADDED
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import ENOSYS
7
+ from pathlib import Path
8
+ from typing import cast
9
+
10
+ from ._api import BaseFileLock
11
+ from ._util import ensure_directory_exists
12
+
13
+ #: a flag to indicate if the fcntl API is available
14
+ has_fcntl = False
15
+ if sys.platform == "win32": # pragma: win32 cover
16
+
17
+ class UnixFileLock(BaseFileLock):
18
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
19
+
20
+ def _acquire(self) -> None:
21
+ raise NotImplementedError
22
+
23
+ def _release(self) -> None:
24
+ raise NotImplementedError
25
+
26
+ else: # pragma: win32 no cover
27
+ try:
28
+ import fcntl
29
+
30
+ _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN)
31
+ except (ImportError, AttributeError):
32
+ pass
33
+ else:
34
+ has_fcntl = True
35
+
36
+ class UnixFileLock(BaseFileLock):
37
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
38
+
39
+ def _acquire(self) -> None:
40
+ ensure_directory_exists(self.lock_file)
41
+ open_flags = os.O_RDWR | os.O_TRUNC
42
+ if not Path(self.lock_file).exists():
43
+ open_flags |= os.O_CREAT
44
+ fd = os.open(self.lock_file, open_flags, self._context.mode)
45
+ with suppress(PermissionError): # This lock is not owned by this UID
46
+ os.fchmod(fd, self._context.mode)
47
+ try:
48
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
49
+ except OSError as exception:
50
+ os.close(fd)
51
+ if exception.errno == ENOSYS: # flock not implemented by this filesystem
52
+ msg = "FileSystem does not appear to support flock; use SoftFileLock instead"
53
+ raise NotImplementedError(msg) from exception
54
+ else:
55
+ self._context.lock_file_fd = fd
56
+
57
+ def _release(self) -> None:
58
+ # Do not remove the lockfile:
59
+ # https://github.com/tox-dev/py-filelock/issues/31
60
+ # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
61
+ fd = cast("int", self._context.lock_file_fd)
62
+ self._context.lock_file_fd = None
63
+ fcntl.flock(fd, fcntl.LOCK_UN)
64
+ os.close(fd)
65
+
66
+
67
+ __all__ = [
68
+ "UnixFileLock",
69
+ "has_fcntl",
70
+ ]
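
Since `has_fcntl` is computed once at import time, it can be used to pick a working lock class up front on platforms where `fcntl` is missing. A sketch; note that `filelock._unix` is a private module, so this import is illustrative rather than a stable API:

    from filelock import SoftFileLock
    from filelock._unix import UnixFileLock, has_fcntl

    # Fall back to the existence-based lock when fcntl is unavailable;
    # UnixFileLock itself raises NotImplementedError on ENOSYS filesystems.
    LockClass = UnixFileLock if has_fcntl else SoftFileLock
    lock = LockClass("data.lock")  # illustrative path
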
venv/lib/python3.13/site-packages/filelock/_util.py ADDED
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import stat
5
+ import sys
6
+ from errno import EACCES, EISDIR
7
+ from pathlib import Path
8
+
9
+
10
+ def raise_on_not_writable_file(filename: str) -> None:
11
+ """
12
+ Raise an exception if attempting to open the file for writing would fail.
13
+
14
+ This is done so files that will never be writable can be separated from files that are writable but currently
15
+ locked.
16
+
17
+ :param filename: file to check
18
+ :raises OSError: as if the file was opened for writing.
19
+
20
+ """
21
+ try: # use stat to do exists + can write to check without race condition
22
+ file_stat = os.stat(filename) # noqa: PTH116
23
+ except OSError:
24
+ return # swallow does not exist or other errors
25
+
26
+ if file_stat.st_mtime != 0: # if os.stat succeeds but the modification time is zero, the result is invalid - ignore it
27
+ if not (file_stat.st_mode & stat.S_IWUSR):
28
+ raise PermissionError(EACCES, "Permission denied", filename)
29
+
30
+ if stat.S_ISDIR(file_stat.st_mode):
31
+ if sys.platform == "win32": # pragma: win32 cover
32
+ # On Windows, this is PermissionError
33
+ raise PermissionError(EACCES, "Permission denied", filename)
34
+ else: # pragma: win32 no cover # noqa: RET506
35
+ # On linux / macOS, this is IsADirectoryError
36
+ raise IsADirectoryError(EISDIR, "Is a directory", filename)
37
+
38
+
39
+ def ensure_directory_exists(filename: Path | str) -> None:
40
+ """
41
+ Ensure the directory containing the file exists (create it if necessary).
42
+
43
+ :param filename: file.
44
+
45
+ """
46
+ Path(filename).parent.mkdir(parents=True, exist_ok=True)
47
+
48
+
49
+ __all__ = [
50
+ "ensure_directory_exists",
51
+ "raise_on_not_writable_file",
52
+ ]
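
A quick sketch of what these helpers do, with illustrative paths (`filelock._util` is private, so this is for illustration only):

    import os
    import stat
    from filelock._util import ensure_directory_exists, raise_on_not_writable_file

    ensure_directory_exists("/tmp/demo/locks/app.lock")  # creates /tmp/demo/locks
    path = "/tmp/demo/locks/app.lock"
    open(path, "w").close()
    os.chmod(path, stat.S_IRUSR)  # owner read-only: never writable
    try:
        raise_on_not_writable_file(path)
    except PermissionError:
        print("file can never be opened for writing, it is not merely locked")
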
venv/lib/python3.13/site-packages/filelock/_windows.py ADDED
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import suppress
6
+ from errno import EACCES
7
+ from pathlib import Path
8
+ from typing import cast
9
+
10
+ from ._api import BaseFileLock
11
+ from ._util import ensure_directory_exists, raise_on_not_writable_file
12
+
13
+ if sys.platform == "win32": # pragma: win32 cover
14
+ import msvcrt
15
+
16
+ class WindowsFileLock(BaseFileLock):
17
+ """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
18
+
19
+ def _acquire(self) -> None:
20
+ raise_on_not_writable_file(self.lock_file)
21
+ ensure_directory_exists(self.lock_file)
22
+ flags = (
23
+ os.O_RDWR # open for read and write
24
+ | os.O_CREAT # create file if not exists
25
+ | os.O_TRUNC # truncate file if not empty
26
+ )
27
+ try:
28
+ fd = os.open(self.lock_file, flags, self._context.mode)
29
+ except OSError as exception:
30
+ if exception.errno != EACCES: # has no access to this lock
31
+ raise
32
+ else:
33
+ try:
34
+ msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
35
+ except OSError as exception:
36
+ os.close(fd) # close file first
37
+ if exception.errno != EACCES: # file is already locked
38
+ raise
39
+ else:
40
+ self._context.lock_file_fd = fd
41
+
42
+ def _release(self) -> None:
43
+ fd = cast("int", self._context.lock_file_fd)
44
+ self._context.lock_file_fd = None
45
+ msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
46
+ os.close(fd)
47
+
48
+ with suppress(OSError): # Probably another instance of the application has acquired the file lock.
49
+ Path(self.lock_file).unlink()
50
+
51
+ else: # pragma: win32 no cover
52
+
53
+ class WindowsFileLock(BaseFileLock):
54
+ """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
55
+
56
+ def _acquire(self) -> None:
57
+ raise NotImplementedError
58
+
59
+ def _release(self) -> None:
60
+ raise NotImplementedError
61
+
62
+
63
+ __all__ = [
64
+ "WindowsFileLock",
65
+ ]
venv/lib/python3.13/site-packages/filelock/asyncio.py ADDED
@@ -0,0 +1,344 @@
1
+ """An asyncio-based implementation of the file lock."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import contextlib
7
+ import logging
8
+ import os
9
+ import time
10
+ from dataclasses import dataclass
11
+ from inspect import iscoroutinefunction
12
+ from threading import local
13
+ from typing import TYPE_CHECKING, Any, NoReturn, cast
14
+
15
+ from ._api import BaseFileLock, FileLockContext, FileLockMeta
16
+ from ._error import Timeout
17
+ from ._soft import SoftFileLock
18
+ from ._unix import UnixFileLock
19
+ from ._windows import WindowsFileLock
20
+
21
+ if TYPE_CHECKING:
22
+ import sys
23
+ from collections.abc import Callable
24
+ from concurrent import futures
25
+ from types import TracebackType
26
+
27
+ if sys.version_info >= (3, 11): # pragma: no cover (py311+)
28
+ from typing import Self
29
+ else: # pragma: no cover (<py311)
30
+ from typing_extensions import Self
31
+
32
+
33
+ _LOGGER = logging.getLogger("filelock")
34
+
35
+
36
+ @dataclass
37
+ class AsyncFileLockContext(FileLockContext):
38
+ """A dataclass which holds the context for a ``BaseAsyncFileLock`` object."""
39
+
40
+ #: Whether to run in an executor
41
+ run_in_executor: bool = True
42
+
43
+ #: The executor
44
+ executor: futures.Executor | None = None
45
+
46
+ #: The loop
47
+ loop: asyncio.AbstractEventLoop | None = None
48
+
49
+
50
+ class AsyncThreadLocalFileContext(AsyncFileLockContext, local):
51
+ """A thread local version of the ``FileLockContext`` class."""
52
+
53
+
54
+ class AsyncAcquireReturnProxy:
55
+ """A context-aware object that will release the lock file when exiting."""
56
+
57
+ def __init__(self, lock: BaseAsyncFileLock) -> None: # noqa: D107
58
+ self.lock = lock
59
+
60
+ async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105
61
+ return self.lock
62
+
63
+ async def __aexit__( # noqa: D105
64
+ self,
65
+ exc_type: type[BaseException] | None,
66
+ exc_value: BaseException | None,
67
+ traceback: TracebackType | None,
68
+ ) -> None:
69
+ await self.lock.release()
70
+
71
+
72
+ class AsyncFileLockMeta(FileLockMeta):
73
+ def __call__( # type: ignore[override] # noqa: PLR0913
74
+ cls, # noqa: N805
75
+ lock_file: str | os.PathLike[str],
76
+ timeout: float = -1,
77
+ mode: int = 0o644,
78
+ thread_local: bool = False, # noqa: FBT001, FBT002
79
+ *,
80
+ blocking: bool = True,
81
+ is_singleton: bool = False,
82
+ loop: asyncio.AbstractEventLoop | None = None,
83
+ run_in_executor: bool = True,
84
+ executor: futures.Executor | None = None,
85
+ ) -> BaseAsyncFileLock:
86
+ if thread_local and run_in_executor:
87
+ msg = "run_in_executor is not supported when thread_local is True"
88
+ raise ValueError(msg)
89
+ instance = super().__call__(
90
+ lock_file=lock_file,
91
+ timeout=timeout,
92
+ mode=mode,
93
+ thread_local=thread_local,
94
+ blocking=blocking,
95
+ is_singleton=is_singleton,
96
+ loop=loop,
97
+ run_in_executor=run_in_executor,
98
+ executor=executor,
99
+ )
100
+ return cast("BaseAsyncFileLock", instance)
101
+
102
+
103
+ class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta):
104
+ """Base class for asynchronous file locks."""
105
+
106
+ def __init__( # noqa: PLR0913
107
+ self,
108
+ lock_file: str | os.PathLike[str],
109
+ timeout: float = -1,
110
+ mode: int = 0o644,
111
+ thread_local: bool = False, # noqa: FBT001, FBT002
112
+ *,
113
+ blocking: bool = True,
114
+ is_singleton: bool = False,
115
+ loop: asyncio.AbstractEventLoop | None = None,
116
+ run_in_executor: bool = True,
117
+ executor: futures.Executor | None = None,
118
+ ) -> None:
119
+ """
120
+ Create a new lock object.
121
+
122
+ :param lock_file: path to the file
123
+ :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
124
+ the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
125
+ to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
126
+ :param mode: file permissions for the lockfile
127
+ :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
128
+ ``False`` then the lock will be reentrant across threads.
129
+ :param blocking: whether the lock should be blocking or not
130
+ :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
131
+ per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
132
+ to pass the same object around.
133
+ :param loop: The event loop to use. If not specified, the running event loop will be used.
134
+ :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor.
135
+ :param executor: The executor to use. If not specified, the default executor will be used.
136
+
137
+ """
138
+ self._is_thread_local = thread_local
139
+ self._is_singleton = is_singleton
140
+
141
+ # Create the context. Note that external code should not work with the context directly and should instead use
142
+ # properties of this class.
143
+ kwargs: dict[str, Any] = {
144
+ "lock_file": os.fspath(lock_file),
145
+ "timeout": timeout,
146
+ "mode": mode,
147
+ "blocking": blocking,
148
+ "loop": loop,
149
+ "run_in_executor": run_in_executor,
150
+ "executor": executor,
151
+ }
152
+ self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)(
153
+ **kwargs
154
+ )
155
+
156
+ @property
157
+ def run_in_executor(self) -> bool:
158
+ """:return: whether to run in an executor."""
159
+ return self._context.run_in_executor
160
+
161
+ @property
162
+ def executor(self) -> futures.Executor | None:
163
+ """:return: the executor."""
164
+ return self._context.executor
165
+
166
+ @executor.setter
167
+ def executor(self, value: futures.Executor | None) -> None: # pragma: no cover
168
+ """
169
+ Change the executor.
170
+
171
+ :param value: the new executor or ``None``
172
+ :type value: futures.Executor | None
173
+
174
+ """
175
+ self._context.executor = value
176
+
177
+ @property
178
+ def loop(self) -> asyncio.AbstractEventLoop | None:
179
+ """:return: the event loop."""
180
+ return self._context.loop
181
+
182
+ async def acquire( # type: ignore[override]
183
+ self,
184
+ timeout: float | None = None,
185
+ poll_interval: float = 0.05,
186
+ *,
187
+ blocking: bool | None = None,
188
+ ) -> AsyncAcquireReturnProxy:
189
+ """
190
+ Try to acquire the file lock.
191
+
192
+ :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default
193
+ :attr:`~BaseFileLock.timeout`, and if ``timeout < 0``, there is no timeout and
194
+ this method will block until the lock can be acquired
195
+ :param poll_interval: interval of trying to acquire the lock file
196
+ :param blocking: defaults to True. If False, the method raises :exc:`Timeout` immediately if it cannot obtain a lock on the
197
+ first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
198
+ :raises Timeout: if fails to acquire lock within the timeout period
199
+ :return: a context object that will unlock the file when the context is exited
200
+
201
+ .. code-block:: python
202
+
203
+ # You can use this method as an async context manager (recommended)
204
+ async with lock.acquire():
205
+ pass
206
+
207
+ # Or use an equivalent try-finally construct:
208
+ await lock.acquire()
209
+ try:
210
+ pass
211
+ finally:
212
+ await lock.release()
213
+
214
+ """
215
+ # Use the default timeout, if no timeout is provided.
216
+ if timeout is None:
217
+ timeout = self._context.timeout
218
+
219
+ if blocking is None:
220
+ blocking = self._context.blocking
221
+
222
+ # Increment the number right at the beginning. We can still undo it, if something fails.
223
+ self._context.lock_counter += 1
224
+
225
+ lock_id = id(self)
226
+ lock_filename = self.lock_file
227
+ start_time = time.perf_counter()
228
+ try:
229
+ while True:
230
+ if not self.is_locked:
231
+ _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
232
+ await self._run_internal_method(self._acquire)
233
+ if self.is_locked:
234
+ _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
235
+ break
236
+ if blocking is False:
237
+ _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
238
+ raise Timeout(lock_filename) # noqa: TRY301
239
+ if 0 <= timeout < time.perf_counter() - start_time:
240
+ _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
241
+ raise Timeout(lock_filename) # noqa: TRY301
242
+ msg = "Lock %s not acquired on %s, waiting %s seconds ..."
243
+ _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
244
+ await asyncio.sleep(poll_interval)
245
+ except BaseException: # Something did go wrong, so decrement the counter.
246
+ self._context.lock_counter = max(0, self._context.lock_counter - 1)
247
+ raise
248
+ return AsyncAcquireReturnProxy(lock=self)
249
+
250
+ async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002
251
+ """
252
+ Releases the file lock. Note that the lock is only completely released if the lock counter is 0.
253
+ Also note that the lock file itself is not automatically deleted.
254
+
255
+ :param force: If true, the lock counter is ignored and the lock is released in every case.
256
+
257
+ """
258
+ if self.is_locked:
259
+ self._context.lock_counter -= 1
260
+
261
+ if self._context.lock_counter == 0 or force:
262
+ lock_id, lock_filename = id(self), self.lock_file
263
+
264
+ _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
265
+ await self._run_internal_method(self._release)
266
+ self._context.lock_counter = 0
267
+ _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
268
+
269
+ async def _run_internal_method(self, method: Callable[[], Any]) -> None:
270
+ if iscoroutinefunction(method):
271
+ await method()
272
+ elif self.run_in_executor:
273
+ loop = self.loop or asyncio.get_running_loop()
274
+ await loop.run_in_executor(self.executor, method)
275
+ else:
276
+ method()
277
+
278
+ def __enter__(self) -> NoReturn:
279
+ """
280
+ Replace old __enter__ method to avoid using it.
281
+
282
+ NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD.
283
+
284
+ :return: none
285
+ :rtype: NoReturn
286
+ """
287
+ msg = "Do not use `with` for asyncio locks, use `async with` instead."
288
+ raise NotImplementedError(msg)
289
+
290
+ async def __aenter__(self) -> Self:
291
+ """
292
+ Acquire the lock.
293
+
294
+ :return: the lock object
295
+
296
+ """
297
+ await self.acquire()
298
+ return self
299
+
300
+ async def __aexit__(
301
+ self,
302
+ exc_type: type[BaseException] | None,
303
+ exc_value: BaseException | None,
304
+ traceback: TracebackType | None,
305
+ ) -> None:
306
+ """
307
+ Release the lock.
308
+
309
+ :param exc_type: the exception type if raised
310
+ :param exc_value: the exception value if raised
311
+ :param traceback: the exception traceback if raised
312
+
313
+ """
314
+ await self.release()
315
+
316
+ def __del__(self) -> None:
317
+ """Called when the lock object is deleted."""
318
+ with contextlib.suppress(RuntimeError):
319
+ loop = self.loop or asyncio.get_running_loop()
320
+ if not loop.is_running(): # pragma: no cover
321
+ loop.run_until_complete(self.release(force=True))
322
+ else:
323
+ loop.create_task(self.release(force=True))
324
+
325
+
326
+ class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock):
327
+ """Simply watches the existence of the lock file."""
328
+
329
+
330
+ class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock):
331
+ """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
332
+
333
+
334
+ class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock):
335
+ """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems."""
336
+
337
+
338
+ __all__ = [
339
+ "AsyncAcquireReturnProxy",
340
+ "AsyncSoftFileLock",
341
+ "AsyncUnixFileLock",
342
+ "AsyncWindowsFileLock",
343
+ "BaseAsyncFileLock",
344
+ ]
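
A sketch of the async API, assuming the package-level `AsyncFileLock` alias that filelock exposes for the platform's async lock class (path illustrative):

    import asyncio
    from filelock import AsyncFileLock

    async def main() -> None:
        lock = AsyncFileLock("async.lock", timeout=5)
        async with lock:  # _acquire/_release run in the default executor
            ...

    asyncio.run(main())
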
venv/lib/python3.13/site-packages/filelock/py.typed ADDED
File without changes
venv/lib/python3.13/site-packages/filelock/version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '3.20.0'
32
+ __version_tuple__ = version_tuple = (3, 20, 0)
33
+
34
+ __commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,71 @@
1
+ from . import caching
2
+ from ._version import __version__ # noqa: F401
3
+ from .callbacks import Callback
4
+ from .compression import available_compressions
5
+ from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
6
+ from .exceptions import FSTimeoutError
7
+ from .mapping import FSMap, get_mapper
8
+ from .registry import (
9
+ available_protocols,
10
+ filesystem,
11
+ get_filesystem_class,
12
+ register_implementation,
13
+ registry,
14
+ )
15
+ from .spec import AbstractFileSystem
16
+
17
+ __all__ = [
18
+ "AbstractFileSystem",
19
+ "FSTimeoutError",
20
+ "FSMap",
21
+ "filesystem",
22
+ "register_implementation",
23
+ "get_filesystem_class",
24
+ "get_fs_token_paths",
25
+ "get_mapper",
26
+ "open",
27
+ "open_files",
28
+ "open_local",
29
+ "registry",
30
+ "caching",
31
+ "Callback",
32
+ "available_protocols",
33
+ "available_compressions",
34
+ "url_to_fs",
35
+ ]
36
+
37
+
38
+ def process_entries():
39
+ try:
40
+ from importlib.metadata import entry_points
41
+ except ImportError:
42
+ return
43
+ if entry_points is not None:
44
+ try:
45
+ eps = entry_points()
46
+ except TypeError:
47
+ pass # importlib-metadata < 0.8
48
+ else:
49
+ if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0
50
+ specs = eps.select(group="fsspec.specs")
51
+ else:
52
+ specs = eps.get("fsspec.specs", [])
53
+ registered_names = {}
54
+ for spec in specs:
55
+ err_msg = f"Unable to load filesystem from {spec}"
56
+ name = spec.name
57
+ if name in registered_names:
58
+ continue
59
+ registered_names[name] = True
60
+ register_implementation(
61
+ name,
62
+ spec.value.replace(":", "."),
63
+ errtxt=err_msg,
64
+ # Take our implementation as the one to use, overwriting any
65
+ # already-registered (possibly identical) implementation we
66
+ # may encounter
67
+ clobber=True,
68
+ )
69
+
70
+
71
+ process_entries()
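
`process_entries` is what lets third-party backends be addressed by protocol name alone; the same registry is also reachable directly. A sketch using the built-in memory backend:

    import fsspec

    fs = fsspec.filesystem("memory")  # resolved through the registry
    with fs.open("/demo.txt", "wb") as f:
        f.write(b"hello")

    # includes any backends registered via the fsspec.specs entry point
    print(fsspec.available_protocols())
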
venv/lib/python3.13/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '2025.10.0'
32
+ __version_tuple__ = version_tuple = (2025, 10, 0)
33
+
34
+ __commit_id__ = commit_id = None
venv/lib/python3.13/site-packages/fsspec/caching.py ADDED
@@ -0,0 +1,1004 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+ import math
7
+ import os
8
+ import threading
9
+ import warnings
10
+ from collections import OrderedDict
11
+ from concurrent.futures import Future, ThreadPoolExecutor
12
+ from itertools import groupby
13
+ from operator import itemgetter
14
+ from typing import (
15
+ TYPE_CHECKING,
16
+ Any,
17
+ Callable,
18
+ ClassVar,
19
+ Generic,
20
+ NamedTuple,
21
+ TypeVar,
22
+ )
23
+
24
+ if TYPE_CHECKING:
25
+ import mmap
26
+
27
+ from typing_extensions import ParamSpec
28
+
29
+ P = ParamSpec("P")
30
+ else:
31
+ P = TypeVar("P")
32
+
33
+ T = TypeVar("T")
34
+
35
+
36
+ logger = logging.getLogger("fsspec")
37
+
38
+ Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
39
+ MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes
40
+
41
+
42
+ class BaseCache:
43
+ """Pass-through cache: doesn't keep anything, calls the fetcher every time
44
+
45
+ Acts as base class for other cachers
46
+
47
+ Parameters
48
+ ----------
49
+ blocksize: int
50
+ How far to read ahead, in bytes
51
+ fetcher: func
52
+ Function of the form f(start, end) which gets bytes from remote as
53
+ specified
54
+ size: int
55
+ How big this file is
56
+ """
57
+
58
+ name: ClassVar[str] = "none"
59
+
60
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
61
+ self.blocksize = blocksize
62
+ self.nblocks = 0
63
+ self.fetcher = fetcher
64
+ self.size = size
65
+ self.hit_count = 0
66
+ self.miss_count = 0
67
+ # the bytes that we actually requested
68
+ self.total_requested_bytes = 0
69
+
70
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
71
+ if start is None:
72
+ start = 0
73
+ if stop is None:
74
+ stop = self.size
75
+ if start >= self.size or start >= stop:
76
+ return b""
77
+ return self.fetcher(start, stop)
78
+
79
+ def _reset_stats(self) -> None:
80
+ """Reset hit and miss counts for a more granular report, e.g. by file."""
81
+ self.hit_count = 0
82
+ self.miss_count = 0
83
+ self.total_requested_bytes = 0
84
+
85
+ def _log_stats(self) -> str:
86
+ """Return a formatted string of the cache statistics."""
87
+ if self.hit_count == 0 and self.miss_count == 0:
88
+ # a cache that does nothing, this is for logs only
89
+ return ""
90
+ return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
91
+
92
+ def __repr__(self) -> str:
93
+ # TODO: use rich for better formatting
94
+ return f"""
95
+ <{self.__class__.__name__}:
96
+ block size : {self.blocksize}
97
+ block count : {self.nblocks}
98
+ file size : {self.size}
99
+ cache hits : {self.hit_count}
100
+ cache misses: {self.miss_count}
101
+ total requested bytes: {self.total_requested_bytes}>
102
+ """
103
+
104
+
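
The concrete subclasses below register themselves under a short `name`, and buffered file objects select one via the `cache_type` open argument. A hedged sketch (URL illustrative; requires a backend whose files go through fsspec's buffering):

    import fsspec

    # cache_type picks a class from this module by its ``name``;
    # block_size sets the read-ahead granularity.
    with fsspec.open(
        "https://example.com/data.bin", "rb", cache_type="readahead", block_size=2**20
    ) as f:
        header = f.read(1024)
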
105
+ class MMapCache(BaseCache):
106
+ """memory-mapped sparse file cache
107
+
108
+ Opens a temporary file, which is filled block-wise as data is requested.
109
+ Ensure there is enough disk space in the temporary location.
110
+
111
+ This cache method might only work on POSIX systems.
112
+
113
+ Parameters
114
+ ----------
115
+ blocksize: int
116
+ How far to read ahead, in bytes
117
+ fetcher: Fetcher
118
+ Function of the form f(start, end) which gets bytes from remote as
119
+ specified
120
+ size: int
121
+ How big this file is
122
+ location: str
123
+ Where to create the temporary file. If None, a temporary file is
124
+ created using tempfile.TemporaryFile().
125
+ blocks: set[int]
126
+ Set of block numbers that have already been fetched. If None, an empty
127
+ set is created.
128
+ multi_fetcher: MultiFetcher
129
+ Function of the form f([(start, end)]) which gets bytes from remote
130
+ as specified. This function is used to fetch multiple blocks at once.
131
+ If not specified, the fetcher function is used instead.
132
+ """
133
+
134
+ name = "mmap"
135
+
136
+ def __init__(
137
+ self,
138
+ blocksize: int,
139
+ fetcher: Fetcher,
140
+ size: int,
141
+ location: str | None = None,
142
+ blocks: set[int] | None = None,
143
+ multi_fetcher: MultiFetcher | None = None,
144
+ ) -> None:
145
+ super().__init__(blocksize, fetcher, size)
146
+ self.blocks = set() if blocks is None else blocks
147
+ self.location = location
148
+ self.multi_fetcher = multi_fetcher
149
+ self.cache = self._makefile()
150
+
151
+ def _makefile(self) -> mmap.mmap | bytearray:
152
+ import mmap
153
+ import tempfile
154
+
155
+ if self.size == 0:
156
+ return bytearray()
157
+
158
+ # posix version
159
+ if self.location is None or not os.path.exists(self.location):
160
+ if self.location is None:
161
+ fd = tempfile.TemporaryFile()
162
+ self.blocks = set()
163
+ else:
164
+ fd = open(self.location, "wb+")
165
+ fd.seek(self.size - 1)
166
+ fd.write(b"1")
167
+ fd.flush()
168
+ else:
169
+ fd = open(self.location, "r+b")
170
+
171
+ return mmap.mmap(fd.fileno(), self.size)
172
+
173
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
174
+ logger.debug(f"MMap cache fetching {start}-{end}")
175
+ if start is None:
176
+ start = 0
177
+ if end is None:
178
+ end = self.size
179
+ if start >= self.size or start >= end:
180
+ return b""
181
+ start_block = start // self.blocksize
182
+ end_block = end // self.blocksize
183
+ block_range = range(start_block, end_block + 1)
184
+ # Determine which blocks need to be fetched. This sequence is sorted by construction.
185
+ need = (i for i in block_range if i not in self.blocks)
186
+ # Count the number of blocks already cached
187
+ self.hit_count += sum(1 for i in block_range if i in self.blocks)
188
+
189
+ ranges = []
190
+
191
+ # Consolidate needed blocks.
192
+ # Algorithm adapted from Python 2.x itertools documentation.
193
+ # We are grouping an enumerated sequence of blocks. The key computes the difference
194
+ # between an ascending counter (provided by enumerate) and the needed block numbers,
195
+ # which stays constant across a run of consecutive blocks (see the standalone sketch after this class).
196
+ # Whenever the difference changes, we know that we have previously cached block(s),
197
+ # and a new group is started. In other words, this algorithm neatly groups
198
+ # runs of consecutive block numbers so they can be fetched together.
199
+ for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
200
+ # Extract the blocks from the enumerated sequence
201
+ _blocks = tuple(map(itemgetter(1), _blocks))
202
+ # Compute start of first block
203
+ sstart = _blocks[0] * self.blocksize
204
+ # Compute the end of the last block. Last block may not be full size.
205
+ send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
206
+
207
+ # Fetch bytes (could be multiple consecutive blocks)
208
+ self.total_requested_bytes += send - sstart
209
+ logger.debug(
210
+ f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
211
+ )
212
+ ranges.append((sstart, send))
213
+
214
+ # Update set of cached blocks
215
+ self.blocks.update(_blocks)
216
+ # Update cache statistics with number of blocks we had to cache
217
+ self.miss_count += len(_blocks)
218
+
219
+ if not ranges:
220
+ return self.cache[start:end]
221
+
222
+ if self.multi_fetcher:
223
+ logger.debug(f"MMap get blocks {ranges}")
224
+ for idx, r in enumerate(self.multi_fetcher(ranges)):
225
+ (sstart, send) = ranges[idx]
226
+ logger.debug(f"MMap copy block ({sstart}-{send})")
227
+ self.cache[sstart:send] = r
228
+ else:
229
+ for sstart, send in ranges:
230
+ logger.debug(f"MMap get block ({sstart}-{send})")
231
+ self.cache[sstart:send] = self.fetcher(sstart, send)
232
+
233
+ return self.cache[start:end]
234
+
235
+ def __getstate__(self) -> dict[str, Any]:
236
+ state = self.__dict__.copy()
237
+ # Remove the unpicklable entries.
238
+ del state["cache"]
239
+ return state
240
+
241
+ def __setstate__(self, state: dict[str, Any]) -> None:
242
+ # Restore instance attributes
243
+ self.__dict__.update(state)
244
+ self.cache = self._makefile()
245
+
246
+
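
The run-consolidation idiom used by `MMapCache._fetch` is easiest to see in isolation; a standalone sketch:

    from itertools import groupby
    from operator import itemgetter

    need = [3, 4, 5, 9, 10, 14]  # block numbers still missing from the cache
    runs = []
    for _, group in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
        blocks = list(map(itemgetter(1), group))
        runs.append((blocks[0], blocks[-1]))

    print(runs)  # [(3, 5), (9, 10), (14, 14)] -- one fetch per consecutive run
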
247
+ class ReadAheadCache(BaseCache):
248
+ """Cache which reads only when we get beyond a block of data
249
+
250
+ This is a much simpler version of BytesCache, and does not attempt to
251
+ fill holes in the cache or keep fragments alive. It is best suited to
252
+ many small reads in a sequential order (e.g., reading lines from a file).
253
+ """
254
+
255
+ name = "readahead"
256
+
257
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
258
+ super().__init__(blocksize, fetcher, size)
259
+ self.cache = b""
260
+ self.start = 0
261
+ self.end = 0
262
+
263
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
264
+ if start is None:
265
+ start = 0
266
+ if end is None or end > self.size:
267
+ end = self.size
268
+ if start >= self.size or start >= end:
269
+ return b""
270
+ l = end - start
271
+ if start >= self.start and end <= self.end:
272
+ # cache hit
273
+ self.hit_count += 1
274
+ return self.cache[start - self.start : end - self.start]
275
+ elif self.start <= start < self.end:
276
+ # partial hit
277
+ self.miss_count += 1
278
+ part = self.cache[start - self.start :]
279
+ l -= len(part)
280
+ start = self.end
281
+ else:
282
+ # miss
283
+ self.miss_count += 1
284
+ part = b""
285
+ end = min(self.size, end + self.blocksize)
286
+ self.total_requested_bytes += end - start
287
+ self.cache = self.fetcher(start, end) # new block replaces old
288
+ self.start = start
289
+ self.end = self.start + len(self.cache)
290
+ return part + self.cache[:l]
291
+
292
+
293
+ class FirstChunkCache(BaseCache):
294
+ """Caches the first block of a file only
295
+
296
+ This may be useful for file types where the metadata is stored in the header,
297
+ but is randomly accessed.
298
+ """
299
+
300
+ name = "first"
301
+
302
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
303
+ if blocksize > size:
304
+ # this will buffer the whole thing
305
+ blocksize = size
306
+ super().__init__(blocksize, fetcher, size)
307
+ self.cache: bytes | None = None
308
+
309
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
310
+ start = start or 0
311
+ if start > self.size:
312
+ logger.debug("FirstChunkCache: requested start > file size")
313
+ return b""
314
+
315
+ end = min(end, self.size)
316
+
317
+ if start < self.blocksize:
318
+ if self.cache is None:
319
+ self.miss_count += 1
320
+ if end > self.blocksize:
321
+ self.total_requested_bytes += end
322
+ data = self.fetcher(0, end)
323
+ self.cache = data[: self.blocksize]
324
+ return data[start:]
325
+ self.cache = self.fetcher(0, self.blocksize)
326
+ self.total_requested_bytes += self.blocksize
327
+ part = self.cache[start:end]
328
+ if end > self.blocksize:
329
+ self.total_requested_bytes += end - self.blocksize
330
+ part += self.fetcher(self.blocksize, end)
331
+ self.hit_count += 1
332
+ return part
333
+ else:
334
+ self.miss_count += 1
335
+ self.total_requested_bytes += end - start
336
+ return self.fetcher(start, end)
337
+
338
+
339
+ class BlockCache(BaseCache):
340
+ """
341
+ Cache holding memory as a set of blocks.
342
+
343
+ Requests are only ever made ``blocksize`` at a time, and are
344
+ stored in an LRU cache. The least recently accessed block is
345
+ discarded when more than ``maxblocks`` are stored.
346
+
347
+ Parameters
348
+ ----------
349
+ blocksize : int
350
+ The number of bytes to store in each block.
351
+ Requests are only ever made for ``blocksize``, so this
352
+ should balance the overhead of making a request against
353
+ the granularity of the blocks.
354
+ fetcher : Callable
355
+ size : int
356
+ The total size of the file being cached.
357
+ maxblocks : int
358
+ The maximum number of blocks to cache for. The maximum memory
359
+ use for this cache is then ``blocksize * maxblocks``.
360
+ """
361
+
362
+ name = "blockcache"
363
+
364
+ def __init__(
365
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
366
+ ) -> None:
367
+ super().__init__(blocksize, fetcher, size)
368
+ self.nblocks = math.ceil(size / blocksize)
369
+ self.maxblocks = maxblocks
370
+ self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
371
+
372
+ def cache_info(self):
373
+ """
374
+ The statistics on the block cache.
375
+
376
+ Returns
377
+ -------
378
+ NamedTuple
379
+ Returned directly from the LRU Cache used internally.
380
+ """
381
+ return self._fetch_block_cached.cache_info()
382
+
383
+ def __getstate__(self) -> dict[str, Any]:
384
+ state = self.__dict__
385
+ del state["_fetch_block_cached"]
386
+ return state
387
+
388
+ def __setstate__(self, state: dict[str, Any]) -> None:
389
+ self.__dict__.update(state)
390
+ self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
391
+ self._fetch_block
392
+ )
393
+
394
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
395
+ if start is None:
396
+ start = 0
397
+ if end is None:
398
+ end = self.size
399
+ if start >= self.size or start >= end:
400
+ return b""
401
+
402
+ # byte position -> block numbers
403
+ start_block_number = start // self.blocksize
404
+ end_block_number = end // self.blocksize
405
+
406
+ # these are cached, so safe to do multiple calls for the same start and end.
407
+ for block_number in range(start_block_number, end_block_number + 1):
408
+ self._fetch_block_cached(block_number)
409
+
410
+ return self._read_cache(
411
+ start,
412
+ end,
413
+ start_block_number=start_block_number,
414
+ end_block_number=end_block_number,
415
+ )
416
+
417
+ def _fetch_block(self, block_number: int) -> bytes:
418
+ """
419
+ Fetch the block of data for `block_number`.
420
+ """
421
+ if block_number > self.nblocks:
422
+ raise ValueError(
423
+ f"'block_number={block_number}' is greater than "
424
+ f"the number of blocks ({self.nblocks})"
425
+ )
426
+
427
+ start = block_number * self.blocksize
428
+ end = start + self.blocksize
429
+ self.total_requested_bytes += end - start
430
+ self.miss_count += 1
431
+ logger.info("BlockCache fetching block %d", block_number)
432
+ block_contents = super()._fetch(start, end)
433
+ return block_contents
434
+
435
+ def _read_cache(
436
+ self, start: int, end: int, start_block_number: int, end_block_number: int
437
+ ) -> bytes:
438
+ """
439
+ Read from our block cache.
440
+
441
+ Parameters
442
+ ----------
443
+ start, end : int
444
+ The start and end byte positions.
445
+ start_block_number, end_block_number : int
446
+ The start and end block numbers.
447
+ """
448
+ start_pos = start % self.blocksize
449
+ end_pos = end % self.blocksize
450
+
451
+ self.hit_count += 1
452
+ if start_block_number == end_block_number:
453
+ block: bytes = self._fetch_block_cached(start_block_number)
454
+ return block[start_pos:end_pos]
455
+
456
+ else:
457
+ # read from the initial
458
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
459
+
460
+ # intermediate blocks
461
+ # Note: it'd be nice to combine these into one big request. However
462
+ # that doesn't play nicely with our LRU cache.
463
+ out.extend(
464
+ map(
465
+ self._fetch_block_cached,
466
+ range(start_block_number + 1, end_block_number),
467
+ )
468
+ )
469
+
470
+ # final block
471
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
472
+
473
+ return b"".join(out)
474
+
475
+
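
`BlockCache` above can be exercised directly by handing it a fetcher closure; `_fetch` is the internal entry point that buffered files call. A sketch:

    from fsspec.caching import BlockCache

    data = bytes(range(256)) * 64  # 16 KiB of stand-in "remote" data

    def fetcher(start: int, end: int) -> bytes:
        return data[start:end]

    cache = BlockCache(blocksize=1024, fetcher=fetcher, size=len(data), maxblocks=4)
    assert cache._fetch(10, 20) == data[10:20]  # one miss for block 0, then hits
    print(cache.cache_info())  # hits/misses reported by the underlying lru_cache
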
476
+ class BytesCache(BaseCache):
477
+ """Cache which holds data in an in-memory bytes object
478
+
479
+ Implements read-ahead by the block size, for semi-random reads progressing
480
+ through the file.
481
+
482
+ Parameters
483
+ ----------
484
+ trim: bool
485
+ As we read more data, whether to discard the start of the buffer when
486
+ we are more than a blocksize ahead of it.
487
+ """
488
+
489
+ name: ClassVar[str] = "bytes"
490
+
491
+ def __init__(
492
+ self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
493
+ ) -> None:
494
+ super().__init__(blocksize, fetcher, size)
495
+ self.cache = b""
496
+ self.start: int | None = None
497
+ self.end: int | None = None
498
+ self.trim = trim
499
+
500
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
501
+ # TODO: only set start/end after fetch, in case it fails?
502
+ # is this where retry logic might go?
503
+ if start is None:
504
+ start = 0
505
+ if end is None:
506
+ end = self.size
507
+ if start >= self.size or start >= end:
508
+ return b""
509
+ if (
510
+ self.start is not None
511
+ and start >= self.start
512
+ and self.end is not None
513
+ and end < self.end
514
+ ):
515
+ # cache hit: we have all the required data
516
+ offset = start - self.start
517
+ self.hit_count += 1
518
+ return self.cache[offset : offset + end - start]
519
+
520
+ if self.blocksize:
521
+ bend = min(self.size, end + self.blocksize)
522
+ else:
523
+ bend = end
524
+
525
+ if bend == start or start > self.size:
526
+ return b""
527
+
528
+ if (self.start is None or start < self.start) and (
529
+ self.end is None or end > self.end
530
+ ):
531
+ # First read, or extending both before and after
532
+ self.total_requested_bytes += bend - start
533
+ self.miss_count += 1
534
+ self.cache = self.fetcher(start, bend)
535
+ self.start = start
536
+ else:
537
+ assert self.start is not None
538
+ assert self.end is not None
539
+ self.miss_count += 1
540
+
541
+ if start < self.start:
542
+ if self.end is None or self.end - end > self.blocksize:
543
+ self.total_requested_bytes += bend - start
544
+ self.cache = self.fetcher(start, bend)
545
+ self.start = start
546
+ else:
547
+ self.total_requested_bytes += self.start - start
548
+ new = self.fetcher(start, self.start)
549
+ self.start = start
550
+ self.cache = new + self.cache
551
+ elif self.end is not None and bend > self.end:
552
+ if self.end > self.size:
553
+ pass
554
+ elif end - self.end > self.blocksize:
555
+ self.total_requested_bytes += bend - start
556
+ self.cache = self.fetcher(start, bend)
557
+ self.start = start
558
+ else:
559
+ self.total_requested_bytes += bend - self.end
560
+ new = self.fetcher(self.end, bend)
561
+ self.cache = self.cache + new
562
+
563
+ self.end = self.start + len(self.cache)
564
+ offset = start - self.start
565
+ out = self.cache[offset : offset + end - start]
566
+ if self.trim:
567
+ num = (self.end - self.start) // (self.blocksize + 1)
568
+ if num > 1:
569
+ self.start += self.blocksize * num
570
+ self.cache = self.cache[self.blocksize * num :]
571
+ return out
572
+
573
+ def __len__(self) -> int:
574
+ return len(self.cache)
575
+
576
+
577
+ class AllBytes(BaseCache):
578
+ """Cache entire contents of the file"""
579
+
580
+ name: ClassVar[str] = "all"
581
+
582
+ def __init__(
583
+ self,
584
+ blocksize: int | None = None,
585
+ fetcher: Fetcher | None = None,
586
+ size: int | None = None,
587
+ data: bytes | None = None,
588
+ ) -> None:
589
+ super().__init__(blocksize, fetcher, size) # type: ignore[arg-type]
590
+ if data is None:
591
+ self.miss_count += 1
592
+ self.total_requested_bytes += self.size
593
+ data = self.fetcher(0, self.size)
594
+ self.data = data
595
+
596
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
597
+ self.hit_count += 1
598
+ return self.data[start:stop]
599
+
600
+
601
+ class KnownPartsOfAFile(BaseCache):
602
+ """
603
+ Cache holding known file parts.
604
+
605
+ Parameters
606
+ ----------
607
+ blocksize: int
608
+ How far to read ahead in numbers of bytes
609
+ fetcher: func
610
+ Function of the form f(start, end) which gets bytes from remote as
611
+ specified
612
+ size: int
613
+ How big this file is
614
+ data: dict
615
+ A dictionary mapping explicit `(start, stop)` file-offset tuples
616
+ to known bytes.
617
+ strict: bool, default True
618
+ Whether to fetch reads that go beyond a known byte-range boundary.
619
+ If `False`, any read that ends outside a known part will be zero
620
+ padded. Note that zero padding will not be used for reads that
621
+ begin outside a known byte-range.
622
+ """
623
+
624
+ name: ClassVar[str] = "parts"
625
+
626
+ def __init__(
627
+ self,
628
+ blocksize: int,
629
+ fetcher: Fetcher,
630
+ size: int,
631
+ data: dict[tuple[int, int], bytes] | None = None,
632
+ strict: bool = True,
633
+ **_: Any,
634
+ ):
635
+ super().__init__(blocksize, fetcher, size)
636
+ self.strict = strict
637
+
638
+ # simple consolidation of contiguous blocks
639
+ if data:
640
+ old_offsets = sorted(data.keys())
641
+ offsets = [old_offsets[0]]
642
+ blocks = [data.pop(old_offsets[0])]
643
+ for start, stop in old_offsets[1:]:
644
+ start0, stop0 = offsets[-1]
645
+ if start == stop0:
646
+ offsets[-1] = (start0, stop)
647
+ blocks[-1] += data.pop((start, stop))
648
+ else:
649
+ offsets.append((start, stop))
650
+ blocks.append(data.pop((start, stop)))
651
+
652
+ self.data = dict(zip(offsets, blocks))
653
+ else:
654
+ self.data = {}
655
+
656
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
657
+ if start is None:
658
+ start = 0
659
+ if stop is None:
660
+ stop = self.size
661
+
662
+ out = b""
663
+ for (loc0, loc1), data in self.data.items():
664
+ # If self.strict=False, use zero-padded data
665
+ # for reads beyond the end of a "known" buffer
666
+ if loc0 <= start < loc1:
667
+ off = start - loc0
668
+ out = data[off : off + stop - start]
669
+ if not self.strict or loc0 <= stop <= loc1:
670
+ # The request is within a known range, or
671
+ # it begins within a known range, and we
672
+ # are allowed to pad reads beyond the
673
+ # buffer with zero
674
+ out += b"\x00" * (stop - start - len(out))
675
+ self.hit_count += 1
676
+ return out
677
+ else:
678
+ # The request ends outside a known range,
679
+ # and we are being "strict" about reads
680
+ # beyond the buffer
681
+ start = loc1
682
+ break
683
+
684
+ # We only get here if there is a request outside the
685
+ # known parts of the file. In an ideal world, this
686
+ # should never happen
687
+ if self.fetcher is None:
688
+ # We cannot fetch the data, so raise an error
689
+ raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
690
+ # We can fetch the data, but should warn the user
691
+ # that this may be slow
692
+ warnings.warn(
693
+ f"Read is outside the known file parts: {(start, stop)}. "
694
+ f"IO/caching performance may be poor!"
695
+ )
696
+ logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
697
+ self.total_requested_bytes += stop - start
698
+ self.miss_count += 1
699
+ return out + super()._fetch(start, stop)
700
+
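A sketch of the ``strict`` semantics described above: with ``strict=False`` and no fetcher, a read that begins inside a known part is zero-padded past its end.

    from fsspec.caching import KnownPartsOfAFile

    cache = KnownPartsOfAFile(
        blocksize=0, fetcher=None, size=10,
        data={(0, 4): b"abcd"}, strict=False,
    )
    assert cache._fetch(0, 4) == b"abcd"
    assert cache._fetch(2, 8) == b"cd\x00\x00\x00\x00"  # padded past the part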
701
+
702
+ class UpdatableLRU(Generic[P, T]):
703
+ """
704
+ Custom implementation of LRU cache that allows updating keys
705
+
706
+ Used by BackgroundBlockCache
707
+ """
708
+
709
+ class CacheInfo(NamedTuple):
710
+ hits: int
711
+ misses: int
712
+ maxsize: int
713
+ currsize: int
714
+
715
+ def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
716
+ self._cache: OrderedDict[Any, T] = collections.OrderedDict()
717
+ self._func = func
718
+ self._max_size = max_size
719
+ self._hits = 0
720
+ self._misses = 0
721
+ self._lock = threading.Lock()
722
+
723
+ def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
724
+ if kwargs:
725
+ raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
726
+ with self._lock:
727
+ if args in self._cache:
728
+ self._cache.move_to_end(args)
729
+ self._hits += 1
730
+ return self._cache[args]
731
+
732
+ result = self._func(*args, **kwargs)
733
+
734
+ with self._lock:
735
+ self._cache[args] = result
736
+ self._misses += 1
737
+ if len(self._cache) > self._max_size:
738
+ self._cache.popitem(last=False)
739
+
740
+ return result
741
+
742
+ def is_key_cached(self, *args: Any) -> bool:
743
+ with self._lock:
744
+ return args in self._cache
745
+
746
+ def add_key(self, result: T, *args: Any) -> None:
747
+ with self._lock:
748
+ self._cache[args] = result
749
+ if len(self._cache) > self._max_size:
750
+ self._cache.popitem(last=False)
751
+
752
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
753
+ with self._lock:
754
+ return self.CacheInfo(
755
+ maxsize=self._max_size,
756
+ currsize=len(self._cache),
757
+ hits=self._hits,
758
+ misses=self._misses,
759
+ )
760
+
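A sketch of the update-in-place behaviour that distinguishes this class from ``functools.lru_cache``:

    from fsspec.caching import UpdatableLRU

    lru = UpdatableLRU(lambda x: x * 2, max_size=2)
    assert lru(3) == 6        # miss: computed and stored
    assert lru(3) == 6        # hit
    lru.add_key(99, 3)        # overwrite the cached value for key (3,)
    assert lru(3) == 99
    print(lru.cache_info())   # CacheInfo(hits=2, misses=1, maxsize=2, currsize=1)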
761
+
762
+ class BackgroundBlockCache(BaseCache):
763
+ """
764
+ Cache holding memory as a set of blocks with pre-loading of
765
+ the next block in the background.
766
+
767
+ Requests are only ever made ``blocksize`` at a time, and are
768
+ stored in an LRU cache. The least recently accessed block is
769
+ discarded when more than ``maxblocks`` are stored. If the
770
+ next block is not in cache, it is loaded in a separate thread
771
+ in non-blocking way.
772
+
773
+ Parameters
774
+ ----------
775
+ blocksize : int
776
+ The number of bytes to store in each block.
777
+ Requests are only ever made for ``blocksize``, so this
778
+ should balance the overhead of making a request against
779
+ the granularity of the blocks.
780
+ fetcher : Callable
781
+ size : int
782
+ The total size of the file being cached.
783
+ maxblocks : int
784
+ The maximum number of blocks to cache for. The maximum memory
785
+ use for this cache is then ``blocksize * maxblocks``.
786
+ """
787
+
788
+ name: ClassVar[str] = "background"
789
+
790
+ def __init__(
791
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
792
+ ) -> None:
793
+ super().__init__(blocksize, fetcher, size)
794
+ self.nblocks = math.ceil(size / blocksize)
795
+ self.maxblocks = maxblocks
796
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)
797
+
798
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
799
+ self._fetch_future_block_number: int | None = None
800
+ self._fetch_future: Future[bytes] | None = None
801
+ self._fetch_future_lock = threading.Lock()
802
+
803
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
804
+ """
805
+ The statistics on the block cache.
806
+
807
+ Returns
808
+ -------
809
+ NamedTuple
810
+ Returned directly from the LRU Cache used internally.
811
+ """
812
+ return self._fetch_block_cached.cache_info()
813
+
814
+ def __getstate__(self) -> dict[str, Any]:
815
+ state = self.__dict__
816
+ del state["_fetch_block_cached"]
817
+ del state["_thread_executor"]
818
+ del state["_fetch_future_block_number"]
819
+ del state["_fetch_future"]
820
+ del state["_fetch_future_lock"]
821
+ return state
822
+
823
+ def __setstate__(self, state) -> None:
824
+ self.__dict__.update(state)
825
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
826
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
827
+ self._fetch_future_block_number = None
828
+ self._fetch_future = None
829
+ self._fetch_future_lock = threading.Lock()
830
+
831
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
832
+ if start is None:
833
+ start = 0
834
+ if end is None:
835
+ end = self.size
836
+ if start >= self.size or start >= end:
837
+ return b""
838
+
839
+ # byte position -> block numbers
840
+ start_block_number = start // self.blocksize
841
+ end_block_number = end // self.blocksize
842
+
843
+ fetch_future_block_number = None
844
+ fetch_future = None
845
+ with self._fetch_future_lock:
846
+ # Background thread is running. Check whether we can or must join it.
847
+ if self._fetch_future is not None:
848
+ assert self._fetch_future_block_number is not None
849
+ if self._fetch_future.done():
850
+ logger.info("BlockCache joined background fetch without waiting.")
851
+ self._fetch_block_cached.add_key(
852
+ self._fetch_future.result(), self._fetch_future_block_number
853
+ )
854
+ # Cleanup the fetch variables. Done with fetching the block.
855
+ self._fetch_future_block_number = None
856
+ self._fetch_future = None
857
+ else:
858
+ # Must join if we need the block for the current fetch
859
+ must_join = bool(
860
+ start_block_number
861
+ <= self._fetch_future_block_number
862
+ <= end_block_number
863
+ )
864
+ if must_join:
865
+ # Copy to the local variables to release lock
866
+ # before waiting for result
867
+ fetch_future_block_number = self._fetch_future_block_number
868
+ fetch_future = self._fetch_future
869
+
870
+ # Cleanup the fetch variables. Have a local copy.
871
+ self._fetch_future_block_number = None
872
+ self._fetch_future = None
873
+
874
+ # Need to wait for the future for the current read
875
+ if fetch_future is not None:
876
+ logger.info("BlockCache waiting for background fetch.")
877
+ # Wait until result and put it in cache
878
+ self._fetch_block_cached.add_key(
879
+ fetch_future.result(), fetch_future_block_number
880
+ )
881
+
882
+ # these are cached, so safe to do multiple calls for the same start and end.
883
+ for block_number in range(start_block_number, end_block_number + 1):
884
+ self._fetch_block_cached(block_number)
885
+
886
+ # fetch next block in the background if nothing is running in the background,
887
+ # the block is within file and it is not already cached
888
+ end_block_plus_1 = end_block_number + 1
889
+ with self._fetch_future_lock:
890
+ if (
891
+ self._fetch_future is None
892
+ and end_block_plus_1 <= self.nblocks
893
+ and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
894
+ ):
895
+ self._fetch_future_block_number = end_block_plus_1
896
+ self._fetch_future = self._thread_executor.submit(
897
+ self._fetch_block, end_block_plus_1, "async"
898
+ )
899
+
900
+ return self._read_cache(
901
+ start,
902
+ end,
903
+ start_block_number=start_block_number,
904
+ end_block_number=end_block_number,
905
+ )
906
+
907
+ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
908
+ """
909
+ Fetch the block of data for `block_number`.
910
+ """
911
+ if block_number > self.nblocks:
912
+ raise ValueError(
913
+ f"'block_number={block_number}' is greater than "
914
+ f"the number of blocks ({self.nblocks})"
915
+ )
916
+
917
+ start = block_number * self.blocksize
918
+ end = start + self.blocksize
919
+ logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
920
+ self.total_requested_bytes += end - start
921
+ self.miss_count += 1
922
+ block_contents = super()._fetch(start, end)
923
+ return block_contents
924
+
925
+ def _read_cache(
926
+ self, start: int, end: int, start_block_number: int, end_block_number: int
927
+ ) -> bytes:
928
+ """
929
+ Read from our block cache.
930
+
931
+ Parameters
932
+ ----------
933
+ start, end : int
934
+ The start and end byte positions.
935
+ start_block_number, end_block_number : int
936
+ The start and end block numbers.
937
+ """
938
+ start_pos = start % self.blocksize
939
+ end_pos = end % self.blocksize
940
+
941
+ # kind of pointless to count this as a hit, but it is
942
+ self.hit_count += 1
943
+
944
+ if start_block_number == end_block_number:
945
+ block = self._fetch_block_cached(start_block_number)
946
+ return block[start_pos:end_pos]
947
+
948
+ else:
949
+ # read from the initial
950
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
951
+
952
+ # intermediate blocks
953
+ # Note: it'd be nice to combine these into one big request. However
954
+ # that doesn't play nicely with our LRU cache.
955
+ out.extend(
956
+ map(
957
+ self._fetch_block_cached,
958
+ range(start_block_number + 1, end_block_number),
959
+ )
960
+ )
961
+
962
+ # final block
963
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
964
+
965
+ return b"".join(out)
966
+
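A sketch of the prefetch behaviour: reading block 0 schedules block 1 on the single background worker. The lambda fetcher stands in for a remote range request.

    from fsspec.caching import BackgroundBlockCache

    data = b"x" * 1000
    cache = BackgroundBlockCache(
        blocksize=100, fetcher=lambda s, e: data[s:e], size=len(data)
    )
    out = cache._fetch(0, 50)  # block 0 now; block 1 prefetched in background
    assert out == b"x" * 50
    print(cache.cache_info())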
967
+
968
+ caches: dict[str | None, type[BaseCache]] = {
969
+ # one custom case
970
+ None: BaseCache,
971
+ }
972
+
973
+
974
+ def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
975
+ """'Register' cache implementation.
976
+
977
+ Parameters
978
+ ----------
979
+ clobber: bool, optional
980
+ If set to True (default is False) - allow to overwrite existing
981
+ entry.
982
+
983
+ Raises
984
+ ------
985
+ ValueError
+ If a cache with this name is already registered and ``clobber``
+ is False.
986
+ """
987
+ name = cls.name
988
+ if not clobber and name in caches:
989
+ raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
990
+ caches[name] = cls
991
+
992
+
993
+ for c in (
994
+ BaseCache,
995
+ MMapCache,
996
+ BytesCache,
997
+ ReadAheadCache,
998
+ BlockCache,
999
+ FirstChunkCache,
1000
+ AllBytes,
1001
+ KnownPartsOfAFile,
1002
+ BackgroundBlockCache,
1003
+ ):
1004
+ register_cache(c)
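A minimal usage sketch of the registry above; ``NoopCache`` and the name "noop" are illustrative only, not part of fsspec:

    from fsspec.caching import BaseCache, caches, register_cache

    class NoopCache(BaseCache):
        name = "noop"  # hypothetical name; any unused string works

    register_cache(NoopCache)                # selectable as cache_type="noop"
    register_cache(NoopCache, clobber=True)  # re-registering needs clobber
    assert caches["noop"] is NoopCache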
venv/lib/python3.13/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,182 @@
1
+ """Helper functions for a standard streaming compression API"""
2
+
3
+ from zipfile import ZipFile
4
+
5
+ import fsspec.utils
6
+ from fsspec.spec import AbstractBufferedFile
7
+
8
+
9
+ def noop_file(file, mode, **kwargs):
10
+ return file
11
+
12
+
13
+ # TODO: files should also be available as contexts
14
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
15
+ compr = {None: noop_file}
16
+
17
+
18
+ def register_compression(name, callback, extensions, force=False):
19
+ """Register an "inferable" file compression type.
20
+
21
+ Registers transparent file compression type for use with fsspec.open.
22
+ Compression can be specified by name in open, or "infer"-ed for any files
23
+ ending with the given extensions.
24
+
25
+ Args:
26
+ name: (str) The compression type name. Eg. "gzip".
27
+ callback: A callable of form (infile, mode, **kwargs) -> file-like.
28
+ Accepts an input file-like object, the target mode and kwargs.
29
+ Returns a wrapped file-like object.
30
+ extensions: (str, Iterable[str]) A file extension, or list of file
31
+ extensions for which to infer this compression scheme. Eg. "gz".
32
+ force: (bool) Force re-registration of compression type or extensions.
33
+
34
+ Raises:
35
+ ValueError: If name or extensions already registered, and not force.
36
+
37
+ """
38
+ if isinstance(extensions, str):
39
+ extensions = [extensions]
40
+
41
+ # Validate registration
42
+ if name in compr and not force:
43
+ raise ValueError(f"Duplicate compression registration: {name}")
44
+
45
+ for ext in extensions:
46
+ if ext in fsspec.utils.compressions and not force:
47
+ raise ValueError(f"Duplicate compression file extension: {ext} ({name})")
48
+
49
+ compr[name] = callback
50
+
51
+ for ext in extensions:
52
+ fsspec.utils.compressions[ext] = name
53
+
54
+
55
+ def unzip(infile, mode="rb", filename=None, **kwargs):
56
+ if "r" not in mode:
57
+ filename = filename or "file"
58
+ z = ZipFile(infile, mode="w", **kwargs)
59
+ fo = z.open(filename, mode="w")
60
+ fo.close = lambda closer=fo.close: closer() or z.close()
61
+ return fo
62
+ z = ZipFile(infile)
63
+ if filename is None:
64
+ filename = z.namelist()[0]
65
+ return z.open(filename, mode="r", **kwargs)
66
+
67
+
68
+ register_compression("zip", unzip, "zip")
69
+
70
+ try:
71
+ from bz2 import BZ2File
72
+ except ImportError:
73
+ pass
74
+ else:
75
+ register_compression("bz2", BZ2File, "bz2")
76
+
77
+ try: # pragma: no cover
78
+ from isal import igzip
79
+
80
+ def isal(infile, mode="rb", **kwargs):
81
+ return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)
82
+
83
+ register_compression("gzip", isal, "gz")
84
+ except ImportError:
85
+ from gzip import GzipFile
86
+
87
+ register_compression(
88
+ "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
89
+ )
90
+
91
+ try:
92
+ from lzma import LZMAFile
93
+
94
+ register_compression("lzma", LZMAFile, "lzma")
95
+ register_compression("xz", LZMAFile, "xz")
96
+ except ImportError:
97
+ pass
98
+
99
+ try:
100
+ import lzmaffi
101
+
102
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
103
+ register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
104
+ except ImportError:
105
+ pass
106
+
107
+
108
+ class SnappyFile(AbstractBufferedFile):
109
+ def __init__(self, infile, mode, **kwargs):
110
+ import snappy
111
+
112
+ super().__init__(
113
+ fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
114
+ )
115
+ self.infile = infile
116
+ if "r" in mode:
117
+ self.codec = snappy.StreamDecompressor()
118
+ else:
119
+ self.codec = snappy.StreamCompressor()
120
+
121
+ def _upload_chunk(self, final=False):
122
+ self.buffer.seek(0)
123
+ out = self.codec.add_chunk(self.buffer.read())
124
+ self.infile.write(out)
125
+ return True
126
+
127
+ def seek(self, loc, whence=0):
128
+ raise NotImplementedError("SnappyFile is not seekable")
129
+
130
+ def seekable(self):
131
+ return False
132
+
133
+ def _fetch_range(self, start, end):
134
+ """Get the specified set of bytes from remote"""
135
+ data = self.infile.read(end - start)
136
+ return self.codec.decompress(data)
137
+
138
+
139
+ try:
140
+ import snappy
141
+
142
+ snappy.compress(b"")
143
+ # Snappy may use the .sz file extension, but this is not part of the
144
+ # standard implementation.
145
+ register_compression("snappy", SnappyFile, [])
146
+
147
+ except (ImportError, NameError, AttributeError):
148
+ pass
149
+
150
+ try:
151
+ import lz4.frame
152
+
153
+ register_compression("lz4", lz4.frame.open, "lz4")
154
+ except ImportError:
155
+ pass
156
+
157
+ try:
158
+ # zstd in the standard library for python >= 3.14
159
+ from compression.zstd import ZstdFile
160
+
161
+ register_compression("zstd", ZstdFile, "zst")
162
+
163
+ except ImportError:
164
+ try:
165
+ import zstandard as zstd
166
+
167
+ def zstandard_file(infile, mode="rb"):
168
+ if "r" in mode:
169
+ cctx = zstd.ZstdDecompressor()
170
+ return cctx.stream_reader(infile)
171
+ else:
172
+ cctx = zstd.ZstdCompressor(level=10)
173
+ return cctx.stream_writer(infile)
174
+
175
+ register_compression("zstd", zstandard_file, "zst")
176
+ except ImportError:
177
+ pass
178
+
179
+
180
+ def available_compressions():
181
+ """Return a list of the implemented compressions."""
182
+ return list(compr)
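A sketch of registering a custom codec with the API above; the "raw" name and extension are hypothetical and the callback is the identity:

    import fsspec
    from fsspec.compression import register_compression

    register_compression("raw", lambda f, mode="rb", **kw: f, "raw")

    with fsspec.open("memory://demo.txt.raw", "wb", compression="infer") as f:
        f.write(b"hello")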
venv/lib/python3.13/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
+ def set_conf_env(conf_dict, envdict=os.environ):
15
+ """Set config values from environment variables
16
+
17
+ Looks for variables of the form ``FSSPEC_<protocol>`` and
18
+ ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
19
+ as a json dictionary and used to ``update`` the config of the
20
+ corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
21
+ attempt to convert the string value, but the kwarg keys will be lower-cased.
22
+
23
+ The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
24
+ ``FSSPEC_<protocol>`` ones.
25
+
26
+ Parameters
27
+ ----------
28
+ conf_dict : dict(str, dict)
29
+ This dict will be mutated
30
+ envdict : dict-like(str, str)
31
+ Source for the values - usually the real environment
32
+ """
33
+ kwarg_keys = []
34
+ for key in envdict:
35
+ if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
36
+ if key.count("_") > 1:
37
+ kwarg_keys.append(key)
38
+ continue
39
+ try:
40
+ value = json.loads(envdict[key])
41
+ except json.decoder.JSONDecodeError as ex:
42
+ warnings.warn(
43
+ f"Ignoring environment variable {key} due to a parse failure: {ex}"
44
+ )
45
+ else:
46
+ if isinstance(value, dict):
47
+ _, proto = key.split("_", 1)
48
+ conf_dict.setdefault(proto.lower(), {}).update(value)
49
+ else:
50
+ warnings.warn(
51
+ f"Ignoring environment variable {key} due to not being a dict:"
52
+ f" {type(value)}"
53
+ )
54
+ elif key.startswith("FSSPEC"):
55
+ warnings.warn(
56
+ f"Ignoring environment variable {key} due to having an unexpected name"
57
+ )
58
+
59
+ for key in kwarg_keys:
60
+ _, proto, kwarg = key.split("_", 2)
61
+ conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
62
+
63
+
64
+ def set_conf_files(cdir, conf_dict):
65
+ """Set config values from files
66
+
67
+ Scans for INI and JSON files in the given dictionary, and uses their
68
+ contents to set the config. In case of repeated values, later values
69
+ win.
70
+
71
+ In the case of INI files, all values are strings, and these will not
72
+ be converted.
73
+
74
+ Parameters
75
+ ----------
76
+ cdir : str
77
+ Directory to search
78
+ conf_dict : dict(str, dict)
79
+ This dict will be mutated
80
+ """
81
+ if not os.path.isdir(cdir):
82
+ return
83
+ allfiles = sorted(os.listdir(cdir))
84
+ for fn in allfiles:
85
+ if fn.endswith(".ini"):
86
+ ini = configparser.ConfigParser()
87
+ ini.read(os.path.join(cdir, fn))
88
+ for key in ini:
89
+ if key == "DEFAULT":
90
+ continue
91
+ conf_dict.setdefault(key, {}).update(dict(ini[key]))
92
+ if fn.endswith(".json"):
93
+ with open(os.path.join(cdir, fn)) as f:
94
+ js = json.load(f)
95
+ for key in js:
96
+ conf_dict.setdefault(key, {}).update(dict(js[key]))
97
+
98
+
99
+ def apply_config(cls, kwargs, conf_dict=None):
100
+ """Supply default values for kwargs when instantiating class
101
+
102
+ Augments the passed kwargs, by finding entries in the config dict
103
+ which match the classes ``.protocol`` attribute (one or more str)
104
+
105
+ Parameters
106
+ ----------
107
+ cls : file system implementation
108
+ kwargs : dict
109
+ conf_dict : dict of dict
110
+ Typically this is the global configuration
111
+
112
+ Returns
113
+ -------
114
+ dict : the modified set of kwargs
115
+ """
116
+ if conf_dict is None:
117
+ conf_dict = conf
118
+ protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
119
+ kw = {}
120
+ for proto in protos:
121
+ # default kwargs from the current state of the config
122
+ if proto in conf_dict:
123
+ kw.update(conf_dict[proto])
124
+ # explicit kwargs always win
125
+ kw.update(**kwargs)
126
+ kwargs = kw
127
+ return kwargs
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
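A sketch of the two environment-variable forms handled by ``set_conf_env``; "gcs" is just an example protocol name:

    import fsspec.config

    env = {
        "FSSPEC_GCS": '{"project": "demo"}',  # JSON-dict form
        "FSSPEC_GCS_TOKEN": "anon",           # single-kwarg form, key lower-cased
    }
    conf = {}
    fsspec.config.set_conf_env(conf, envdict=env)
    assert conf == {"gcs": {"project": "demo", "token": "anon"}}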
venv/lib/python3.13/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,125 @@
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import sys
5
+ import time
6
+ from collections import deque
7
+ from collections.abc import Generator, Sequence
8
+
9
+ import pytest
10
+
11
+ import fsspec
12
+
13
+
14
+ @pytest.fixture()
15
+ def m():
16
+ """
17
+ Fixture providing a memory filesystem.
18
+ """
19
+ m = fsspec.filesystem("memory")
20
+ m.store.clear()
21
+ m.pseudo_dirs.clear()
22
+ m.pseudo_dirs.append("")
23
+ try:
24
+ yield m
25
+ finally:
26
+ m.store.clear()
27
+ m.pseudo_dirs.clear()
28
+ m.pseudo_dirs.append("")
29
+
30
+
31
+ class InstanceCacheInspector:
32
+ """
33
+ Helper class to inspect instance caches of filesystem classes in tests.
34
+ """
35
+
36
+ def clear(self) -> None:
37
+ """
38
+ Clear instance caches of all currently imported filesystem classes.
39
+ """
40
+ classes = deque([fsspec.spec.AbstractFileSystem])
41
+ while classes:
42
+ cls = classes.popleft()
43
+ cls.clear_instance_cache()
44
+ classes.extend(cls.__subclasses__())
45
+
46
+ def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
47
+ """
48
+ Gather counts of filesystem instances in the instance caches
49
+ of all currently imported filesystem classes.
50
+
51
+ Parameters
52
+ ----------
53
+ omit_zero:
54
+ Whether to omit instance types with no cached instances.
55
+ """
56
+ out: dict[str, int] = {}
57
+ classes = deque([fsspec.spec.AbstractFileSystem])
58
+ while classes:
59
+ cls = classes.popleft()
60
+ count = len(cls._cache) # there is no public interface for the cache
61
+ # note: skip intermediate AbstractFileSystem subclasses
62
+ # if they proxy the protocol attribute via a property.
63
+ if isinstance(cls.protocol, (Sequence, str)):
64
+ key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
65
+ if count or not omit_zero:
66
+ out[key] = count
67
+ classes.extend(cls.__subclasses__())
68
+ return out
69
+
70
+
71
+ @pytest.fixture(scope="function", autouse=True)
72
+ def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
73
+ """
74
+ Fixture to ensure empty filesystem instance caches before and after a test.
75
+
76
+ Used by default for all tests.
77
+ Clears caches of all imported filesystem classes.
78
+ Can be used to write test assertions about instance caches.
79
+
80
+ Usage:
81
+
82
+ def test_something(instance_caches):
83
+ # Test code here
84
+ fsspec.open("file://abc")
85
+ fsspec.open("memory://foo/bar")
86
+
87
+ # Test assertion
88
+ assert instance_caches.gather_counts() == {"file": 1, "memory": 1}
89
+
90
+ Returns
91
+ -------
92
+ instance_caches: An instance cache inspector for clearing and inspecting caches.
93
+ """
94
+ ic = InstanceCacheInspector()
95
+
96
+ ic.clear()
97
+ try:
98
+ yield ic
99
+ finally:
100
+ ic.clear()
101
+
102
+
103
+ @pytest.fixture(scope="function")
104
+ def ftp_writable(tmpdir):
105
+ """
106
+ Fixture providing a writable FTP filesystem.
107
+ """
108
+ pytest.importorskip("pyftpdlib")
109
+
110
+ d = str(tmpdir)
111
+ with open(os.path.join(d, "out"), "wb") as f:
112
+ f.write(b"hello" * 10000)
113
+ P = subprocess.Popen(
114
+ [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
115
+ )
116
+ try:
117
+ time.sleep(1)
118
+ yield "localhost", 2121, "user", "pass"
119
+ finally:
120
+ P.terminate()
121
+ P.wait()
122
+ try:
123
+ shutil.rmtree(tmpdir)
124
+ except Exception:
125
+ pass
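A hypothetical test consuming the fixture above, assuming pyftpdlib is installed and the FTP implementation is importable:

    import fsspec

    def test_ftp_roundtrip(ftp_writable):
        host, port, user, pw = ftp_writable
        fs = fsspec.filesystem(
            "ftp", host=host, port=port, username=user, password=pw
        )
        assert fs.cat_file("/out")[:5] == b"hello"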
venv/lib/python3.13/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,743 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from fsspec.caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from fsspec.compression import compr
20
+ from fsspec.config import conf
21
+ from fsspec.registry import filesystem, get_filesystem_class
22
+ from fsspec.utils import (
23
+ _unstrip_protocol,
24
+ build_name_function,
25
+ infer_compression,
26
+ stringify_path,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
+ class OpenFile:
33
+ """
34
+ File-like object to be used in a context
35
+
36
+ Can layer (buffered) text-mode and compression over any file-system, which
37
+ are typically binary-only.
38
+
39
+ These instances are safe to serialize, as the low-level file object
40
+ is not created until invoked using ``with``.
41
+
42
+ Parameters
43
+ ----------
44
+ fs: FileSystem
45
+ The file system to use for opening the file. Should be a subclass or duck-type
46
+ with ``fsspec.spec.AbstractFileSystem``
47
+ path: str
48
+ Location to open
49
+ mode: str like 'rb', optional
50
+ Mode of the opened file
51
+ compression: str or None, optional
52
+ Compression to apply
53
+ encoding: str or None, optional
54
+ The encoding to use if opened in text mode.
55
+ errors: str or None, optional
56
+ How to handle encoding errors if opened in text mode.
57
+ newline: None or str
58
+ Passed to TextIOWrapper in text mode, how to handle line endings.
59
+ autoopen: bool
60
+ If True, calls open() immediately. Mostly used by pickle
61
+ pos: int
62
+ If given and autoopen is True, seek to this location immediately
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ fs,
68
+ path,
69
+ mode="rb",
70
+ compression=None,
71
+ encoding=None,
72
+ errors=None,
73
+ newline=None,
74
+ ):
75
+ self.fs = fs
76
+ self.path = path
77
+ self.mode = mode
78
+ self.compression = get_compression(path, compression)
79
+ self.encoding = encoding
80
+ self.errors = errors
81
+ self.newline = newline
82
+ self.fobjects = []
83
+
84
+ def __reduce__(self):
85
+ return (
86
+ OpenFile,
87
+ (
88
+ self.fs,
89
+ self.path,
90
+ self.mode,
91
+ self.compression,
92
+ self.encoding,
93
+ self.errors,
94
+ self.newline,
95
+ ),
96
+ )
97
+
98
+ def __repr__(self):
99
+ return f"<OpenFile '{self.path}'>"
100
+
101
+ def __enter__(self):
102
+ mode = self.mode.replace("t", "").replace("b", "") + "b"
103
+
104
+ try:
105
+ f = self.fs.open(self.path, mode=mode)
106
+ except FileNotFoundError as e:
107
+ if has_magic(self.path):
108
+ raise FileNotFoundError(
109
+ "%s not found. The URL contains glob characters: you may need\n"
110
+ "to pass expand=True in fsspec.open() or the storage_options of \n"
111
+ "your library. You can also set the config value 'open_expand'\n"
112
+ "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
113
+ self.path,
114
+ ) from e
115
+ raise
116
+
117
+ self.fobjects = [f]
118
+
119
+ if self.compression is not None:
120
+ compress = compr[self.compression]
121
+ f = compress(f, mode=mode[0])
122
+ self.fobjects.append(f)
123
+
124
+ if "b" not in self.mode:
125
+ # assume, for example, that 'r' is equivalent to 'rt' as in builtin
126
+ f = PickleableTextIOWrapper(
127
+ f, encoding=self.encoding, errors=self.errors, newline=self.newline
128
+ )
129
+ self.fobjects.append(f)
130
+
131
+ return self.fobjects[-1]
132
+
133
+ def __exit__(self, *args):
134
+ self.close()
135
+
136
+ @property
137
+ def full_name(self):
138
+ return _unstrip_protocol(self.path, self.fs)
139
+
140
+ def open(self):
141
+ """Materialise this as a real open file without context
142
+
143
+ The OpenFile object should be explicitly closed to avoid enclosed file
144
+ instances persisting. You must, therefore, keep a reference to the OpenFile
145
+ during the life of the file-like it generates.
146
+ """
147
+ return self.__enter__()
148
+
149
+ def close(self):
150
+ """Close all encapsulated file objects"""
151
+ for f in reversed(self.fobjects):
152
+ if "r" not in self.mode and not f.closed:
153
+ f.flush()
154
+ f.close()
155
+ self.fobjects.clear()
156
+
157
+
158
+ class OpenFiles(list):
159
+ """List of OpenFile instances
160
+
161
+ Can be used in a single context, which opens and closes all of the
162
+ contained files. Normal list access to get the elements works as
163
+ normal.
164
+
165
+ A special case is made for caching filesystems - the files will
166
+ be down/uploaded together at the start or end of the context, and
167
+ this may happen concurrently, if the target filesystem supports it.
168
+ """
169
+
170
+ def __init__(self, *args, mode="rb", fs=None):
171
+ self.mode = mode
172
+ self.fs = fs
173
+ self.files = []
174
+ super().__init__(*args)
175
+
176
+ def __enter__(self):
177
+ if self.fs is None:
178
+ raise ValueError("Context has already been used")
179
+
180
+ fs = self.fs
181
+ while True:
182
+ if hasattr(fs, "open_many"):
183
+ # check for concurrent cache download; or set up for upload
184
+ self.files = fs.open_many(self)
185
+ return self.files
186
+ if hasattr(fs, "fs") and fs.fs is not None:
187
+ fs = fs.fs
188
+ else:
189
+ break
190
+ return [s.__enter__() for s in self]
191
+
192
+ def __exit__(self, *args):
193
+ fs = self.fs
194
+ [s.__exit__(*args) for s in self]
195
+ if "r" not in self.mode:
196
+ while True:
197
+ if hasattr(fs, "open_many"):
198
+ # check for concurrent cache upload
199
+ fs.commit_many(self.files)
200
+ return
201
+ if hasattr(fs, "fs") and fs.fs is not None:
202
+ fs = fs.fs
203
+ else:
204
+ break
205
+
206
+ def __getitem__(self, item):
207
+ out = super().__getitem__(item)
208
+ if isinstance(item, slice):
209
+ return OpenFiles(out, mode=self.mode, fs=self.fs)
210
+ return out
211
+
212
+ def __repr__(self):
213
+ return f"<List of {len(self)} OpenFile instances>"
214
+
215
+
216
+ def open_files(
217
+ urlpath,
218
+ mode="rb",
219
+ compression=None,
220
+ encoding="utf8",
221
+ errors=None,
222
+ name_function=None,
223
+ num=1,
224
+ protocol=None,
225
+ newline=None,
226
+ auto_mkdir=True,
227
+ expand=True,
228
+ **kwargs,
229
+ ):
230
+ """Given a path or paths, return a list of ``OpenFile`` objects.
231
+
232
+ For writing, a str path must contain the "*" character, which will be filled
233
+ in by increasing numbers, e.g., "part*" -> "part0", "part1" if num=2.
234
+
235
+ For either reading or writing, can instead provide explicit list of paths.
236
+
237
+ Parameters
238
+ ----------
239
+ urlpath: string or list
240
+ Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
241
+ to read from alternative filesystems. To read from multiple files you
242
+ can pass a globstring or a list of paths, with the caveat that they
243
+ must all have the same protocol.
244
+ mode: 'rb', 'wt', etc.
245
+ compression: string or None
246
+ If given, open file using compression codec. Can either be a compression
247
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
248
+ compression from the filename suffix.
249
+ encoding: str
250
+ For text mode only
251
+ errors: None or str
252
+ Passed to TextIOWrapper in text mode
253
+ name_function: function or None
254
+ if opening a set of files for writing, those files do not yet exist,
255
+ so we need to generate their names by formatting the urlpath for
256
+ each sequence number
257
+ num: int [1]
258
+ if writing mode, number of files we expect to create (passed to
259
+ name+function)
260
+ protocol: str or None
261
+ If given, overrides the protocol found in the URL.
262
+ newline: bytes or None
263
+ Used for line terminator in text mode. If None, uses system default;
264
+ if blank, uses no translation.
265
+ auto_mkdir: bool (True)
266
+ If in write mode, this will ensure the target directory exists before
267
+ writing, by calling ``fs.mkdirs(exist_ok=True)``.
268
+ expand: bool
269
+ **kwargs: dict
270
+ Extra options that make sense to a particular storage connection, e.g.
271
+ host, port, username, password, etc.
272
+
273
+ Examples
274
+ --------
275
+ >>> files = open_files('2015-*-*.csv') # doctest: +SKIP
276
+ >>> files = open_files(
277
+ ... 's3://bucket/2015-*-*.csv.gz', compression='gzip'
278
+ ... ) # doctest: +SKIP
279
+
280
+ Returns
281
+ -------
282
+ An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
283
+ be used as a single context
284
+
285
+ Notes
286
+ -----
287
+ For a full list of the available protocols and the implementations that
288
+ they map across to see the latest online documentation:
289
+
290
+ - For implementations built into ``fsspec`` see
291
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
292
+ - For implementations in separate packages see
293
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
294
+ """
295
+ fs, fs_token, paths = get_fs_token_paths(
296
+ urlpath,
297
+ mode,
298
+ num=num,
299
+ name_function=name_function,
300
+ storage_options=kwargs,
301
+ protocol=protocol,
302
+ expand=expand,
303
+ )
304
+ if fs.protocol == "file":
305
+ fs.auto_mkdir = auto_mkdir
306
+ elif "r" not in mode and auto_mkdir:
307
+ parents = {fs._parent(path) for path in paths}
308
+ for parent in parents:
309
+ try:
310
+ fs.makedirs(parent, exist_ok=True)
311
+ except PermissionError:
312
+ pass
313
+ return OpenFiles(
314
+ [
315
+ OpenFile(
316
+ fs,
317
+ path,
318
+ mode=mode,
319
+ compression=compression,
320
+ encoding=encoding,
321
+ errors=errors,
322
+ newline=newline,
323
+ )
324
+ for path in paths
325
+ ],
326
+ mode=mode,
327
+ fs=fs,
328
+ )
329
+
330
+
331
+ def _un_chain(path, kwargs):
332
+ # Avoid a circular import
333
+ from fsspec.implementations.chained import ChainedFileSystem
334
+
335
+ if "::" in path:
336
+ x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
337
+ bits = []
338
+ for p in path.split("::"):
339
+ if "://" in p or x.match(p):
340
+ bits.append(p)
341
+ else:
342
+ bits.append(p + "://")
343
+ else:
344
+ bits = [path]
345
+ # [[url, protocol, kwargs], ...]
346
+ out = []
347
+ previous_bit = None
348
+ kwargs = kwargs.copy()
349
+ for bit in reversed(bits):
350
+ protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
351
+ cls = get_filesystem_class(protocol)
352
+ extra_kwargs = cls._get_kwargs_from_urls(bit)
353
+ kws = kwargs.pop(protocol, {})
354
+ if bit is bits[0]:
355
+ kws.update(kwargs)
356
+ kw = dict(
357
+ **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
358
+ **kws,
359
+ )
360
+ bit = cls._strip_protocol(bit)
361
+ if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem):
362
+ bit = previous_bit
363
+ out.append((bit, protocol, kw))
364
+ previous_bit = bit
365
+ out.reverse()
366
+ return out
367
+
368
+
369
+ def url_to_fs(url, **kwargs):
370
+ """
371
+ Turn fully-qualified and potentially chained URL into filesystem instance
372
+
373
+ Parameters
374
+ ----------
375
+ url : str
376
+ The fsspec-compatible URL
377
+ **kwargs: dict
378
+ Extra options that make sense to a particular storage connection, e.g.
379
+ host, port, username, password, etc.
380
+
381
+ Returns
382
+ -------
383
+ filesystem : FileSystem
384
+ The new filesystem discovered from ``url`` and created with
385
+ ``**kwargs``.
386
+ urlpath : str
387
+ The file-systems-specific URL for ``url``.
388
+ """
389
+ url = stringify_path(url)
390
+ # non-FS arguments that appear in fsspec.open()
391
+ # inspect could keep this in sync with open()'s signature
392
+ known_kwargs = {
393
+ "compression",
394
+ "encoding",
395
+ "errors",
396
+ "expand",
397
+ "mode",
398
+ "name_function",
399
+ "newline",
400
+ "num",
401
+ }
402
+ kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
403
+ chain = _un_chain(url, kwargs)
404
+ inkwargs = {}
405
+ # Reverse iterate the chain, creating a nested target_* structure
406
+ for i, ch in enumerate(reversed(chain)):
407
+ urls, protocol, kw = ch
408
+ if i == len(chain) - 1:
409
+ inkwargs = dict(**kw, **inkwargs)
410
+ continue
411
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
412
+ inkwargs["target_protocol"] = protocol
413
+ inkwargs["fo"] = urls
414
+ urlpath, protocol, _ = chain[0]
415
+ fs = filesystem(protocol, **inkwargs)
416
+ return fs, urlpath
417
+
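A minimal sketch of ``url_to_fs`` on a simple (unchained) URL:

    from fsspec.core import url_to_fs

    fs, path = url_to_fs("memory://bucket/key.txt")
    print(fs.protocol, path)  # -> memory /bucket/key.txt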
418
+
419
+ DEFAULT_EXPAND = conf.get("open_expand", False)
420
+
421
+
422
+ def open(
423
+ urlpath,
424
+ mode="rb",
425
+ compression=None,
426
+ encoding="utf8",
427
+ errors=None,
428
+ protocol=None,
429
+ newline=None,
430
+ expand=None,
431
+ **kwargs,
432
+ ):
433
+ """Given a path or paths, return one ``OpenFile`` object.
434
+
435
+ Parameters
436
+ ----------
437
+ urlpath: string or list
438
+ Absolute or relative filepath. Prefix with a protocol like ``s3://``
439
+ to read from alternative filesystems. Should not include glob
440
+ character(s).
441
+ mode: 'rb', 'wt', etc.
442
+ compression: string or None
443
+ If given, open file using compression codec. Can either be a compression
444
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
445
+ compression from the filename suffix.
446
+ encoding: str
447
+ For text mode only
448
+ errors: None or str
449
+ Passed to TextIOWrapper in text mode
450
+ protocol: str or None
451
+ If given, overrides the protocol found in the URL.
452
+ newline: bytes or None
453
+ Used for line terminator in text mode. If None, uses system default;
454
+ if blank, uses no translation.
455
+ expand: bool or None
456
+ Whether to regard file paths containing special glob characters as needing
457
+ expansion (finding the first match) or absolute. Setting False allows using
458
+ paths which do embed such characters. If None (default), this argument
459
+ takes its value from the DEFAULT_EXPAND module variable, which takes
460
+ its initial value from the "open_expand" config value at startup, which will
461
+ be False if not set.
462
+ **kwargs: dict
463
+ Extra options that make sense to a particular storage connection, e.g.
464
+ host, port, username, password, etc.
465
+
466
+ Examples
467
+ --------
468
+ >>> openfile = open('2015-01-01.csv') # doctest: +SKIP
469
+ >>> openfile = open(
470
+ ... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
471
+ ... ) # doctest: +SKIP
472
+ >>> with openfile as f:
473
+ ... df = pd.read_csv(f) # doctest: +SKIP
474
+ ...
475
+
476
+ Returns
477
+ -------
478
+ ``OpenFile`` object.
479
+
480
+ Notes
481
+ -----
482
+ For a full list of the available protocols and the implementations that
483
+ they map across to see the latest online documentation:
484
+
485
+ - For implementations built into ``fsspec`` see
486
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
487
+ - For implementations in separate packages see
488
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
489
+ """
490
+ expand = DEFAULT_EXPAND if expand is None else expand
491
+ out = open_files(
492
+ urlpath=[urlpath],
493
+ mode=mode,
494
+ compression=compression,
495
+ encoding=encoding,
496
+ errors=errors,
497
+ protocol=protocol,
498
+ newline=newline,
499
+ expand=expand,
500
+ **kwargs,
501
+ )
502
+ if not out:
503
+ raise FileNotFoundError(urlpath)
504
+ return out[0]
505
+
506
+
507
+ def open_local(
508
+ url: str | list[str] | Path | list[Path],
509
+ mode: str = "rb",
510
+ **storage_options: dict,
511
+ ) -> str | list[str]:
512
+ """Open file(s) which can be resolved to local
513
+
514
+ For files which either are local, or get downloaded upon open
515
+ (e.g., by file caching)
516
+
517
+ Parameters
518
+ ----------
519
+ url: str or list(str)
520
+ mode: str
521
+ Must be read mode
522
+ storage_options:
523
+ passed on to FS for or used by open_files (e.g., compression)
524
+ """
525
+ if "r" not in mode:
526
+ raise ValueError("Can only ensure local files when reading")
527
+ of = open_files(url, mode=mode, **storage_options)
528
+ if not getattr(of[0].fs, "local_file", False):
529
+ raise ValueError(
530
+ "open_local can only be used on a filesystem which"
531
+ " has attribute local_file=True"
532
+ )
533
+ with of as files:
534
+ paths = [f.name for f in files]
535
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
536
+ return paths[0]
537
+ return paths
538
+
539
+
540
+ def get_compression(urlpath, compression):
541
+ if compression == "infer":
542
+ compression = infer_compression(urlpath)
543
+ if compression is not None and compression not in compr:
544
+ raise ValueError(f"Compression type {compression} not supported")
545
+ return compression
546
+
547
+
548
+ def split_protocol(urlpath):
549
+ """Return protocol, path pair"""
550
+ urlpath = stringify_path(urlpath)
551
+ if "://" in urlpath:
552
+ protocol, path = urlpath.split("://", 1)
553
+ if len(protocol) > 1:
554
+ # excludes Windows paths
555
+ return protocol, path
556
+ if urlpath.startswith("data:"):
557
+ return urlpath.split(":", 1)
558
+ return None, urlpath
559
+
560
+
561
+ def strip_protocol(urlpath):
562
+ """Return only path part of full URL, according to appropriate backend"""
563
+ protocol, _ = split_protocol(urlpath)
564
+ cls = get_filesystem_class(protocol)
565
+ return cls._strip_protocol(urlpath)
566
+
567
+
568
+ def expand_paths_if_needed(paths, mode, num, fs, name_function):
569
+ """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
570
+ in them (read mode).
571
+
572
+ :param paths: list of paths
573
+ mode: str
574
+ Mode in which to open files.
575
+ num: int
576
+ If opening in writing mode, number of files we expect to create.
577
+ fs: filesystem object
578
+ name_function: callable
579
+ If opening in writing mode, this callable is used to generate path
580
+ names. Names are generated for each partition by
581
+ ``urlpath.replace('*', name_function(partition_index))``.
582
+ :return: list of paths
583
+ """
584
+ expanded_paths = []
585
+ paths = list(paths)
586
+
587
+ if "w" in mode: # read mode
588
+ if sum(1 for p in paths if "*" in p) > 1:
589
+ raise ValueError(
590
+ "When writing data, only one filename mask can be specified."
591
+ )
592
+ num = max(num, len(paths))
593
+
594
+ for curr_path in paths:
595
+ if "*" in curr_path:
596
+ # expand using name_function
597
+ expanded_paths.extend(_expand_paths(curr_path, name_function, num))
598
+ else:
599
+ expanded_paths.append(curr_path)
600
+ # if we generated more paths that asked for, trim the list
601
+ if len(expanded_paths) > num:
602
+ expanded_paths = expanded_paths[:num]
603
+
604
+ else: # read mode
605
+ for curr_path in paths:
606
+ if has_magic(curr_path):
607
+ # expand using glob
608
+ expanded_paths.extend(fs.glob(curr_path))
609
+ else:
610
+ expanded_paths.append(curr_path)
611
+
612
+ return expanded_paths
613
+
614
+
615
+ def get_fs_token_paths(
616
+ urlpath,
617
+ mode="rb",
618
+ num=1,
619
+ name_function=None,
620
+ storage_options=None,
621
+ protocol=None,
622
+ expand=True,
623
+ ):
624
+ """Filesystem, deterministic token, and paths from a urlpath and options.
625
+
626
+ Parameters
627
+ ----------
628
+ urlpath: string or iterable
629
+ Absolute or relative filepath, URL (may include protocols like
630
+ ``s3://``), or globstring pointing to data.
631
+ mode: str, optional
632
+ Mode in which to open files.
633
+ num: int, optional
634
+ If opening in writing mode, number of files we expect to create.
635
+ name_function: callable, optional
636
+ If opening in writing mode, this callable is used to generate path
637
+ names. Names are generated for each partition by
638
+ ``urlpath.replace('*', name_function(partition_index))``.
639
+ storage_options: dict, optional
640
+ Additional keywords to pass to the filesystem class.
641
+ protocol: str or None
642
+ To override the protocol specifier in the URL
643
+ expand: bool
644
+ Expand string paths for writing, assuming the path is a directory
645
+ """
646
+ if isinstance(urlpath, (list, tuple, set)):
647
+ if not urlpath:
648
+ raise ValueError("empty urlpath sequence")
649
+ urlpath0 = stringify_path(next(iter(urlpath)))
650
+ else:
651
+ urlpath0 = stringify_path(urlpath)
652
+ storage_options = storage_options or {}
653
+ if protocol:
654
+ storage_options["protocol"] = protocol
655
+ chain = _un_chain(urlpath0, storage_options or {})
656
+ inkwargs = {}
657
+ # Reverse iterate the chain, creating a nested target_* structure
658
+ for i, ch in enumerate(reversed(chain)):
659
+ urls, nested_protocol, kw = ch
660
+ if i == len(chain) - 1:
661
+ inkwargs = dict(**kw, **inkwargs)
662
+ continue
663
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
664
+ inkwargs["target_protocol"] = nested_protocol
665
+ inkwargs["fo"] = urls
666
+ paths, protocol, _ = chain[0]
667
+ fs = filesystem(protocol, **inkwargs)
668
+ if isinstance(urlpath, (list, tuple, set)):
669
+ pchains = [
670
+ _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
671
+ ]
672
+ if len({pc[1] for pc in pchains}) > 1:
673
+ raise ValueError("Protocol mismatch getting fs from %s", urlpath)
674
+ paths = [pc[0] for pc in pchains]
675
+ else:
676
+ paths = fs._strip_protocol(paths)
677
+ if isinstance(paths, (list, tuple, set)):
678
+ if expand:
679
+ paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
680
+ elif not isinstance(paths, list):
681
+ paths = list(paths)
682
+ else:
683
+ if ("w" in mode or "x" in mode) and expand:
684
+ paths = _expand_paths(paths, name_function, num)
685
+ elif "*" in paths:
686
+ paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
687
+ else:
688
+ paths = [paths]
689
+
690
+ return fs, fs._fs_token, paths
691
+
692
+
693
+ def _expand_paths(path, name_function, num):
694
+ if isinstance(path, str):
695
+ if path.count("*") > 1:
696
+ raise ValueError("Output path spec must contain exactly one '*'.")
697
+ elif "*" not in path:
698
+ path = os.path.join(path, "*.part")
699
+
700
+ if name_function is None:
701
+ name_function = build_name_function(num - 1)
702
+
703
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
704
+ if paths != sorted(paths):
705
+ logger.warning(
706
+ "In order to preserve order between partitions"
707
+ " paths created with ``name_function`` should "
708
+ "sort to partition order"
709
+ )
710
+ elif isinstance(path, (tuple, list)):
711
+ assert len(path) == num
712
+ paths = list(path)
713
+ else:
714
+ raise ValueError(
715
+ "Path should be either\n"
716
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
717
+ "2. A directory: 'foo/\n"
718
+ "3. A path with a '*' in it: 'foo.*.json'"
719
+ )
720
+ return paths
721
+
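A sketch of the default numbering when no ``name_function`` is supplied:

    from fsspec.core import _expand_paths

    assert _expand_paths("out-*.csv", None, 2) == ["out-0.csv", "out-1.csv"]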
722
+
723
+ class PickleableTextIOWrapper(io.TextIOWrapper):
724
+ """TextIOWrapper cannot be pickled. This solves it.
725
+
726
+ Requires that ``buffer`` be pickleable, which all instances of
727
+ AbstractBufferedFile are.
728
+ """
729
+
730
+ def __init__(
731
+ self,
732
+ buffer,
733
+ encoding=None,
734
+ errors=None,
735
+ newline=None,
736
+ line_buffering=False,
737
+ write_through=False,
738
+ ):
739
+ self.args = buffer, encoding, errors, newline, line_buffering, write_through
740
+ super().__init__(*self.args)
741
+
742
+ def __reduce__(self):
743
+ return PickleableTextIOWrapper, self.args
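A sketch tying this module together: ``fsspec.open`` layers inferred gzip compression and text decoding over a binary file on the in-memory filesystem.

    import fsspec

    with fsspec.open("memory://demo.txt.gz", "wt", compression="infer") as f:
        f.write("line1\nline2\n")

    with fsspec.open("memory://demo.txt.gz", "rt", compression="infer") as f:
        print(f.read())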
venv/lib/python3.13/site-packages/fsspec/dircache.py ADDED
@@ -0,0 +1,98 @@
1
+ import time
2
+ from collections.abc import MutableMapping
3
+ from functools import lru_cache
4
+
5
+
6
+ class DirCache(MutableMapping):
7
+ """
8
+ Caching of directory listings, in a structure like::
9
+
10
+ {"path0": [
11
+ {"name": "path0/file0",
12
+ "size": 123,
13
+ "type": "file",
14
+ ...
15
+ },
16
+ {"name": "path0/file1",
17
+ },
18
+ ...
19
+ ],
20
+ "path1": [...]
21
+ }
22
+
23
+ Parameters to this class control listing expiry or indeed turn
24
+ caching off
25
+ """
26
+
27
+ def __init__(
28
+ self,
29
+ use_listings_cache=True,
30
+ listings_expiry_time=None,
31
+ max_paths=None,
32
+ **kwargs,
33
+ ):
34
+ """
35
+
36
+ Parameters
37
+ ----------
38
+ use_listings_cache: bool
39
+ If False, this cache never returns items, but always reports KeyError,
40
+ and setting items has no effect
41
+ listings_expiry_time: int or float (optional)
42
+ Time in seconds that a listing is considered valid. If None,
43
+ listings do not expire.
44
+ max_paths: int (optional)
45
+ The number of most recent listings that are considered valid; 'recent'
46
+ refers to when the entry was set.
47
+ """
48
+ self._cache = {}
49
+ self._times = {}
50
+ if max_paths:
51
+ self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
52
+ self.use_listings_cache = use_listings_cache
53
+ self.listings_expiry_time = listings_expiry_time
54
+ self.max_paths = max_paths
55
+
56
+ def __getitem__(self, item):
57
+ if self.listings_expiry_time is not None:
58
+ if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
59
+ del self._cache[item]
60
+ if self.max_paths:
61
+ self._q(item)
62
+ return self._cache[item] # maybe raises KeyError
63
+
64
+ def clear(self):
65
+ self._cache.clear()
66
+
67
+ def __len__(self):
68
+ return len(self._cache)
69
+
70
+ def __contains__(self, item):
71
+ try:
72
+ self[item]
73
+ return True
74
+ except KeyError:
75
+ return False
76
+
77
+ def __setitem__(self, key, value):
78
+ if not self.use_listings_cache:
79
+ return
80
+ if self.max_paths:
81
+ self._q(key)
82
+ self._cache[key] = value
83
+ if self.listings_expiry_time is not None:
84
+ self._times[key] = time.time()
85
+
86
+ def __delitem__(self, key):
87
+ del self._cache[key]
88
+
89
+ def __iter__(self):
90
+ entries = list(self._cache)
91
+
92
+ return (k for k in entries if k in self)
93
+
94
+ def __reduce__(self):
95
+ return (
96
+ DirCache,
97
+ (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
98
+ )
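A sketch of listing expiry; the half-second expiry time is arbitrary:

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.5)
    cache["/data"] = [{"name": "/data/a", "size": 1, "type": "file"}]
    assert "/data" in cache       # fresh listing is served
    time.sleep(0.6)
    assert "/data" not in cache   # expired listing is evicted on access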
venv/lib/python3.13/site-packages/fsspec/fuse.py ADDED
@@ -0,0 +1,324 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import stat
5
+ import threading
6
+ import time
7
+ from errno import EIO, ENOENT
8
+
9
+ from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
10
+
11
+ from fsspec import __version__
12
+ from fsspec.core import url_to_fs
13
+
14
+ logger = logging.getLogger("fsspec.fuse")
15
+
16
+
17
+ class FUSEr(Operations):
18
+ def __init__(self, fs, path, ready_file=False):
19
+ self.fs = fs
20
+ self.cache = {}
21
+ self.root = path.rstrip("/") + "/"
22
+ self.counter = 0
23
+ logger.info("Starting FUSE at %s", path)
24
+ self._ready_file = ready_file
25
+
26
+ def getattr(self, path, fh=None):
27
+ logger.debug("getattr %s", path)
28
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
29
+ return {"type": "file", "st_size": 5}
30
+
31
+ path = "".join([self.root, path.lstrip("/")]).rstrip("/")
32
+ try:
33
+ info = self.fs.info(path)
34
+ except FileNotFoundError as exc:
35
+ raise FuseOSError(ENOENT) from exc
36
+
37
+ data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
38
+ perm = info.get("mode", 0o777)
39
+
40
+ if info["type"] != "file":
41
+ data["st_mode"] = stat.S_IFDIR | perm
42
+ data["st_size"] = 0
43
+ data["st_blksize"] = 0
44
+ else:
45
+ data["st_mode"] = stat.S_IFREG | perm
46
+ data["st_size"] = info["size"]
47
+ data["st_blksize"] = 5 * 2**20
48
+ data["st_nlink"] = 1
49
+ data["st_atime"] = info["atime"] if "atime" in info else time.time()
50
+ data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
51
+ data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
52
+ return data
53
+
54
+ def readdir(self, path, fh):
55
+ logger.debug("readdir %s", path)
56
+ path = "".join([self.root, path.lstrip("/")])
57
+ files = self.fs.ls(path, False)
58
+ files = [os.path.basename(f.rstrip("/")) for f in files]
59
+ return [".", ".."] + files
60
+
61
+ def mkdir(self, path, mode):
62
+ path = "".join([self.root, path.lstrip("/")])
63
+ self.fs.mkdir(path)
64
+ return 0
65
+
66
+ def rmdir(self, path):
67
+ path = "".join([self.root, path.lstrip("/")])
68
+ self.fs.rmdir(path)
69
+ return 0
70
+
71
+ def read(self, path, size, offset, fh):
72
+ logger.debug("read %s", (path, size, offset))
73
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
74
+ # status indicator
75
+ return b"ready"
76
+
77
+ f = self.cache[fh]
78
+ f.seek(offset)
79
+ out = f.read(size)
80
+ return out
81
+
82
+ def write(self, path, data, offset, fh):
83
+ logger.debug("write %s", (path, offset))
84
+ f = self.cache[fh]
85
+ f.seek(offset)
86
+ f.write(data)
87
+ return len(data)
88
+
89
+ def create(self, path, flags, fi=None):
90
+ logger.debug("create %s", (path, flags))
91
+ fn = "".join([self.root, path.lstrip("/")])
92
+ self.fs.touch(fn) # OS will want to get attributes immediately
93
+ f = self.fs.open(fn, "wb")
94
+ self.cache[self.counter] = f
95
+ self.counter += 1
96
+ return self.counter - 1
97
+
98
+ def open(self, path, flags):
99
+ logger.debug("open %s", (path, flags))
100
+ fn = "".join([self.root, path.lstrip("/")])
101
+ if flags % 2 == 0:
102
+ # read
103
+ mode = "rb"
104
+ else:
105
+ # write/create
106
+ mode = "wb"
107
+ self.cache[self.counter] = self.fs.open(fn, mode)
108
+ self.counter += 1
109
+ return self.counter - 1
110
+
111
+ def truncate(self, path, length, fh=None):
112
+ fn = "".join([self.root, path.lstrip("/")])
113
+ if length != 0:
114
+ raise NotImplementedError
115
+ # maybe should be no-op since open with write sets size to zero anyway
116
+ self.fs.touch(fn)
117
+
118
+ def unlink(self, path):
119
+ fn = "".join([self.root, path.lstrip("/")])
120
+ try:
121
+ self.fs.rm(fn, False)
122
+ except (OSError, FileNotFoundError) as exc:
123
+ raise FuseOSError(EIO) from exc
124
+
125
+ def release(self, path, fh):
126
+ try:
127
+ if fh in self.cache:
128
+ f = self.cache[fh]
129
+ f.close()
130
+ self.cache.pop(fh)
131
+ except Exception as e:
132
+ print(e)
133
+ return 0
134
+
135
+ def chmod(self, path, mode):
136
+ if hasattr(self.fs, "chmod"):
137
+ path = "".join([self.root, path.lstrip("/")])
138
+ return self.fs.chmod(path, mode)
139
+ raise NotImplementedError
140
+
141
+
142
+ def run(
143
+ fs,
144
+ path,
145
+ mount_point,
146
+ foreground=True,
147
+ threads=False,
148
+ ready_file=False,
149
+ ops_class=FUSEr,
150
+ ):
151
+ """Mount stuff in a local directory
152
+
153
+ This uses fusepy to make it appear as if a given path on an fsspec
154
+ instance is in fact resident within the local file-system.
155
+
156
+ This requires that fusepy by installed, and that FUSE be available on
157
+ the system (typically requiring a package to be installed with
158
+ apt, yum, brew, etc.).
159
+
160
+ Parameters
161
+ ----------
162
+ fs: file-system instance
163
+ From one of the compatible implementations
164
+ path: str
165
+ Location on that file-system to regard as the root directory to
166
+ mount. Note that you typically should include the terminating "/"
167
+ character.
168
+ mount_point: str
169
+ An empty directory on the local file-system where the contents of
170
+ the remote path will appear.
171
+ foreground: bool
172
+ Whether or not calling this function will block. Operation will
173
+ typically be more stable if True.
174
+ threads: bool
175
+ Whether or not to create threads when responding to file operations
176
+ within the mounter directory. Operation will typically be more
177
+ stable if False.
178
+ ready_file: bool
179
+ Whether the FUSE process is ready. The ``.fuse_ready`` file will
180
+ exist in the ``mount_point`` directory if True. Debugging purpose.
181
+ ops_class: FUSEr or Subclass of FUSEr
182
+ To override the default behavior of FUSEr. For Example, logging
183
+ to file.
184
+
185
+ """
186
+ func = lambda: FUSE(
187
+ ops_class(fs, path, ready_file=ready_file),
188
+ mount_point,
189
+ nothreads=not threads,
190
+ foreground=foreground,
191
+ )
192
+ if not foreground:
193
+ th = threading.Thread(target=func)
194
+ th.daemon = True
195
+ th.start()
196
+ return th
197
+ else: # pragma: no cover
198
+ try:
199
+ func()
200
+ except KeyboardInterrupt:
201
+ pass
202
+
203
+
204
+ def main(args):
205
+ """Mount filesystem from chained URL to MOUNT_POINT.
206
+
207
+ Examples:
208
+
209
+ python3 -m fsspec.fuse memory /usr/share /tmp/mem
210
+
211
+ python3 -m fsspec.fuse local /tmp/source /tmp/local \\
212
+ -l /tmp/fsspecfuse.log
213
+
214
+ You can also mount chained-URLs and use special settings:
215
+
216
+ python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
217
+ / /tmp/zip \\
218
+ -o 'filecache-cache_storage=/tmp/simplecache'
219
+
220
+ You can specify the type of the setting by using `[int]` or `[bool]`,
221
+ (`true`, `yes`, `1` represents the Boolean value `True`):
222
+
223
+ python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
224
+ /historic/packages/RPMS /tmp/ftp \\
225
+ -o 'simplecache-cache_storage=/tmp/simplecache' \\
226
+ -o 'simplecache-check_files=false[bool]' \\
227
+ -o 'ftp-listings_expiry_time=60[int]' \\
228
+ -o 'ftp-username=anonymous' \\
229
+ -o 'ftp-password=xieyanbo'
230
+ """
231
+
232
+ class RawDescriptionArgumentParser(argparse.ArgumentParser):
233
+ def format_help(self):
234
+ usage = super().format_help()
235
+ parts = usage.split("\n\n")
236
+ parts[1] = self.description.rstrip()
237
+ return "\n\n".join(parts)
238
+
239
+ parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
240
+ parser.add_argument("--version", action="version", version=__version__)
241
+ parser.add_argument("url", type=str, help="fs url")
242
+ parser.add_argument("source_path", type=str, help="source directory in fs")
243
+ parser.add_argument("mount_point", type=str, help="local directory")
244
+ parser.add_argument(
245
+ "-o",
246
+ "--option",
247
+ action="append",
248
+ help="Any options of protocol included in the chained URL",
249
+ )
250
+ parser.add_argument(
251
+ "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
252
+ )
253
+ parser.add_argument(
254
+ "-f",
255
+ "--foreground",
256
+ action="store_false",
257
+ help="Running in foreground or not (Default: False)",
258
+ )
259
+ parser.add_argument(
260
+ "-t",
261
+ "--threads",
262
+ action="store_false",
263
+ help="Running with threads support (Default: False)",
264
+ )
265
+ parser.add_argument(
266
+ "-r",
267
+ "--ready-file",
268
+ action="store_false",
269
+ help="The `.fuse_ready` file will exist after FUSE is ready. "
270
+ "(Debugging purpose, Default: False)",
271
+ )
272
+ args = parser.parse_args(args)
273
+
274
+ kwargs = {}
275
+ for item in args.option or []:
276
+ key, sep, value = item.partition("=")
277
+ if not sep:
278
+ parser.error(message=f"Wrong option: {item!r}")
279
+ val = value.lower()
280
+ if val.endswith("[int]"):
281
+ value = int(value[: -len("[int]")])
282
+ elif val.endswith("[bool]"):
283
+ value = val[: -len("[bool]")] in ["1", "yes", "true"]
284
+
285
+ if "-" in key:
286
+ fs_name, setting_name = key.split("-", 1)
287
+ if fs_name in kwargs:
288
+ kwargs[fs_name][setting_name] = value
289
+ else:
290
+ kwargs[fs_name] = {setting_name: value}
291
+ else:
292
+ kwargs[key] = value
293
+
294
+ if args.log_file:
295
+ logging.basicConfig(
296
+ level=logging.DEBUG,
297
+ filename=args.log_file,
298
+ format="%(asctime)s %(message)s",
299
+ )
300
+
301
+ class LoggingFUSEr(FUSEr, LoggingMixIn):
302
+ pass
303
+
304
+ fuser = LoggingFUSEr
305
+ else:
306
+ fuser = FUSEr
307
+
308
+ fs, url_path = url_to_fs(args.url, **kwargs)
309
+ logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
310
+ run(
311
+ fs,
312
+ args.source_path,
313
+ args.mount_point,
314
+ foreground=args.foreground,
315
+ threads=args.threads,
316
+ ready_file=args.ready_file,
317
+ ops_class=fuser,
318
+ )
319
+
320
+
321
+ if __name__ == "__main__":
322
+ import sys
323
+
324
+ main(sys.argv[1:])
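
A hedged sketch of driving ``run`` from Python rather than the CLI; the memory filesystem and the mount path are illustrative, and fusepy plus a system FUSE install are required, per the docstring above. ``/tmp/mem`` must be an existing empty directory:

import fsspec
from fsspec.fuse import run

fs = fsspec.filesystem("memory")
fs.pipe_file("/data/hello.txt", b"hello world")

# Non-blocking: returns the daemon thread running the FUSE loop.
th = run(fs, "/data/", "/tmp/mem", foreground=False)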
venv/lib/python3.13/site-packages/fsspec/generic.py ADDED
@@ -0,0 +1,396 @@
+from __future__ import annotations
+
+import inspect
+import logging
+import os
+import shutil
+import uuid
+
+from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
+from .callbacks import DEFAULT_CALLBACK
+from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
+
+_generic_fs = {}
+logger = logging.getLogger("fsspec.generic")
+
+
+def set_generic_fs(protocol, **storage_options):
+    """Populate the dict used for method=="generic" lookups"""
+    _generic_fs[protocol] = filesystem(protocol, **storage_options)
+
+
+def _resolve_fs(url, method, protocol=None, storage_options=None):
+    """Pick instance of backend FS"""
+    url = url[0] if isinstance(url, (list, tuple)) else url
+    protocol = protocol or split_protocol(url)[0]
+    storage_options = storage_options or {}
+    if method == "default":
+        return filesystem(protocol)
+    if method == "generic":
+        return _generic_fs[protocol]
+    if method == "current":
+        cls = get_filesystem_class(protocol)
+        return cls.current()
+    if method == "options":
+        fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
+        return fs
+    raise ValueError(f"Unknown FS resolution method: {method}")
+
+
+def rsync(
+    source,
+    destination,
+    delete_missing=False,
+    source_field="size",
+    dest_field="size",
+    update_cond="different",
+    inst_kwargs=None,
+    fs=None,
+    **kwargs,
+):
+    """Sync files between two directory trees
+
+    (experimental)
+
+    Parameters
+    ----------
+    source: str
+        Root of the directory tree to take files from. This must be a directory, but
+        do not include any terminating "/" character
+    destination: str
+        Root path to copy into. The contents of this location should be
+        identical to the contents of ``source`` when done. This will be made a
+        directory, and the terminal "/" should not be included.
+    delete_missing: bool
+        If there are paths in the destination that don't exist in the
+        source and this is True, delete them. Otherwise, leave them alone.
+    source_field: str | callable
+        If ``update_cond`` is "different", this is the key in the info
+        of source files to consider for difference. May be a function of the
+        info dict.
+    dest_field: str | callable
+        If ``update_cond`` is "different", this is the key in the info
+        of destination files to consider for difference. May be a function of
+        the info dict.
+    update_cond: "different"|"always"|"never"
+        If "always", every file is copied, regardless of whether it exists in
+        the destination. If "never", files that exist in the destination are
+        not copied again. If "different" (default), only copy if the info
+        fields given by ``source_field`` and ``dest_field`` (usually "size")
+        are different. Other comparisons may be added in the future.
+    inst_kwargs: dict|None
+        If ``fs`` is None, use this set of keyword arguments to make a
+        GenericFileSystem instance
+    fs: GenericFileSystem|None
+        Instance to use if explicitly given. The instance defines how to
+        make downstream file system instances from paths.
+
+    Returns
+    -------
+    dict of the copy operations that were performed, {source: destination}
+    """
+    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
+    source = fs._strip_protocol(source)
+    destination = fs._strip_protocol(destination)
+    allfiles = fs.find(source, withdirs=True, detail=True)
+    if not fs.isdir(source):
+        raise ValueError("Can only rsync on a directory")
+    otherfiles = fs.find(destination, withdirs=True, detail=True)
+    dirs = [
+        a
+        for a, v in allfiles.items()
+        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
+    ]
+    logger.debug(f"{len(dirs)} directories to create")
+    if dirs:
+        fs.make_many_dirs(
+            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
+        )
+    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
+    logger.debug(f"{len(allfiles)} files to consider for copy")
+    to_delete = [
+        o
+        for o, v in otherfiles.items()
+        if o.replace(destination, source) not in allfiles and v["type"] == "file"
+    ]
+    for k, v in allfiles.copy().items():
+        otherfile = k.replace(source, destination)
+        if otherfile in otherfiles:
+            if update_cond == "always":
+                allfiles[k] = otherfile
+            elif update_cond == "never":
+                allfiles.pop(k)
+            elif update_cond == "different":
+                inf1 = source_field(v) if callable(source_field) else v[source_field]
+                v2 = otherfiles[otherfile]
+                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
+                if inf1 != inf2:
+                    # details mismatch, make copy
+                    allfiles[k] = otherfile
+                else:
+                    # details match, don't copy
+                    allfiles.pop(k)
+        else:
+            # file not in target yet
+            allfiles[k] = otherfile
+    logger.debug(f"{len(allfiles)} files to copy")
+    if allfiles:
+        source_files, target_files = zip(*allfiles.items())
+        fs.cp(source_files, target_files, **kwargs)
+    logger.debug(f"{len(to_delete)} files to delete")
+    if delete_missing and to_delete:
+        fs.rm(to_delete)
+    return allfiles
+
+
+class GenericFileSystem(AsyncFileSystem):
+    """Wrapper over all other FS types
+
+    <experimental!>
+
+    This implementation is a single unified interface to be able to run FS operations
+    over generic URLs, and dispatch to the specific implementations using the URL
+    protocol prefix.
+
+    Note: instances of this FS are always async, even if you never use it with any
+    async backend.
+    """
+
+    protocol = "generic"  # there is no real reason to ever use a protocol with this FS
+
+    def __init__(self, default_method="default", storage_options=None, **kwargs):
+        """
+
+        Parameters
+        ----------
+        default_method: str (optional)
+            Defines how to configure backend FS instances. Options are:
+            - "default": instantiate like FSClass(), with no
+              extra arguments; this is the default instance of that FS, and can be
+              configured via the config system
+            - "generic": takes instances from the `_generic_fs` dict in this module,
+              which you must populate before use. Keys are by protocol
+            - "options": expects storage_options, a dict mapping protocol to
+              kwargs to use when constructing the filesystem
+            - "current": takes the most recently instantiated version of each FS
+        """
+        self.method = default_method
+        self.st_opts = storage_options
+        super().__init__(**kwargs)
+
+    def _parent(self, path):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        return fs.unstrip_protocol(fs._parent(path))
+
+    def _strip_protocol(self, path):
+        # normalization only
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        return fs.unstrip_protocol(fs._strip_protocol(path))
+
+    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            out = await fs._find(
+                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
+            )
+        else:
+            out = fs.find(
+                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
+            )
+        result = {}
+        for k, v in out.items():
+            v = v.copy()  # don't corrupt target FS dircache
+            name = fs.unstrip_protocol(k)
+            v["name"] = name
+            result[name] = v
+        if detail:
+            return result
+        return list(result)
+
+    async def _info(self, url, **kwargs):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            out = await fs._info(url, **kwargs)
+        else:
+            out = fs.info(url, **kwargs)
+        out = out.copy()  # don't edit originals
+        out["name"] = fs.unstrip_protocol(out["name"])
+        return out
+
+    async def _ls(
+        self,
+        url,
+        detail=True,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            out = await fs._ls(url, detail=True, **kwargs)
+        else:
+            out = fs.ls(url, detail=True, **kwargs)
+        out = [o.copy() for o in out]  # don't edit originals
+        for o in out:
+            o["name"] = fs.unstrip_protocol(o["name"])
+        if detail:
+            return out
+        else:
+            return [o["name"] for o in out]
+
+    async def _cat_file(
+        self,
+        url,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        if fs.async_impl:
+            return await fs._cat_file(url, **kwargs)
+        else:
+            return fs.cat_file(url, **kwargs)
+
+    async def _pipe_file(
+        self,
+        path,
+        value,
+        **kwargs,
+    ):
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            return await fs._pipe_file(path, value, **kwargs)
+        else:
+            return fs.pipe_file(path, value, **kwargs)
+
+    async def _rm(self, url, **kwargs):
+        urls = url
+        if isinstance(urls, str):
+            urls = [urls]
+        fs = _resolve_fs(urls[0], self.method)
+        if fs.async_impl:
+            await fs._rm(urls, **kwargs)
+        else:
+            fs.rm(url, **kwargs)
+
+    async def _makedirs(self, path, exist_ok=False):
+        logger.debug("Make dir %s", path)
+        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
+        if fs.async_impl:
+            await fs._makedirs(path, exist_ok=exist_ok)
+        else:
+            fs.makedirs(path, exist_ok=exist_ok)
+
+    def rsync(self, source, destination, **kwargs):
+        """Sync files between two directory trees
+
+        See :func:`rsync` for more details.
+        """
+        rsync(source, destination, fs=self, **kwargs)
+
+    async def _cp_file(
+        self,
+        url,
+        url2,
+        blocksize=2**20,
+        callback=DEFAULT_CALLBACK,
+        tempdir: str | None = None,
+        **kwargs,
+    ):
+        fs = _resolve_fs(url, self.method)
+        fs2 = _resolve_fs(url2, self.method)
+        if fs is fs2:
+            # pure remote
+            if fs.async_impl:
+                return await fs._copy(url, url2, **kwargs)
+            else:
+                return fs.copy(url, url2, **kwargs)
+        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
+
+    async def _make_many_dirs(self, urls, exist_ok=True):
+        fs = _resolve_fs(urls[0], self.method)
+        if fs.async_impl:
+            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
+            await _run_coros_in_chunks(coros)
+        else:
+            for u in urls:
+                fs.makedirs(u, exist_ok=exist_ok)
+
+    make_many_dirs = sync_wrapper(_make_many_dirs)
+
+    async def _copy(
+        self,
+        path1: list[str],
+        path2: list[str],
+        recursive: bool = False,
+        on_error: str = "ignore",
+        maxdepth: int | None = None,
+        batch_size: int | None = None,
+        tempdir: str | None = None,
+        **kwargs,
+    ):
+        # TODO: special case for one FS being local, which can use get/put
+        # TODO: special case for one being memFS, which can use cat/pipe
+        if recursive:
+            raise NotImplementedError("Please use fsspec.generic.rsync")
+        path1 = [path1] if isinstance(path1, str) else path1
+        path2 = [path2] if isinstance(path2, str) else path2
+
+        fs = _resolve_fs(path1, self.method)
+        fs2 = _resolve_fs(path2, self.method)
+
+        if fs is fs2:
+            if fs.async_impl:
+                return await fs._copy(path1, path2, **kwargs)
+            else:
+                return fs.copy(path1, path2, **kwargs)
+
+        await copy_file_op(
+            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
+        )
+
+
+async def copy_file_op(
+    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
+):
+    import tempfile
+
+    tempdir = tempdir or tempfile.mkdtemp()
+    try:
+        coros = [
+            _copy_file_op(
+                fs1,
+                u1,
+                fs2,
+                u2,
+                os.path.join(tempdir, uuid.uuid4().hex),
+            )
+            for u1, u2 in zip(url1, url2)
+        ]
+        out = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, return_exceptions=True
+        )
+    finally:
+        shutil.rmtree(tempdir)
+    if on_error == "return":
+        return out
+    elif on_error == "raise":
+        for o in out:
+            if isinstance(o, Exception):
+                raise o
+
+
+async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
+    if fs1.async_impl:
+        await fs1._get_file(url1, local)
+    else:
+        fs1.get_file(url1, local)
+    if fs2.async_impl:
+        await fs2._put_file(local, url2)
+    else:
+        fs2.put_file(local, url2)
+    os.unlink(local)
+    logger.debug("Copy %s -> %s; done", url1, url2)
+
+
+async def maybe_await(cor):
+    if inspect.iscoroutine(cor):
+        return await cor
+    else:
+        return cor
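
A usage sketch of the `rsync` helper defined above, mirroring one local tree into another; the paths are placeholders and must already exist as directories:

from fsspec.generic import rsync

# Copy only files whose sizes differ (update_cond="different" is the default),
# and remove destination files that no longer exist in the source.
copied = rsync(
    "file:///tmp/source",
    "file:///tmp/backup",
    delete_missing=True,
)
print(f"{len(copied)} files copied")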
venv/lib/python3.13/site-packages/fsspec/gui.py ADDED
@@ -0,0 +1,417 @@
+import ast
+import contextlib
+import logging
+import os
+import re
+from collections.abc import Sequence
+from typing import ClassVar
+
+import panel as pn
+
+from .core import OpenFile, get_filesystem_class, split_protocol
+from .registry import known_implementations
+
+pn.extension()
+logger = logging.getLogger("fsspec.gui")
+
+
+class SigSlot:
+    """Signal-slot mixin, for Panel event passing
+
+    Include this class in a widget manager's superclasses to be able to
+    register events and callbacks on Panel widgets managed by that class.
+
+    The method ``_register`` should be called as widgets are added, and external
+    code should call ``connect`` to associate callbacks.
+
+    By default, all signals emit a DEBUG logging statement.
+    """
+
+    # names of signals that this class may emit,
+    # each of which must be set by _register for any new instance
+    signals: ClassVar[Sequence[str]] = []
+    # names of actions that this class may respond to,
+    # each of which must be a method name
+    slots: ClassVar[Sequence[str]] = []
+
+    def __init__(self):
+        self._ignoring_events = False
+        self._sigs = {}
+        self._map = {}
+        self._setup()
+
+    def _setup(self):
+        """Create GUI elements and register signals"""
+        self.panel = pn.pane.PaneBase()
+        # no signals to set up in the base class
+
+    def _register(
+        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
+    ):
+        """Watch the given attribute of a widget and assign it a named event
+
+        This is normally called at the time a widget is instantiated, in the
+        class which owns it.
+
+        Parameters
+        ----------
+        widget : pn.layout.Panel or None
+            Widget to watch. If None, an anonymous signal not associated with
+            any widget.
+        name : str
+            Name of this event
+        thing : str
+            Attribute of the given widget to watch
+        log_level : int
+            When the signal is triggered, a logging event of the given level
+            will be fired in the dfviz logger.
+        auto : bool
+            If True, automatically connects with a method in this class of the
+            same name.
+        """
+        if name not in self.signals:
+            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
+        self._sigs[name] = {
+            "widget": widget,
+            "callbacks": [],
+            "thing": thing,
+            "log": log_level,
+        }
+        wn = "-".join(
+            [
+                getattr(widget, "name", str(widget)) if widget is not None else "none",
+                thing,
+            ]
+        )
+        self._map[wn] = name
+        if widget is not None:
+            widget.param.watch(self._signal, thing, onlychanged=True)
+        if auto and hasattr(self, name):
+            self.connect(name, getattr(self, name))
+
+    def _repr_mimebundle_(self, *args, **kwargs):
+        """Display in a notebook or a server"""
+        try:
+            return self.panel._repr_mimebundle_(*args, **kwargs)
+        except (ValueError, AttributeError) as exc:
+            raise NotImplementedError(
+                "Panel does not seem to be set up properly"
+            ) from exc
+
+    def connect(self, signal, slot):
+        """Associate callback with given event
+
+        The callback must be a function which takes the "new" value of the
+        watched attribute as the only parameter. If the callback returns False,
+        this cancels any further processing of the given event.
+
+        Alternatively, the callback can be a string, in which case it means
+        emitting the correspondingly-named event (i.e., connect to self)
+        """
+        self._sigs[signal]["callbacks"].append(slot)
+
+    def _signal(self, event):
+        """This is called by an action on a widget
+
+        Within a self.ignore_events context, nothing happens.
+
+        Tests can execute this method by directly changing the values of
+        widget components.
+        """
+        if not self._ignoring_events:
+            wn = "-".join([event.obj.name, event.name])
+            if wn in self._map and self._map[wn] in self._sigs:
+                self._emit(self._map[wn], event.new)
+
+    @contextlib.contextmanager
+    def ignore_events(self):
+        """Temporarily turn off events processing in this instance
+
+        (does not propagate to children)
+        """
+        self._ignoring_events = True
+        try:
+            yield
+        finally:
+            self._ignoring_events = False
+
+    def _emit(self, sig, value=None):
+        """An event happened, call its callbacks
+
+        This method can be used in tests to simulate message passing without
+        directly changing visual elements.
+
+        Calling of callbacks will halt whenever one returns False.
+        """
+        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
+        for callback in self._sigs[sig]["callbacks"]:
+            if isinstance(callback, str):
+                self._emit(callback)
+            else:
+                try:
+                    # running callbacks should not break the interface
+                    ret = callback(value)
+                    if ret is False:
+                        break
+                except Exception as e:
+                    logger.exception(
+                        "Exception (%s) while executing callback for signal: %s",
+                        e,
+                        sig,
+                    )
+
+    def show(self, threads=False):
+        """Open a new browser tab and display this instance's interface"""
+        self.panel.show(threads=threads, verbose=False)
+        return self
+
+
+class SingleSelect(SigSlot):
+    """A multiselect which only allows you to select one item for an event"""
+
+    signals = ["_selected", "selected"]  # the first is internal
+    slots = ["set_options", "set_selection", "add", "clear", "select"]
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        super().__init__()
+
+    def _setup(self):
+        self.panel = pn.widgets.MultiSelect(**self.kwargs)
+        self._register(self.panel, "_selected", "value")
+        self._register(None, "selected")
+        self.connect("_selected", self.select_one)
+
+    def _signal(self, *args, **kwargs):
+        super()._signal(*args, **kwargs)
+
+    def select_one(self, *_):
+        with self.ignore_events():
+            val = [self.panel.value[-1]] if self.panel.value else []
+            self.panel.value = val
+        self._emit("selected", self.panel.value)
+
+    def set_options(self, options):
+        self.panel.options = options
+
+    def clear(self):
+        self.panel.options = []
+
+    @property
+    def value(self):
+        return self.panel.value
+
+    def set_selection(self, selection):
+        self.panel.value = [selection]
+
+
+class FileSelector(SigSlot):
+    """Panel-based graphical file selector widget
+
+    Instances of this widget are interactive and can be displayed in jupyter by having
+    them as the output of a cell, or in a separate browser tab using ``.show()``.
+    """
+
+    signals = [
+        "protocol_changed",
+        "selection_changed",
+        "directory_entered",
+        "home_clicked",
+        "up_clicked",
+        "go_clicked",
+        "filters_changed",
+    ]
+    slots = ["set_filters", "go_home"]
+
+    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
+        """
+
+        Parameters
+        ----------
+        url : str (optional)
+            Initial value of the URL to populate the dialog; should include protocol
+        filters : list(str) (optional)
+            File endings to include in the listings. If not included, all files are
+            allowed. Does not affect directories.
+            If given, the endings will appear as checkboxes in the interface
+        ignore : list(str) (optional)
+            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
+            hidden files on posix
+        kwargs : dict (optional)
+            To pass to file system instance
+        """
+        if url:
+            self.init_protocol, url = split_protocol(url)
+        else:
+            self.init_protocol, url = "file", os.getcwd()
+        self.init_url = url
+        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
+        self.filters = filters
+        self.ignore = [re.compile(i) for i in ignore or []]
+        self._fs = None
+        super().__init__()
+
+    def _setup(self):
+        self.url = pn.widgets.TextInput(
+            name="url",
+            value=self.init_url,
+            align="end",
+            sizing_mode="stretch_width",
+            width_policy="max",
+        )
+        self.protocol = pn.widgets.Select(
+            options=sorted(known_implementations),
+            value=self.init_protocol,
+            name="protocol",
+            align="center",
+        )
+        self.kwargs = pn.widgets.TextInput(
+            name="kwargs", value=self.init_kwargs, align="center"
+        )
+        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
+        self.main = SingleSelect(size=10)
+        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
+        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")
+
+        self._register(self.protocol, "protocol_changed", auto=True)
+        self._register(self.go, "go_clicked", "clicks", auto=True)
+        self._register(self.up, "up_clicked", "clicks", auto=True)
+        self._register(self.home, "home_clicked", "clicks", auto=True)
+        self._register(None, "selection_changed")
+        self.main.connect("selected", self.selection_changed)
+        self._register(None, "directory_entered")
+        self.prev_protocol = self.protocol.value
+        self.prev_kwargs = self.storage_options
+
+        self.filter_sel = pn.widgets.CheckBoxGroup(
+            value=[], options=[], inline=False, align="end", width_policy="min"
+        )
+        self._register(self.filter_sel, "filters_changed", auto=True)
+
+        self.panel = pn.Column(
+            pn.Row(self.protocol, self.kwargs),
+            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
+            self.main.panel,
+        )
+        self.set_filters(self.filters)
+        self.go_clicked()
+
+    def set_filters(self, filters=None):
+        self.filters = filters
+        if filters:
+            self.filter_sel.options = filters
+            self.filter_sel.value = filters
+        else:
+            self.filter_sel.options = []
+            self.filter_sel.value = []
+
+    @property
+    def storage_options(self):
+        """Value of the kwargs box as a dictionary"""
+        return ast.literal_eval(self.kwargs.value) or {}
+
+    @property
+    def fs(self):
+        """Current filesystem instance"""
+        if self._fs is None:
+            cls = get_filesystem_class(self.protocol.value)
+            self._fs = cls(**self.storage_options)
+        return self._fs
+
+    @property
+    def urlpath(self):
+        """URL of currently selected item"""
+        return (
+            (f"{self.protocol.value}://{self.main.value[0]}")
+            if self.main.value
+            else None
+        )
+
+    def open_file(self, mode="rb", compression=None, encoding=None):
+        """Create OpenFile instance for the currently selected item
+
+        For example, in a notebook you might do something like
+
+        .. code-block::
+
+            [ ]: sel = FileSelector(); sel
+
+            # user selects their file
+
+            [ ]: with sel.open_file('rb') as f:
+            ...      out = f.read()
+
+        Parameters
+        ----------
+        mode: str (optional)
+            Open mode for the file.
+        compression: str (optional)
+            Whether to interact with the file as compressed. Set to 'infer' to
+            guess compression from the file ending
+        encoding: str (optional)
+            If using text mode, use this encoding; defaults to UTF8.
+        """
+        if self.urlpath is None:
+            raise ValueError("No file selected")
+        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)
+
+    def filters_changed(self, values):
+        self.filters = values
+        self.go_clicked()
+
+    def selection_changed(self, *_):
+        if self.urlpath is None:
+            return
+        if self.fs.isdir(self.urlpath):
+            self.url.value = self.fs._strip_protocol(self.urlpath)
+            self.go_clicked()
+
+    def go_clicked(self, *_):
+        if (
+            self.prev_protocol != self.protocol.value
+            or self.prev_kwargs != self.storage_options
+        ):
+            self._fs = None  # causes fs to be recreated
+            self.prev_protocol = self.protocol.value
+            self.prev_kwargs = self.storage_options
+        listing = sorted(
+            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
+        )
+        listing = [
+            l
+            for l in listing
+            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
+        ]
+        folders = {
+            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
+            for o in listing
+            if o["type"] == "directory"
+        }
+        files = {
+            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
+            for o in listing
+            if o["type"] == "file"
+        }
+        if self.filters:
+            files = {
+                k: v
+                for k, v in files.items()
+                if any(v.endswith(ext) for ext in self.filters)
+            }
+        self.main.set_options(dict(**folders, **files))
+
+    def protocol_changed(self, *_):
+        self._fs = None
+        self.main.options = []
+        self.url.value = ""
+
+    def home_clicked(self, *_):
+        self.protocol.value = self.init_protocol
+        self.kwargs.value = self.init_kwargs
+        self.url.value = self.init_url
+        self.go_clicked()
+
+    def up_clicked(self, *_):
+        self.url.value = self.fs._parent(self.url.value)
+        self.go_clicked()
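
A short sketch of the selector's documented workflow, assuming a Jupyter session with panel installed; the path and filter are illustrative:

from fsspec.gui import FileSelector

sel = FileSelector("file:///tmp", filters=[".csv"])
sel  # display the widget as the cell output

# ... after the user picks a file interactively ...
with sel.open_file("rb") as f:
    header = f.read(100)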
venv/lib/python3.13/site-packages/fsspec/json.py ADDED
@@ -0,0 +1,117 @@
+import json
+from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import PurePath
+from typing import (
+    Any,
+    Callable,
+    ClassVar,
+    Optional,
+)
+
+from .registry import _import_class, get_filesystem_class
+from .spec import AbstractFileSystem
+
+
+class FilesystemJSONEncoder(json.JSONEncoder):
+    include_password: ClassVar[bool] = True
+
+    def default(self, o: Any) -> Any:
+        if isinstance(o, AbstractFileSystem):
+            return o.to_dict(include_password=self.include_password)
+        if isinstance(o, PurePath):
+            cls = type(o)
+            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}
+
+        return super().default(o)
+
+    def make_serializable(self, obj: Any) -> Any:
+        """
+        Recursively converts an object so that it can be JSON serialized via
+        :func:`json.dumps` and :func:`json.dump`, without actually calling
+        said functions.
+        """
+        if isinstance(obj, (str, int, float, bool)):
+            return obj
+        if isinstance(obj, Mapping):
+            return {k: self.make_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, Sequence):
+            return [self.make_serializable(v) for v in obj]
+
+        return self.default(obj)
+
+
+class FilesystemJSONDecoder(json.JSONDecoder):
+    def __init__(
+        self,
+        *,
+        object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
+        parse_float: Optional[Callable[[str], Any]] = None,
+        parse_int: Optional[Callable[[str], Any]] = None,
+        parse_constant: Optional[Callable[[str], Any]] = None,
+        strict: bool = True,
+        object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
+    ) -> None:
+        self.original_object_hook = object_hook
+
+        super().__init__(
+            object_hook=self.custom_object_hook,
+            parse_float=parse_float,
+            parse_int=parse_int,
+            parse_constant=parse_constant,
+            strict=strict,
+            object_pairs_hook=object_pairs_hook,
+        )
+
+    @classmethod
+    def try_resolve_path_cls(cls, dct: dict[str, Any]):
+        with suppress(Exception):
+            fqp = dct["cls"]
+
+            path_cls = _import_class(fqp)
+
+            if issubclass(path_cls, PurePath):
+                return path_cls
+
+        return None
+
+    @classmethod
+    def try_resolve_fs_cls(cls, dct: dict[str, Any]):
+        with suppress(Exception):
+            if "cls" in dct:
+                try:
+                    fs_cls = _import_class(dct["cls"])
+                    if issubclass(fs_cls, AbstractFileSystem):
+                        return fs_cls
+                except Exception:
+                    if "protocol" in dct:  # Fallback if cls cannot be imported
+                        return get_filesystem_class(dct["protocol"])
+
+                    raise
+
+        return None
+
+    def custom_object_hook(self, dct: dict[str, Any]):
+        if "cls" in dct:
+            if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
+                return AbstractFileSystem.from_dict(dct)
+            if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
+                return obj_cls(dct["str"])
+
+        if self.original_object_hook is not None:
+            return self.original_object_hook(dct)
+
+        return dct
+
+    def unmake_serializable(self, obj: Any) -> Any:
+        """
+        Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
+        """
+        if isinstance(obj, dict):
+            obj = self.custom_object_hook(obj)
+        if isinstance(obj, dict):
+            return {k: self.unmake_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [self.unmake_serializable(v) for v in obj]
+
+        return obj
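
A sketch of round-tripping a filesystem instance through the encoder/decoder pair above, assuming an fsspec version where `AbstractFileSystem.to_dict`/`from_dict` are available (as the encoder's `default` requires):

import json

from fsspec.implementations.memory import MemoryFileSystem
from fsspec.json import FilesystemJSONDecoder, FilesystemJSONEncoder

fs = MemoryFileSystem()
payload = json.dumps({"fs": fs}, cls=FilesystemJSONEncoder)

# The decoder's object hook resolves the "cls" entry back to a filesystem.
restored = json.loads(payload, cls=FilesystemJSONDecoder)
assert isinstance(restored["fs"], MemoryFileSystem)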
venv/lib/python3.13/site-packages/fsspec/mapping.py ADDED
@@ -0,0 +1,251 @@
+import array
+import logging
+import posixpath
+import warnings
+from collections.abc import MutableMapping
+from functools import cached_property
+
+from fsspec.core import url_to_fs
+
+logger = logging.getLogger("fsspec.mapping")
+
+
+class FSMap(MutableMapping):
+    """Wrap a FileSystem instance as a mutable mapping.
+
+    The keys of the mapping become files under the given root, and the
+    values (which must be bytes) the contents of those files.
+
+    Parameters
+    ----------
+    root: string
+        prefix for all the files
+    fs: FileSystem instance
+    check: bool (=True)
+        performs a touch at the location, to check for write access.
+
+    Examples
+    --------
+    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
+    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
+    or, more likely
+    >>> d = fs.get_mapper('my-data/path/')
+
+    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
+    >>> list(d.keys())  # doctest: +SKIP
+    ['loc1']
+    >>> d['loc1']  # doctest: +SKIP
+    b'Hello World'
+    """
+
+    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
+        self.fs = fs
+        self.root = fs._strip_protocol(root)
+        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
+        if missing_exceptions is None:
+            missing_exceptions = (
+                FileNotFoundError,
+                IsADirectoryError,
+                NotADirectoryError,
+            )
+        self.missing_exceptions = missing_exceptions
+        self.check = check
+        self.create = create
+        if create:
+            if not self.fs.exists(root):
+                self.fs.mkdir(root)
+        if check:
+            if not self.fs.exists(root):
+                raise ValueError(
+                    f"Path {root} does not exist. Create "
+                    f"with the ``create=True`` keyword"
+                )
+            self.fs.touch(root + "/a")
+            self.fs.rm(root + "/a")
+
+    @cached_property
+    def dirfs(self):
+        """dirfs instance that can be used with the same keys as the mapper"""
+        from .implementations.dirfs import DirFileSystem
+
+        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
+
+    def clear(self):
+        """Remove all keys below root - empties out mapping"""
+        logger.info("Clear mapping at %s", self.root)
+        try:
+            self.fs.rm(self.root, True)
+            self.fs.mkdir(self.root)
+        except:  # noqa: E722
+            pass
+
+    def getitems(self, keys, on_error="raise"):
+        """Fetch multiple items from the store
+
+        If the backend is async-able, this might proceed concurrently
+
+        Parameters
+        ----------
+        keys: list(str)
+            The keys to be fetched
+        on_error : "raise", "omit", "return"
+            If raise, an underlying exception will be raised (converted to KeyError
+            if the type is in self.missing_exceptions); if omit, keys with exception
+            will simply not be included in the output; if "return", all keys are
+            included in the output, but the value will be bytes or an exception
+            instance.
+
+        Returns
+        -------
+        dict(key, bytes|exception)
+        """
+        keys2 = [self._key_to_str(k) for k in keys]
+        oe = on_error if on_error == "raise" else "return"
+        try:
+            out = self.fs.cat(keys2, on_error=oe)
+            if isinstance(out, bytes):
+                out = {keys2[0]: out}
+        except self.missing_exceptions as e:
+            raise KeyError from e
+        out = {
+            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
+            for k, v in out.items()
+        }
+        return {
+            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
+            for key, k2 in zip(keys, keys2)
+            if on_error == "return" or not isinstance(out[k2], BaseException)
+        }
+
+    def setitems(self, values_dict):
+        """Set the values of multiple items in the store
+
+        Parameters
+        ----------
+        values_dict: dict(str, bytes)
+        """
+        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
+        self.fs.pipe(values)
+
+    def delitems(self, keys):
+        """Remove multiple keys from the store"""
+        self.fs.rm([self._key_to_str(k) for k in keys])
+
+    def _key_to_str(self, key):
+        """Generate full path for the key"""
+        if not isinstance(key, str):
+            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
+            warnings.warn(
+                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
+                DeprecationWarning,
+            )
+            if isinstance(key, list):
+                key = tuple(key)
+            key = str(key)
+        return f"{self._root_key_to_str}{key}".rstrip("/")
+
+    def _str_to_key(self, s):
+        """Strip path off to leave key name"""
+        return s[len(self.root) :].lstrip("/")
+
+    def __getitem__(self, key, default=None):
+        """Retrieve data"""
+        k = self._key_to_str(key)
+        try:
+            result = self.fs.cat(k)
+        except self.missing_exceptions as exc:
+            if default is not None:
+                return default
+            raise KeyError(key) from exc
+        return result
+
+    def pop(self, key, default=None):
+        """Pop data"""
+        result = self.__getitem__(key, default)
+        try:
+            del self[key]
+        except KeyError:
+            pass
+        return result
+
+    def __setitem__(self, key, value):
+        """Store value in key"""
+        key = self._key_to_str(key)
+        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
+        self.fs.pipe_file(key, maybe_convert(value))
+
+    def __iter__(self):
+        return (self._str_to_key(x) for x in self.fs.find(self.root))
+
+    def __len__(self):
+        return len(self.fs.find(self.root))
+
+    def __delitem__(self, key):
+        """Remove key"""
+        try:
+            self.fs.rm(self._key_to_str(key))
+        except Exception as exc:
+            raise KeyError from exc
+
+    def __contains__(self, key):
+        """Does key exist in mapping?"""
+        path = self._key_to_str(key)
+        return self.fs.isfile(path)
+
+    def __reduce__(self):
+        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
+
+
+def maybe_convert(value):
+    if isinstance(value, array.array) or hasattr(value, "__array__"):
+        # bytes-like things
+        if hasattr(value, "dtype") and value.dtype.kind in "Mm":
+            # The buffer interface doesn't support datetime64/timedelta64 numpy
+            # arrays
+            value = value.view("int64")
+        value = bytes(memoryview(value))
+    return value
+
+
+def get_mapper(
+    url="",
+    check=False,
+    create=False,
+    missing_exceptions=None,
+    alternate_root=None,
+    **kwargs,
+):
+    """Create key-value interface for given URL and options
+
+    The URL will be of the form "protocol://location" and point to the root
+    of the mapper required. All keys will be file-names below this location,
+    and their values the contents of each key.
+
+    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.
+
+    Parameters
+    ----------
+    url: str
+        Root URL of mapping
+    check: bool
+        Whether to attempt to read from the location before instantiation, to
+        check that the mapping does exist
+    create: bool
+        Whether to make the directory corresponding to the root before
+        instantiating
+    missing_exceptions: None or tuple
+        If given, these exception types will be regarded as missing keys and
+        return KeyError when trying to read data. By default, you get
+        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
+    alternate_root: None or str
+        In cases of complex URLs, the parser may fail to pick the correct part
+        for the mapper root, so this arg can override
+
+    Returns
+    -------
+    ``FSMap`` instance, the dict-like key-value store.
+    """
+    # Removing protocol here - could defer to each open() on the backend
+    fs, urlpath = url_to_fs(url, **kwargs)
+    root = alternate_root if alternate_root is not None else urlpath
+    return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)
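
A minimal sketch of the key-value interface described above, using the top-level `fsspec.get_mapper` entry point with the memory filesystem; the root path is illustrative:

import fsspec

# Key-value view onto a memory filesystem; keys map to files under the root.
m = fsspec.get_mapper("memory://mapped", create=True)
m["x/y"] = b"payload"

assert list(m) == ["x/y"]
assert m["x/y"] == b"payload"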
venv/lib/python3.13/site-packages/fsspec/parquet.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import warnings
4
+
5
+ from .core import url_to_fs
6
+ from .utils import merge_offset_ranges
7
+
8
+ # Parquet-Specific Utilities for fsspec
9
+ #
10
+ # Most of the functions defined in this module are NOT
11
+ # intended for public consumption. The only exception
12
+ # to this is `open_parquet_file`, which should be used
13
+ # place of `fs.open()` to open parquet-formatted files
14
+ # on remote file systems.
15
+
16
+
17
+ def open_parquet_file(
18
+ path,
19
+ mode="rb",
20
+ fs=None,
21
+ metadata=None,
22
+ columns=None,
23
+ row_groups=None,
24
+ storage_options=None,
25
+ strict=False,
26
+ engine="auto",
27
+ max_gap=64_000,
28
+ max_block=256_000_000,
29
+ footer_sample_size=1_000_000,
30
+ **kwargs,
31
+ ):
32
+ """
33
+ Return a file-like object for a single Parquet file.
34
+
35
+ The specified parquet `engine` will be used to parse the
36
+ footer metadata, and determine the required byte ranges
37
+ from the file. The target path will then be opened with
38
+ the "parts" (`KnownPartsOfAFile`) caching strategy.
39
+
40
+ Note that this method is intended for usage with remote
41
+ file systems, and is unlikely to improve parquet-read
42
+ performance on local file systems.
43
+
44
+ Parameters
45
+ ----------
46
+ path: str
47
+ Target file path.
48
+ mode: str, optional
49
+ Mode option to be passed through to `fs.open`. Default is "rb".
50
+ metadata: Any, optional
51
+ Parquet metadata object. Object type must be supported
52
+ by the backend parquet engine. For now, only the "fastparquet"
53
+ engine supports an explicit `ParquetFile` metadata object.
54
+ If a metadata object is supplied, the remote footer metadata
55
+ will not need to be transferred into local memory.
56
+ fs: AbstractFileSystem, optional
57
+ Filesystem object to use for opening the file. If nothing is
58
+ specified, an `AbstractFileSystem` object will be inferred.
59
+ engine : str, default "auto"
60
+ Parquet engine to use for metadata parsing. Allowed options
61
+ include "fastparquet", "pyarrow", and "auto". The specified
62
+ engine must be installed in the current environment. If
63
+ "auto" is specified, and both engines are installed,
64
+ "fastparquet" will take precedence over "pyarrow".
65
+ columns: list, optional
66
+ List of all column names that may be read from the file.
67
+ row_groups : list, optional
68
+ List of all row-groups that may be read from the file. This
69
+ may be a list of row-group indices (integers), or it may be
70
+ a list of `RowGroup` metadata objects (if the "fastparquet"
71
+ engine is used).
72
+ storage_options : dict, optional
73
+ Used to generate an `AbstractFileSystem` object if `fs` was
74
+ not specified.
75
+ strict : bool, optional
76
+ Whether the resulting `KnownPartsOfAFile` cache should
77
+ fetch reads that go beyond a known byte-range boundary.
78
+ If `False` (the default), any read that ends outside a
79
+ known part will be zero padded. Note that using
80
+ `strict=True` may be useful for debugging.
81
+ max_gap : int, optional
82
+ Neighboring byte ranges will only be merged when their
83
+ inter-range gap is <= `max_gap`. Default is 64KB.
84
+ max_block : int, optional
85
+ Neighboring byte ranges will only be merged when the size of
86
+ the aggregated range is <= `max_block`. Default is 256MB.
87
+ footer_sample_size : int, optional
88
+ Number of bytes to read from the end of the path to look
89
+ for the footer metadata. If the sampled bytes do not contain
90
+ the footer, a second read request will be required, and
91
+ performance will suffer. Default is 1MB.
92
+ **kwargs :
93
+ Optional key-word arguments to pass to `fs.open`
94
+ """
95
+
96
+ # Make sure we have an `AbstractFileSystem` object
97
+ # to work with
98
+ if fs is None:
99
+ fs = url_to_fs(path, **(storage_options or {}))[0]
100
+
101
+ # For now, `columns == []` not supported. Just use
102
+ # default `open` command with `path` input
103
+ if columns is not None and len(columns) == 0:
104
+ return fs.open(path, mode=mode)
105
+
106
+ # Set the engine
107
+ engine = _set_engine(engine)
108
+
109
+ # Fetch the known byte ranges needed to read
110
+ # `columns` and/or `row_groups`
111
+ data = _get_parquet_byte_ranges(
112
+ [path],
113
+ fs,
114
+ metadata=metadata,
115
+ columns=columns,
116
+ row_groups=row_groups,
117
+ engine=engine,
118
+ max_gap=max_gap,
119
+ max_block=max_block,
120
+ footer_sample_size=footer_sample_size,
121
+ )
122
+
123
+ # Extract file name from `data`
124
+ fn = next(iter(data)) if data else path
125
+
126
+ # Call self.open with "parts" caching
127
+ options = kwargs.pop("cache_options", {}).copy()
128
+ return fs.open(
129
+ fn,
130
+ mode=mode,
131
+ cache_type="parts",
132
+ cache_options={
133
+ **options,
134
+ "data": data.get(fn, {}),
135
+ "strict": strict,
136
+ },
137
+ **kwargs,
138
+ )
139
+
140
+
141
+ def _get_parquet_byte_ranges(
142
+ paths,
143
+ fs,
144
+ metadata=None,
145
+ columns=None,
146
+ row_groups=None,
147
+ max_gap=64_000,
148
+ max_block=256_000_000,
149
+ footer_sample_size=1_000_000,
150
+ engine="auto",
151
+ ):
152
+ """Get a dictionary of the known byte ranges needed
153
+ to read a specific column/row-group selection from a
154
+ Parquet dataset. Each value in the output dictionary
155
+ is intended for use as the `data` argument for the
156
+ `KnownPartsOfAFile` caching strategy of a single path.
157
+ """
158
+
159
+ # Set engine if necessary
160
+ if isinstance(engine, str):
161
+ engine = _set_engine(engine)
162
+
163
+ # Pass to specialized function if metadata is defined
164
+ if metadata is not None:
165
+ # Use the provided parquet metadata object
166
+ # to avoid transferring/parsing footer metadata
167
+ return _get_parquet_byte_ranges_from_metadata(
168
+ metadata,
169
+ fs,
170
+ engine,
171
+ columns=columns,
172
+ row_groups=row_groups,
173
+ max_gap=max_gap,
174
+ max_block=max_block,
175
+ )
176
+
177
+ # Get file sizes asynchronously
178
+ file_sizes = fs.sizes(paths)
179
+
180
+ # Populate global paths, starts, & ends
181
+ result = {}
182
+ data_paths = []
183
+ data_starts = []
184
+ data_ends = []
185
+ add_header_magic = True
186
+ if columns is None and row_groups is None:
187
+ # We are NOT selecting specific columns or row-groups.
188
+ #
189
+ # We can avoid sampling the footers, and just transfer
190
+ # all file data with cat_ranges
191
+ for i, path in enumerate(paths):
192
+ result[path] = {}
193
+ for b in range(0, file_sizes[i], max_block):
194
+ data_paths.append(path)
195
+ data_starts.append(b)
196
+ data_ends.append(min(b + max_block, file_sizes[i]))
197
+ add_header_magic = False # "Magic" should already be included
198
+ else:
199
+ # We ARE selecting specific columns or row-groups.
200
+ #
201
+ # Gather file footers.
202
+ # We just take the last `footer_sample_size` bytes of each
203
+ # file (or the entire file if it is smaller than that)
204
+ footer_starts = []
205
+ footer_ends = []
206
+ for i, path in enumerate(paths):
207
+ footer_ends.append(file_sizes[i])
208
+ sample_size = max(0, file_sizes[i] - footer_sample_size)
209
+ footer_starts.append(sample_size)
210
+ footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
211
+
212
+ # Check our footer samples and re-sample if necessary.
213
+ missing_footer_starts = footer_starts.copy()
214
+ large_footer = 0
215
+ for i, path in enumerate(paths):
216
+ footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
217
+ real_footer_start = file_sizes[i] - (footer_size + 8)
218
+ if real_footer_start < footer_starts[i]:
219
+ missing_footer_starts[i] = real_footer_start
220
+ large_footer = max(large_footer, (footer_size + 8))
221
+ if large_footer:
222
+ warnings.warn(
223
+ f"Not enough data was used to sample the parquet footer. "
224
+ f"Try setting footer_sample_size >= {large_footer}."
225
+ )
226
+ for i, block in enumerate(
227
+ fs.cat_ranges(
228
+ paths,
229
+ missing_footer_starts,
230
+ footer_starts,
231
+ )
232
+ ):
233
+ footer_samples[i] = block + footer_samples[i]
234
+ footer_starts[i] = missing_footer_starts[i]
235
+
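The footer-size arithmetic above follows the parquet footer convention: a file ends with a 4-byte little-endian footer length followed by the magic bytes `PAR1`. A standalone illustration with made-up tail bytes:

```python
file_size = 1_000
tail = b"\x2a\x00\x00\x00PAR1"  # hypothetical final 8 bytes of a parquet file
assert tail[-4:] == b"PAR1"                        # trailing magic
footer_size = int.from_bytes(tail[:4], "little")   # 42
real_footer_start = file_size - (footer_size + 8)  # 950
```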
236
+ # Calculate required byte ranges for each path
237
+ for i, path in enumerate(paths):
238
+ # Deal with small-file case.
239
+ # Just include all remaining bytes of the file
240
+ # in a single range.
241
+ if file_sizes[i] < max_block:
242
+ if footer_starts[i] > 0:
243
+ # Only need to transfer the data if the
244
+ # footer sample isn't already the whole file
245
+ data_paths.append(path)
246
+ data_starts.append(0)
247
+ data_ends.append(footer_starts[i])
248
+ continue
249
+
250
+ # Use "engine" to collect data byte ranges
251
+ path_data_starts, path_data_ends = engine._parquet_byte_ranges(
252
+ columns,
253
+ row_groups=row_groups,
254
+ footer=footer_samples[i],
255
+ footer_start=footer_starts[i],
256
+ )
257
+
258
+ data_paths += [path] * len(path_data_starts)
259
+ data_starts += path_data_starts
260
+ data_ends += path_data_ends
261
+
262
+ # Merge adjacent offset ranges
263
+ data_paths, data_starts, data_ends = merge_offset_ranges(
264
+ data_paths,
265
+ data_starts,
266
+ data_ends,
267
+ max_gap=max_gap,
268
+ max_block=max_block,
269
+ sort=False, # Should already be sorted
270
+ )
271
+
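For intuition, a small sketch of what `fsspec.utils.merge_offset_ranges` does with `max_gap` (the toy ranges are made up):

```python
from fsspec.utils import merge_offset_ranges

paths, starts, ends = merge_offset_ranges(
    ["f", "f", "f"], [0, 10, 500], [8, 100, 600], max_gap=16, max_block=None
)
# The 2-byte gap between (0, 8) and (10, 100) is within max_gap, so those
# two ranges merge: paths == ["f", "f"], starts == [0, 500], ends == [100, 600]
```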
272
+ # Start by populating `result` with footer samples
273
+ for i, path in enumerate(paths):
274
+ result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
275
+
276
+ # Transfer the data byte-ranges into local memory
277
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
278
+
279
+ # Add b"PAR1" to header if necessary
280
+ if add_header_magic:
281
+ _add_header_magic(result)
282
+
283
+ return result
284
+
285
+
286
+ def _get_parquet_byte_ranges_from_metadata(
287
+ metadata,
288
+ fs,
289
+ engine,
290
+ columns=None,
291
+ row_groups=None,
292
+ max_gap=64_000,
293
+ max_block=256_000_000,
294
+ ):
295
+ """Simplified version of `_get_parquet_byte_ranges` for
296
+ the case that an engine-specific `metadata` object is
297
+ provided, and the remote footer metadata does not need to
298
+ be transferred before calculating the required byte ranges.
299
+ """
300
+
301
+ # Use "engine" to collect data byte ranges
302
+ data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
303
+ columns,
304
+ row_groups=row_groups,
305
+ metadata=metadata,
306
+ )
307
+
308
+ # Merge adjacent offset ranges
309
+ data_paths, data_starts, data_ends = merge_offset_ranges(
310
+ data_paths,
311
+ data_starts,
312
+ data_ends,
313
+ max_gap=max_gap,
314
+ max_block=max_block,
315
+ sort=False, # Should be sorted
316
+ )
317
+
318
+ # Transfer the data byte-ranges into local memory
319
+ result = {fn: {} for fn in list(set(data_paths))}
320
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
321
+
322
+ # Add b"PAR1" to header
323
+ _add_header_magic(result)
324
+
325
+ return result
326
+
327
+
328
+ def _transfer_ranges(fs, blocks, paths, starts, ends):
329
+ # Use cat_ranges to gather the data byte_ranges
330
+ ranges = (paths, starts, ends)
331
+ for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
332
+ blocks[path][(start, stop)] = data
333
+
334
+
335
+ def _add_header_magic(data):
336
+ # Add b"PAR1" to file headers
337
+ for path in list(data.keys()):
338
+ add_magic = True
339
+ for k in data[path]:
340
+ if k[0] == 0 and k[1] >= 4:
341
+ add_magic = False
342
+ break
343
+ if add_magic:
344
+ data[path][(0, 4)] = b"PAR1"
345
+
346
+
347
+ def _set_engine(engine_str):
348
+ # Define a list of parquet engines to try
349
+ if engine_str == "auto":
350
+ try_engines = ("fastparquet", "pyarrow")
351
+ elif not isinstance(engine_str, str):
352
+ raise ValueError(
353
+ "Failed to set parquet engine! "
354
+ "Please pass 'fastparquet', 'pyarrow', or 'auto'"
355
+ )
356
+ elif engine_str not in ("fastparquet", "pyarrow"):
357
+ raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
358
+ else:
359
+ try_engines = [engine_str]
360
+
361
+ # Try importing the engines in `try_engines`,
362
+ # and choose the first one that succeeds
363
+ for engine in try_engines:
364
+ try:
365
+ if engine == "fastparquet":
366
+ return FastparquetEngine()
367
+ elif engine == "pyarrow":
368
+ return PyarrowEngine()
369
+ except ImportError:
370
+ pass
371
+
372
+ # Raise an error if a supported parquet engine
373
+ # was not found
374
+ raise ImportError(
375
+ f"The following parquet engines are not installed "
376
+ f"in your python environment: {try_engines}. "
377
+ f"Please install 'fastparquet' or 'pyarrow' to "
378
+ f"utilize the `fsspec.parquet` module."
379
+ )
380
+
381
+
382
+ class FastparquetEngine:
383
+ # The purpose of the FastparquetEngine class is
384
+ # to check if fastparquet can be imported (on initialization)
385
+ # and to define a `_parquet_byte_ranges` method. In the
386
+ # future, this class may also be used to define other
387
+ # methods/logic that are specific to fastparquet.
388
+
389
+ def __init__(self):
390
+ import fastparquet as fp
391
+
392
+ self.fp = fp
393
+
394
+ def _row_group_filename(self, row_group, pf):
395
+ return pf.row_group_filename(row_group)
396
+
397
+ def _parquet_byte_ranges(
398
+ self,
399
+ columns,
400
+ row_groups=None,
401
+ metadata=None,
402
+ footer=None,
403
+ footer_start=None,
404
+ ):
405
+ # Initialize offset ranges and define ParquetFile metadata
406
+ pf = metadata
407
+ data_paths, data_starts, data_ends = [], [], []
408
+ if pf is None:
409
+ pf = self.fp.ParquetFile(io.BytesIO(footer))
410
+
411
+ # Convert columns to a set and add any index columns
412
+ # specified in the pandas metadata (just in case)
413
+ column_set = None if columns is None else set(columns)
414
+ if column_set is not None and hasattr(pf, "pandas_metadata"):
415
+ md_index = [
416
+ ind
417
+ for ind in pf.pandas_metadata.get("index_columns", [])
418
+ # Ignore RangeIndex information
419
+ if not isinstance(ind, dict)
420
+ ]
421
+ column_set |= set(md_index)
422
+
423
+ # Check if row_groups is a list of integers
424
+ # or a list of row-group metadata
425
+ if row_groups and not isinstance(row_groups[0], int):
426
+ # Input row_groups contains row-group metadata
427
+ row_group_indices = None
428
+ else:
429
+ # Input row_groups contains row-group indices
430
+ row_group_indices = row_groups
431
+ row_groups = pf.row_groups
432
+
433
+ # Loop through column chunks to add required byte ranges
434
+ for r, row_group in enumerate(row_groups):
435
+ # Skip this row-group if we are targeting
436
+ # specific row-groups
437
+ if row_group_indices is None or r in row_group_indices:
438
+ # Find the target parquet-file path for `row_group`
439
+ fn = self._row_group_filename(row_group, pf)
440
+
441
+ for column in row_group.columns:
442
+ name = column.meta_data.path_in_schema[0]
443
+ # Skip this column if we are targeting a
444
+ # specific column set
445
+ if column_set is None or name in column_set:
446
+ file_offset0 = column.meta_data.dictionary_page_offset
447
+ if file_offset0 is None:
448
+ file_offset0 = column.meta_data.data_page_offset
449
+ num_bytes = column.meta_data.total_compressed_size
450
+ if footer_start is None or file_offset0 < footer_start:
451
+ data_paths.append(fn)
452
+ data_starts.append(file_offset0)
453
+ data_ends.append(
454
+ min(
455
+ file_offset0 + num_bytes,
456
+ footer_start or (file_offset0 + num_bytes),
457
+ )
458
+ )
459
+
460
+ if metadata:
461
+ # The metadata in this call may map to multiple
462
+ # file paths. Need to include `data_paths`
463
+ return data_paths, data_starts, data_ends
464
+ return data_starts, data_ends
465
+
466
+
467
+ class PyarrowEngine:
468
+ # The purpose of the PyarrowEngine class is
469
+ # to check if pyarrow can be imported (on initialization)
470
+ # and to define a `_parquet_byte_ranges` method. In the
471
+ # future, this class may also be used to define other
472
+ # methods/logic that are specific to pyarrow.
473
+
474
+ def __init__(self):
475
+ import pyarrow.parquet as pq
476
+
477
+ self.pq = pq
478
+
479
+ def _row_group_filename(self, row_group, metadata):
480
+ raise NotImplementedError
481
+
482
+ def _parquet_byte_ranges(
483
+ self,
484
+ columns,
485
+ row_groups=None,
486
+ metadata=None,
487
+ footer=None,
488
+ footer_start=None,
489
+ ):
490
+ if metadata is not None:
491
+ raise ValueError("metadata input not supported for PyarrowEngine")
492
+
493
+ data_starts, data_ends = [], []
494
+ md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
495
+
496
+ # Convert columns to a set and add any index columns
497
+ # specified in the pandas metadata (just in case)
498
+ column_set = None if columns is None else set(columns)
499
+ if column_set is not None:
500
+ schema = md.schema.to_arrow_schema()
501
+ has_pandas_metadata = (
502
+ schema.metadata is not None and b"pandas" in schema.metadata
503
+ )
504
+ if has_pandas_metadata:
505
+ md_index = [
506
+ ind
507
+ for ind in json.loads(
508
+ schema.metadata[b"pandas"].decode("utf8")
509
+ ).get("index_columns", [])
510
+ # Ignore RangeIndex information
511
+ if not isinstance(ind, dict)
512
+ ]
513
+ column_set |= set(md_index)
514
+
515
+ # Loop through column chunks to add required byte ranges
516
+ for r in range(md.num_row_groups):
517
+ # Skip this row-group if we are targeting
518
+ # specific row-groups
519
+ if row_groups is None or r in row_groups:
520
+ row_group = md.row_group(r)
521
+ for c in range(row_group.num_columns):
522
+ column = row_group.column(c)
523
+ name = column.path_in_schema
524
+ # Skip this column if we are targeting a
525
+ # specific column set
526
+ split_name = name.split(".")[0]
527
+ if (
528
+ column_set is None
529
+ or name in column_set
530
+ or split_name in column_set
531
+ ):
532
+ file_offset0 = column.dictionary_page_offset
533
+ if file_offset0 is None:
534
+ file_offset0 = column.data_page_offset
535
+ num_bytes = column.total_compressed_size
536
+ if file_offset0 < footer_start:
537
+ data_starts.append(file_offset0)
538
+ data_ends.append(
539
+ min(file_offset0 + num_bytes, footer_start)
540
+ )
541
+ return data_starts, data_ends
venv/lib/python3.13/site-packages/fsspec/registry.py ADDED
@@ -0,0 +1,330 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import types
5
+ import warnings
6
+
7
+ __all__ = ["registry", "get_filesystem_class", "default"]
8
+
9
+ # internal, mutable
10
+ _registry: dict[str, type] = {}
11
+
12
+ # external, immutable
13
+ registry = types.MappingProxyType(_registry)
14
+ default = "file"
15
+
16
+
17
+ def register_implementation(name, cls, clobber=False, errtxt=None):
18
+ """Add implementation class to the registry
19
+
20
+ Parameters
21
+ ----------
22
+ name: str
23
+ Protocol name to associate with the class
24
+ cls: class or str
25
+ if a class: fsspec-compliant implementation class (normally inherits from
26
+ ``fsspec.AbstractFileSystem``), gets added straight to the registry. If a
27
+ str, the full path to an implementation class like package.module.class,
28
+ which gets added to known_implementations,
29
+ so the import is deferred until the filesystem is actually used.
30
+ clobber: bool (optional)
31
+ Whether to overwrite a protocol with the same name; if False, will raise
32
+ instead.
33
+ errtxt: str (optional)
34
+ If given, then a failure to import the given class will result in this
35
+ text being used as the error message.
36
+ """
37
+ if isinstance(cls, str):
38
+ if name in known_implementations and clobber is False:
39
+ if cls != known_implementations[name]["class"]:
40
+ raise ValueError(
41
+ f"Name ({name}) already in the known_implementations and clobber "
42
+ f"is False"
43
+ )
44
+ else:
45
+ known_implementations[name] = {
46
+ "class": cls,
47
+ "err": errtxt or f"{cls} import failed for protocol {name}",
48
+ }
49
+
50
+ else:
51
+ if name in registry and clobber is False:
52
+ if _registry[name] is not cls:
53
+ raise ValueError(
54
+ f"Name ({name}) already in the registry and clobber is False"
55
+ )
56
+ else:
57
+ _registry[name] = cls
58
+
59
+
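A short example of registering a deferred (string-path) implementation; `mypkg.fs.MyFileSystem` is a hypothetical class:

```python
import fsspec

fsspec.register_implementation(
    "myproto",
    "mypkg.fs.MyFileSystem",  # hypothetical import path
    errtxt="Install mypkg to use the myproto:// protocol",
)
# The import of mypkg.fs is deferred until the first
# fsspec.filesystem("myproto") call.
```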
60
+ # protocols mapped to the class which implements them. This dict can be
61
+ # updated with register_implementation
62
+ known_implementations = {
63
+ "abfs": {
64
+ "class": "adlfs.AzureBlobFileSystem",
65
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
66
+ },
67
+ "adl": {
68
+ "class": "adlfs.AzureDatalakeFileSystem",
69
+ "err": "Install adlfs to access Azure Datalake Gen1",
70
+ },
71
+ "arrow_hdfs": {
72
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
+ "err": "pyarrow and local java libraries required for HDFS",
74
+ },
75
+ "asynclocal": {
76
+ "class": "morefs.asyn_local.AsyncLocalFileSystem",
77
+ "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
78
+ },
79
+ "asyncwrapper": {
80
+ "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
81
+ },
82
+ "az": {
83
+ "class": "adlfs.AzureBlobFileSystem",
84
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
85
+ },
86
+ "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
87
+ "box": {
88
+ "class": "boxfs.BoxFileSystem",
89
+ "err": "Please install boxfs to access BoxFileSystem",
90
+ },
91
+ "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
92
+ "dask": {
93
+ "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
94
+ "err": "Install dask distributed to access worker file system",
95
+ },
96
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
97
+ "dbfs": {
98
+ "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
99
+ "err": "Install the requests package to use the DatabricksFileSystem",
100
+ },
101
+ "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
102
+ "dropbox": {
103
+ "class": "dropboxdrivefs.DropboxDriveFileSystem",
104
+ "err": (
105
+ 'DropboxFileSystem requires "dropboxdrivefs", "requests" and '
106
+ '"dropbox" to be installed'
107
+ ),
108
+ },
109
+ "dvc": {
110
+ "class": "dvc.api.DVCFileSystem",
111
+ "err": "Install dvc to access DVCFileSystem",
112
+ },
113
+ "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
114
+ "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
115
+ "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
116
+ "gcs": {
117
+ "class": "gcsfs.GCSFileSystem",
118
+ "err": "Please install gcsfs to access Google Storage",
119
+ },
120
+ "gdrive": {
121
+ "class": "gdrive_fsspec.GoogleDriveFileSystem",
122
+ "err": "Please install gdrive_fs for access to Google Drive",
123
+ },
124
+ "generic": {"class": "fsspec.generic.GenericFileSystem"},
125
+ "gist": {
126
+ "class": "fsspec.implementations.gist.GistFileSystem",
127
+ "err": "Install the requests package to use the gist FS",
128
+ },
129
+ "git": {
130
+ "class": "fsspec.implementations.git.GitFileSystem",
131
+ "err": "Install pygit2 to browse local git repos",
132
+ },
133
+ "github": {
134
+ "class": "fsspec.implementations.github.GithubFileSystem",
135
+ "err": "Install the requests package to use the github FS",
136
+ },
137
+ "gs": {
138
+ "class": "gcsfs.GCSFileSystem",
139
+ "err": "Please install gcsfs to access Google Storage",
140
+ },
141
+ "hdfs": {
142
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
143
+ "err": "pyarrow and local java libraries required for HDFS",
144
+ },
145
+ "hf": {
146
+ "class": "huggingface_hub.HfFileSystem",
147
+ "err": "Install huggingface_hub to access HfFileSystem",
148
+ },
149
+ "http": {
150
+ "class": "fsspec.implementations.http.HTTPFileSystem",
151
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
152
+ },
153
+ "https": {
154
+ "class": "fsspec.implementations.http.HTTPFileSystem",
155
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
156
+ },
157
+ "jlab": {
158
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
159
+ "err": "Jupyter FS requires requests to be installed",
160
+ },
161
+ "jupyter": {
162
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
163
+ "err": "Jupyter FS requires requests to be installed",
164
+ },
165
+ "lakefs": {
166
+ "class": "lakefs_spec.LakeFSFileSystem",
167
+ "err": "Please install lakefs-spec to access LakeFSFileSystem",
168
+ },
169
+ "libarchive": {
170
+ "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
171
+ "err": "LibArchive requires libarchive to be installed",
172
+ },
173
+ "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
174
+ "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
175
+ "oci": {
176
+ "class": "ocifs.OCIFileSystem",
177
+ "err": "Install ocifs to access OCI Object Storage",
178
+ },
179
+ "ocilake": {
180
+ "class": "ocifs.OCIFileSystem",
181
+ "err": "Install ocifs to access OCI Data Lake",
182
+ },
183
+ "oss": {
184
+ "class": "ossfs.OSSFileSystem",
185
+ "err": "Install ossfs to access Alibaba Object Storage System",
186
+ },
187
+ "pyscript": {
188
+ "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
189
+ "err": "Install requests (cpython) or run in pyscript",
190
+ },
191
+ "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
192
+ "root": {
193
+ "class": "fsspec_xrootd.XRootDFileSystem",
194
+ "err": (
195
+ "Install fsspec-xrootd to access xrootd storage system. "
196
+ "Note: 'root' is the protocol name for xrootd storage systems, "
197
+ "not referring to root directories"
198
+ ),
199
+ },
200
+ "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
201
+ "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
202
+ "sftp": {
203
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
204
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
205
+ },
206
+ "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
207
+ "smb": {
208
+ "class": "fsspec.implementations.smb.SMBFileSystem",
209
+ "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
210
+ },
211
+ "ssh": {
212
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
213
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
214
+ },
215
+ "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
216
+ "tos": {
217
+ "class": "tosfs.TosFileSystem",
218
+ "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
219
+ },
220
+ "tosfs": {
221
+ "class": "tosfs.TosFileSystem",
222
+ "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
223
+ },
224
+ "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
225
+ "webdav": {
226
+ "class": "webdav4.fsspec.WebdavFileSystem",
227
+ "err": "Install webdav4 to access WebDAV",
228
+ },
229
+ "webhdfs": {
230
+ "class": "fsspec.implementations.webhdfs.WebHDFS",
231
+ "err": 'webHDFS access requires "requests" to be installed',
232
+ },
233
+ "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
234
+ }
235
+
236
+ assert list(known_implementations) == sorted(known_implementations), (
237
+ "Not in alphabetical order"
238
+ )
239
+
240
+
241
+ def get_filesystem_class(protocol):
242
+ """Fetch named protocol implementation from the registry
243
+
244
+ The dict ``known_implementations`` maps protocol names to the locations
245
+ of classes implementing the corresponding file-system. When used for the
246
+ first time, appropriate imports will happen and the class will be placed in
247
+ the registry. All subsequent calls will fetch directly from the registry.
248
+
249
+ Some protocol implementations require additional dependencies, and so the
250
+ import may fail. In this case, the string in the "err" field of the
251
+ ``known_implementations`` will be given as the error message.
252
+ """
253
+ if not protocol:
254
+ protocol = default
255
+
256
+ if protocol not in registry:
257
+ if protocol not in known_implementations:
258
+ raise ValueError(f"Protocol not known: {protocol}")
259
+ bit = known_implementations[protocol]
260
+ try:
261
+ register_implementation(protocol, _import_class(bit["class"]))
262
+ except ImportError as e:
263
+ raise ImportError(bit.get("err")) from e
264
+ cls = registry[protocol]
265
+ if getattr(cls, "protocol", None) in ("abstract", None):
266
+ cls.protocol = protocol
267
+
268
+ return cls
269
+
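For example, resolving the built-in in-memory implementation, which needs no optional dependencies:

```python
from fsspec.registry import get_filesystem_class

MemFS = get_filesystem_class("memory")
fs = MemFS()
fs.pipe_file("/demo.txt", b"hello")
assert fs.cat_file("/demo.txt") == b"hello"
```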
270
+
271
+ s3_msg = """Your installed version of s3fs is very old and known to cause
272
+ severe performance issues, see also https://github.com/dask/dask/issues/10276
273
+
274
+ To fix, you should specify a lower version bound on s3fs, or
275
+ update the current installation.
276
+ """
277
+
278
+
279
+ def _import_class(fqp: str):
280
+ """Take a fully-qualified path and return the imported class or identifier.
281
+
282
+ ``fqp`` is of the form "package.module.klass" or
283
+ "package.module:subobject.klass".
284
+
285
+ Warnings
286
+ --------
287
+ This can import arbitrary modules. Make sure you haven't installed any modules
288
+ that may execute malicious code at import time.
289
+ """
290
+ if ":" in fqp:
291
+ mod, name = fqp.rsplit(":", 1)
292
+ else:
293
+ mod, name = fqp.rsplit(".", 1)
294
+
295
+ is_s3 = mod == "s3fs"
296
+ mod = importlib.import_module(mod)
297
+ if is_s3 and mod.__version__.split(".") < ["0", "5"]:
298
+ warnings.warn(s3_msg)
299
+ for part in name.split("."):
300
+ mod = getattr(mod, part)
301
+
302
+ if not isinstance(mod, type):
303
+ raise TypeError(f"{fqp} is not a class")
304
+
305
+ return mod
306
+
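Both accepted path forms resolve to the same object, e.g.:

```python
cls_dot = _import_class("fsspec.implementations.memory.MemoryFileSystem")
cls_colon = _import_class("fsspec.implementations.memory:MemoryFileSystem")
assert cls_dot is cls_colon
```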
307
+
308
+ def filesystem(protocol, **storage_options):
309
+ """Instantiate filesystems for given protocol and arguments
310
+
311
+ ``storage_options`` are specific to the protocol being chosen, and are
312
+ passed directly to the class.
313
+ """
314
+ if protocol == "arrow_hdfs":
315
+ warnings.warn(
316
+ "The 'arrow_hdfs' protocol has been deprecated and will be "
317
+ "removed in the future. Specify it as 'hdfs'.",
318
+ DeprecationWarning,
319
+ )
320
+
321
+ cls = get_filesystem_class(protocol)
322
+ return cls(**storage_options)
323
+
324
+
325
+ def available_protocols():
326
+ """Return a list of the implemented protocols.
327
+
328
+ Note that any given protocol may require extra packages to be importable.
329
+ """
330
+ return list(known_implementations)
venv/lib/python3.13/site-packages/fsspec/spec.py ADDED
@@ -0,0 +1,2281 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import json
5
+ import logging
6
+ import os
7
+ import threading
8
+ import warnings
9
+ import weakref
10
+ from errno import ESPIPE
11
+ from glob import has_magic
12
+ from hashlib import sha256
13
+ from typing import Any, ClassVar
14
+
15
+ from .callbacks import DEFAULT_CALLBACK
16
+ from .config import apply_config, conf
17
+ from .dircache import DirCache
18
+ from .transaction import Transaction
19
+ from .utils import (
20
+ _unstrip_protocol,
21
+ glob_translate,
22
+ isfilelike,
23
+ other_paths,
24
+ read_block,
25
+ stringify_path,
26
+ tokenize,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
+ def make_instance(cls, args, kwargs):
33
+ return cls(*args, **kwargs)
34
+
35
+
36
+ class _Cached(type):
37
+ """
38
+ Metaclass for caching file system instances.
39
+
40
+ Notes
41
+ -----
42
+ Instances are cached according to
43
+
44
+ * The values of the class attributes listed in `_extra_tokenize_attributes`
45
+ * The arguments passed to ``__init__``.
46
+
47
+ This creates an additional reference to the filesystem, which prevents the
48
+ filesystem from being garbage collected when all *user* references go away.
49
+ A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
50
+ be made for a filesystem instance to be garbage collected.
51
+ """
52
+
53
+ def __init__(cls, *args, **kwargs):
54
+ super().__init__(*args, **kwargs)
55
+ # Note: we intentionally create a reference here, to avoid garbage
56
+ # collecting instances when all other references are gone. To really
57
+ # delete a FileSystem, the cache must be cleared.
58
+ if conf.get("weakref_instance_cache"): # pragma: no cover
59
+ # debug option for analysing fork/spawn conditions
60
+ cls._cache = weakref.WeakValueDictionary()
61
+ else:
62
+ cls._cache = {}
63
+ cls._pid = os.getpid()
64
+
65
+ def __call__(cls, *args, **kwargs):
66
+ kwargs = apply_config(cls, kwargs)
67
+ extra_tokens = tuple(
68
+ getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
69
+ )
70
+ strip_tokenize_options = {
71
+ k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
72
+ }
73
+ token = tokenize(
74
+ cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
75
+ )
76
+ skip = kwargs.pop("skip_instance_cache", False)
77
+ if os.getpid() != cls._pid:
78
+ cls._cache.clear()
79
+ cls._pid = os.getpid()
80
+ if not skip and cls.cachable and token in cls._cache:
81
+ cls._latest = token
82
+ return cls._cache[token]
83
+ else:
84
+ obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
85
+ # Setting _fs_token here causes some static linters to complain.
86
+ obj._fs_token_ = token
87
+ obj.storage_args = args
88
+ obj.storage_options = kwargs
89
+ if obj.async_impl and obj.mirror_sync_methods:
90
+ from .asyn import mirror_sync_methods
91
+
92
+ mirror_sync_methods(obj)
93
+
94
+ if cls.cachable and not skip:
95
+ cls._latest = token
96
+ cls._cache[token] = obj
97
+ return obj
98
+
99
+
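The effect of the instance cache can be seen with any cachable implementation, e.g. the in-memory one:

```python
import fsspec

a = fsspec.filesystem("memory")
b = fsspec.filesystem("memory")                            # same token -> cached
c = fsspec.filesystem("memory", skip_instance_cache=True)  # bypasses the cache
assert a is b
assert a is not c
```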
100
+ class AbstractFileSystem(metaclass=_Cached):
101
+ """
102
+ An abstract super-class for pythonic file-systems
103
+
104
+ Implementations are expected to be compatible with or, better, subclass
105
+ from here.
106
+ """
107
+
108
+ cachable = True # this class can be cached, instances reused
109
+ _cached = False
110
+ blocksize = 2**22
111
+ sep = "/"
112
+ protocol: ClassVar[str | tuple[str, ...]] = "abstract"
113
+ _latest = None
114
+ async_impl = False
115
+ mirror_sync_methods = False
116
+ root_marker = "" # For some FSs, may require leading '/' or other character
117
+ transaction_type = Transaction
118
+
119
+ #: Extra *class attributes* that should be considered when hashing.
120
+ _extra_tokenize_attributes = ()
121
+ #: *storage options* that should not be considered when hashing.
122
+ _strip_tokenize_options = ()
123
+
124
+ # Set by _Cached metaclass
125
+ storage_args: tuple[Any, ...]
126
+ storage_options: dict[str, Any]
127
+
128
+ def __init__(self, *args, **storage_options):
129
+ """Create and configure file-system instance
130
+
131
+ Instances may be cachable, so if similar enough arguments are seen
132
+ a new instance is not required. The token attribute exists to allow
133
+ implementations to cache instances if they wish.
134
+
135
+ A reasonable default should be provided if there are no arguments.
136
+
137
+ Subclasses should call this method.
138
+
139
+ Parameters
140
+ ----------
141
+ use_listings_cache, listings_expiry_time, max_paths:
142
+ passed to ``DirCache``, if the implementation supports
143
+ directory listing caching. Pass use_listings_cache=False
144
+ to disable such caching.
145
+ skip_instance_cache: bool
146
+ If this is a cachable implementation, pass True here to force
147
+ creating a new instance even if a matching instance exists, and prevent
148
+ storing this instance.
149
+ asynchronous: bool
150
+ loop: asyncio-compatible IOLoop or None
151
+ """
152
+ if self._cached:
153
+ # reusing instance, don't change
154
+ return
155
+ self._cached = True
156
+ self._intrans = False
157
+ self._transaction = None
158
+ self._invalidated_caches_in_transaction = []
159
+ self.dircache = DirCache(**storage_options)
160
+
161
+ if storage_options.pop("add_docs", None):
162
+ warnings.warn("add_docs is no longer supported.", FutureWarning)
163
+
164
+ if storage_options.pop("add_aliases", None):
165
+ warnings.warn("add_aliases has been removed.", FutureWarning)
166
+ # This is set in _Cached
167
+ self._fs_token_ = None
168
+
169
+ @property
170
+ def fsid(self):
171
+ """Persistent filesystem id that can be used to compare filesystems
172
+ across sessions.
173
+ """
174
+ raise NotImplementedError
175
+
176
+ @property
177
+ def _fs_token(self):
178
+ return self._fs_token_
179
+
180
+ def __dask_tokenize__(self):
181
+ return self._fs_token
182
+
183
+ def __hash__(self):
184
+ return int(self._fs_token, 16)
185
+
186
+ def __eq__(self, other):
187
+ return isinstance(other, type(self)) and self._fs_token == other._fs_token
188
+
189
+ def __reduce__(self):
190
+ return make_instance, (type(self), self.storage_args, self.storage_options)
191
+
192
+ @classmethod
193
+ def _strip_protocol(cls, path):
194
+ """Turn path from fully-qualified to file-system-specific
195
+
196
+ May require FS-specific handling, e.g., for relative paths or links.
197
+ """
198
+ if isinstance(path, list):
199
+ return [cls._strip_protocol(p) for p in path]
200
+ path = stringify_path(path)
201
+ protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
202
+ for protocol in protos:
203
+ if path.startswith(protocol + "://"):
204
+ path = path[len(protocol) + 3 :]
205
+ elif path.startswith(protocol + "::"):
206
+ path = path[len(protocol) + 2 :]
207
+ path = path.rstrip("/")
208
+ # use of root_marker to make minimum required path, e.g., "/"
209
+ return path or cls.root_marker
210
+
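On the base class itself (`protocol == "abstract"`, empty `root_marker`) this behaves as:

```python
from fsspec import AbstractFileSystem

assert AbstractFileSystem._strip_protocol("abstract://a/b/") == "a/b"
assert AbstractFileSystem._strip_protocol("a/b") == "a/b"
```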
211
+ def unstrip_protocol(self, name: str) -> str:
212
+ """Format FS-specific path to generic, including protocol"""
213
+ protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
214
+ for protocol in protos:
215
+ if name.startswith(f"{protocol}://"):
216
+ return name
217
+ return f"{protos[0]}://{name}"
218
+
219
+ @staticmethod
220
+ def _get_kwargs_from_urls(path):
221
+ """If kwargs can be encoded in the paths, extract them here
222
+
223
+ This should happen before instantiation of the class; incoming paths
224
+ should then be amended in the methods to strip those options.
225
+
226
+ Examples may look like an sftp path "sftp://user@host:/my/path", where
227
+ the user and host should become kwargs and later get stripped.
228
+ """
229
+ # by default, nothing happens
230
+ return {}
231
+
232
+ @classmethod
233
+ def current(cls):
234
+ """Return the most recently instantiated FileSystem
235
+
236
+ If no instance has been created, then create one with defaults
237
+ """
238
+ if cls._latest in cls._cache:
239
+ return cls._cache[cls._latest]
240
+ return cls()
241
+
242
+ @property
243
+ def transaction(self):
244
+ """A context within which files are committed together upon exit
245
+
246
+ Requires the file class to implement `.commit()` and `.discard()`
247
+ for the normal and exception cases.
248
+ """
249
+ if self._transaction is None:
250
+ self._transaction = self.transaction_type(self)
251
+ return self._transaction
252
+
253
+ def start_transaction(self):
254
+ """Begin write transaction for deferring files, non-context version"""
255
+ self._intrans = True
256
+ self._transaction = self.transaction_type(self)
257
+ return self.transaction
258
+
259
+ def end_transaction(self):
260
+ """Finish write transaction, non-context version"""
261
+ self.transaction.complete()
262
+ self._transaction = None
263
+ # The invalid cache must be cleared after the transaction is completed.
264
+ for path in self._invalidated_caches_in_transaction:
265
+ self.invalidate_cache(path)
266
+ self._invalidated_caches_in_transaction.clear()
267
+
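A sketch of transactional writes using the in-memory filesystem, whose file class implements `commit`/`discard`:

```python
import fsspec

fs = fsspec.filesystem("memory")
with fs.transaction:
    with fs.open("/txn.txt", "wb") as f:
        f.write(b"data")  # committed when the transaction block exits
assert fs.cat_file("/txn.txt") == b"data"
```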
268
+ def invalidate_cache(self, path=None):
269
+ """
270
+ Discard any cached directory information
271
+
272
+ Parameters
273
+ ----------
274
+ path: string or None
275
+ If None, clear all listings cached else listings at or under given
276
+ path.
277
+ """
278
+ # Not necessary to implement invalidation mechanism, may have no cache.
279
+ # But if you do, you should call this method of the parent class from your
280
+ # subclass to ensure caches expire correctly after transactions.
281
+ # See the implementation of FTPFileSystem in ftp.py
282
+ if self._intrans:
283
+ self._invalidated_caches_in_transaction.append(path)
284
+
285
+ def mkdir(self, path, create_parents=True, **kwargs):
286
+ """
287
+ Create directory entry at path
288
+
289
+ For systems that don't have true directories, may create an entry for
290
+ this instance only and not touch the real filesystem
291
+
292
+ Parameters
293
+ ----------
294
+ path: str
295
+ location
296
+ create_parents: bool
297
+ if True, this is equivalent to ``makedirs``
298
+ kwargs:
299
+ may be permissions, etc.
300
+ """
301
+ pass # not necessary to implement, may not have directories
302
+
303
+ def makedirs(self, path, exist_ok=False):
304
+ """Recursively make directories
305
+
306
+ Creates directory at path and any intervening required directories.
307
+ Raises exception if, for instance, the path already exists but is a
308
+ file.
309
+
310
+ Parameters
311
+ ----------
312
+ path: str
313
+ leaf directory name
314
+ exist_ok: bool (False)
315
+ If False, will error if the target already exists
316
+ """
317
+ pass # not necessary to implement, may not have directories
318
+
319
+ def rmdir(self, path):
320
+ """Remove a directory, if empty"""
321
+ pass # not necessary to implement, may not have directories
322
+
323
+ def ls(self, path, detail=True, **kwargs):
324
+ """List objects at path.
325
+
326
+ This should include subdirectories and files at that location. The
327
+ difference between a file and a directory must be clear when details
328
+ are requested.
329
+
330
+ The specific keys, or perhaps a FileInfo class, or similar, is TBD,
331
+ but must be consistent across implementations.
332
+ Must include:
333
+
334
+ - full path to the entry (without protocol)
335
+ - size of the entry, in bytes. If the value cannot be determined, will
336
+ be ``None``.
337
+ - type of entry, "file", "directory" or other
338
+
339
+ Additional information
340
+ may be present, appropriate to the file-system, e.g., generation,
341
+ checksum, etc.
342
+
343
+ May use refresh=True|False to allow use of self._ls_from_cache to
344
+ check for a saved listing and avoid calling the backend. This would be
345
+ common where listing may be expensive.
346
+
347
+ Parameters
348
+ ----------
349
+ path: str
350
+ detail: bool
351
+ if True, gives a list of dictionaries, where each is the same as
352
+ the result of ``info(path)``. If False, gives a list of paths
353
+ (str).
354
+ kwargs: may have additional backend-specific options, such as version
355
+ information
356
+
357
+ Returns
358
+ -------
359
+ List of strings if detail is False, or list of directory information
360
+ dicts if detail is True.
361
+ """
362
+ raise NotImplementedError
363
+
364
+ def _ls_from_cache(self, path):
365
+ """Check cache for listing
366
+
367
+ Returns listing, if found (may be an empty list for a directory that exists
368
+ but contains nothing), None if not in cache.
369
+ """
370
+ parent = self._parent(path)
371
+ try:
372
+ return self.dircache[path.rstrip("/")]
373
+ except KeyError:
374
+ pass
375
+ try:
376
+ files = [
377
+ f
378
+ for f in self.dircache[parent]
379
+ if f["name"] == path
380
+ or (f["name"] == path.rstrip("/") and f["type"] == "directory")
381
+ ]
382
+ if len(files) == 0:
383
+ # parent dir was listed but did not contain this file
384
+ raise FileNotFoundError(path)
385
+ return files
386
+ except KeyError:
387
+ pass
388
+
389
+ def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
390
+ """Return all files under the given path.
391
+
392
+ List all files, recursing into subdirectories; output is iterator-style,
393
+ like ``os.walk()``. For a simple list of files, ``find()`` is available.
394
+
395
+ When topdown is True, the caller can modify the dirnames list in-place (perhaps
396
+ using del or slice assignment), and walk() will
397
+ only recurse into the subdirectories whose names remain in dirnames;
398
+ this can be used to prune the search, impose a specific order of visiting,
399
+ or even to inform walk() about directories the caller creates or renames before
400
+ it resumes walk() again.
401
+ Modifying dirnames when topdown is False has no effect. (see os.walk)
402
+
403
+ Note that the "files" output will include anything that is not
404
+ a directory, such as links.
405
+
406
+ Parameters
407
+ ----------
408
+ path: str
409
+ Root to recurse into
410
+ maxdepth: int
411
+ Maximum recursion depth. None means limitless, but not recommended
412
+ on link-based file-systems.
413
+ topdown: bool (True)
414
+ Whether to walk the directory tree from the top downwards or from
415
+ the bottom upwards.
416
+ on_error: "omit", "raise", a callable
417
+ if omit (default), path with exception will simply be empty;
418
+ If raise, an underlying exception will be raised;
419
+ if callable, it will be called with a single OSError instance as argument
420
+ kwargs: passed to ``ls``
421
+ """
422
+ if maxdepth is not None and maxdepth < 1:
423
+ raise ValueError("maxdepth must be at least 1")
424
+
425
+ path = self._strip_protocol(path)
426
+ full_dirs = {}
427
+ dirs = {}
428
+ files = {}
429
+
430
+ detail = kwargs.pop("detail", False)
431
+ try:
432
+ listing = self.ls(path, detail=True, **kwargs)
433
+ except (FileNotFoundError, OSError) as e:
434
+ if on_error == "raise":
435
+ raise
436
+ if callable(on_error):
437
+ on_error(e)
438
+ return
439
+
440
+ for info in listing:
441
+ # each info name must be at least [path]/part , but here
442
+ # we check also for names like [path]/part/
443
+ pathname = info["name"].rstrip("/")
444
+ name = pathname.rsplit("/", 1)[-1]
445
+ if info["type"] == "directory" and pathname != path:
446
+ # do not include "self" path
447
+ full_dirs[name] = pathname
448
+ dirs[name] = info
449
+ elif pathname == path:
450
+ # file-like with same name as the given path
451
+ files[""] = info
452
+ else:
453
+ files[name] = info
454
+
455
+ if not detail:
456
+ dirs = list(dirs)
457
+ files = list(files)
458
+
459
+ if topdown:
460
+ # Yield before recursion if walking top down
461
+ yield path, dirs, files
462
+
463
+ if maxdepth is not None:
464
+ maxdepth -= 1
465
+ if maxdepth < 1:
466
+ if not topdown:
467
+ yield path, dirs, files
468
+ return
469
+
470
+ for d in dirs:
471
+ yield from self.walk(
472
+ full_dirs[d],
473
+ maxdepth=maxdepth,
474
+ detail=detail,
475
+ topdown=topdown,
476
+ **kwargs,
477
+ )
478
+
479
+ if not topdown:
480
+ # Yield after recursion if walking bottom up
481
+ yield path, dirs, files
482
+
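Example output shape, sketched with the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/walkdemo/a.txt": b"1", "/walkdemo/sub/b.txt": b"2"})
for root, dirs, files in fs.walk("/walkdemo"):
    print(root, dirs, files)
# roughly: /walkdemo ['sub'] ['a.txt']
#          /walkdemo/sub [] ['b.txt']
```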
483
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
484
+ """List all files below path.
485
+
486
+ Like posix ``find`` command without conditions
487
+
488
+ Parameters
489
+ ----------
490
+ path : str
491
+ maxdepth: int or None
492
+ If not None, the maximum number of levels to descend
493
+ withdirs: bool
494
+ Whether to include directory paths in the output. This is True
495
+ when used by glob, but users usually only want files.
496
+ kwargs are passed to ``ls``.
497
+ """
498
+ # TODO: allow equivalent of -name parameter
499
+ path = self._strip_protocol(path)
500
+ out = {}
501
+
502
+ # Add the root directory if withdirs is requested
503
+ # This is needed for posix glob compliance
504
+ if withdirs and path != "" and self.isdir(path):
505
+ out[path] = self.info(path)
506
+
507
+ for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
508
+ if withdirs:
509
+ files.update(dirs)
510
+ out.update({info["name"]: info for name, info in files.items()})
511
+ if not out and self.isfile(path):
512
+ # walk works on directories, but find should also return [path]
513
+ # when path happens to be a file
514
+ out[path] = {}
515
+ names = sorted(out)
516
+ if not detail:
517
+ return names
518
+ else:
519
+ return {name: out[name] for name in names}
520
+
521
+ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
522
+ """Space used by files and optionally directories within a path
523
+
524
+ Directory size does not include the size of its contents.
525
+
526
+ Parameters
527
+ ----------
528
+ path: str
529
+ total: bool
530
+ Whether to sum all the file sizes
531
+ maxdepth: int or None
532
+ Maximum number of directory levels to descend, None for unlimited.
533
+ withdirs: bool
534
+ Whether to include directory paths in the output.
535
+ kwargs: passed to ``find``
536
+
537
+ Returns
538
+ -------
539
+ Dict of {path: size} if total=False, or int otherwise, where numbers
540
+ refer to bytes used.
541
+ """
542
+ sizes = {}
543
+ if withdirs and self.isdir(path):
544
+ # Include top-level directory in output
545
+ info = self.info(path)
546
+ sizes[info["name"]] = info["size"]
547
+ for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
548
+ info = self.info(f)
549
+ sizes[info["name"]] = info["size"]
550
+ if total:
551
+ return sum(sizes.values())
552
+ else:
553
+ return sizes
554
+
555
+ def glob(self, path, maxdepth=None, **kwargs):
556
+ """Find files by glob-matching.
557
+
558
+ Pattern matching capabilities for finding files that match the given pattern.
559
+
560
+ Parameters
561
+ ----------
562
+ path: str
563
+ The glob pattern to match against
564
+ maxdepth: int or None
565
+ Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
566
+ Must be at least 1 if provided.
567
+ kwargs:
568
+ Additional arguments passed to ``find`` (e.g., detail=True)
569
+
570
+ Returns
571
+ -------
572
+ List of matched paths, or dict of paths and their info if detail=True
573
+
574
+ Notes
575
+ -----
576
+ Supported patterns:
577
+ - '*': Matches any sequence of characters within a single directory level
578
+ - ``'**'``: Matches any number of directory levels (must be an entire path component)
579
+ - '?': Matches exactly one character
580
+ - '[abc]': Matches any character in the set
581
+ - '[a-z]': Matches any character in the range
582
+ - '[!abc]': Matches any character NOT in the set
583
+
584
+ Special behaviors:
585
+ - If the path ends with '/', only folders are returned
586
+ - Consecutive '*' characters are compressed into a single '*'
587
+ - Empty brackets '[]' never match anything
588
+ - Negated empty brackets '[!]' match any single character
589
+ - Special characters in character classes are escaped properly
590
+
591
+ Limitations:
592
+ - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
593
+ - No brace expansion ('{a,b}.txt')
594
+ - No extended glob patterns ('+(pattern)', '!(pattern)')
595
+ """
596
+ if maxdepth is not None and maxdepth < 1:
597
+ raise ValueError("maxdepth must be at least 1")
598
+
599
+ import re
600
+
601
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
602
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
603
+ path = self._strip_protocol(path)
604
+ append_slash_to_dirname = ends_with_sep or path.endswith(
605
+ tuple(sep + "**" for sep in seps)
606
+ )
607
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
608
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
609
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
610
+
611
+ min_idx = min(idx_star, idx_qmark, idx_brace)
612
+
613
+ detail = kwargs.pop("detail", False)
614
+
615
+ if not has_magic(path):
616
+ if self.exists(path, **kwargs):
617
+ if not detail:
618
+ return [path]
619
+ else:
620
+ return {path: self.info(path, **kwargs)}
621
+ else:
622
+ if not detail:
623
+ return [] # glob of non-existent returns empty
624
+ else:
625
+ return {}
626
+ elif "/" in path[:min_idx]:
627
+ min_idx = path[:min_idx].rindex("/")
628
+ root = path[: min_idx + 1]
629
+ depth = path[min_idx + 1 :].count("/") + 1
630
+ else:
631
+ root = ""
632
+ depth = path[min_idx + 1 :].count("/") + 1
633
+
634
+ if "**" in path:
635
+ if maxdepth is not None:
636
+ idx_double_stars = path.find("**")
637
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
638
+ depth = depth - depth_double_stars + maxdepth
639
+ else:
640
+ depth = None
641
+
642
+ allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
643
+
644
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
645
+ pattern = re.compile(pattern)
646
+
647
+ out = {
648
+ p: info
649
+ for p, info in sorted(allpaths.items())
650
+ if pattern.match(
651
+ p + "/"
652
+ if append_slash_to_dirname and info["type"] == "directory"
653
+ else p
654
+ )
655
+ }
656
+
657
+ if detail:
658
+ return out
659
+ else:
660
+ return list(out)
661
+
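A few of the supported patterns, sketched with the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/g/a.csv": b"", "/g/b.txt": b"", "/g/sub/c.csv": b""})
assert fs.glob("/g/*.csv") == ["/g/a.csv"]  # '*' stays within one level
assert fs.glob("/g/?.csv") == ["/g/a.csv"]  # '?' matches one character
assert fs.glob("/g/[ab].*") == ["/g/a.csv", "/g/b.txt"]
```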
662
+ def exists(self, path, **kwargs):
663
+ """Is there a file at the given path"""
664
+ try:
665
+ self.info(path, **kwargs)
666
+ return True
667
+ except: # noqa: E722
668
+ # any exception allowed bar FileNotFoundError?
669
+ return False
670
+
671
+ def lexists(self, path, **kwargs):
672
+ """Whether there is a file at the given path (including
673
+ broken links)"""
674
+ return self.exists(path)
675
+
676
+ def info(self, path, **kwargs):
677
+ """Give details of entry at path
678
+
679
+ Returns a single dictionary, with exactly the same information as ``ls``
680
+ would with ``detail=True``.
681
+
682
+ The default implementation calls ls and could be overridden by a
683
+ shortcut. kwargs are passed on to ``ls()``.
684
+
685
+ Some file systems might not be able to measure the file's size, in
686
+ which case, the returned dict will include ``'size': None``.
687
+
688
+ Returns
689
+ -------
690
+ dict with keys: name (full path in the FS), size (in bytes), type (file,
691
+ directory, or something else) and other FS-specific keys.
692
+ """
693
+ path = self._strip_protocol(path)
694
+ out = self.ls(self._parent(path), detail=True, **kwargs)
695
+ out = [o for o in out if o["name"].rstrip("/") == path]
696
+ if out:
697
+ return out[0]
698
+ out = self.ls(path, detail=True, **kwargs)
699
+ path = path.rstrip("/")
700
+ out1 = [o for o in out if o["name"].rstrip("/") == path]
701
+ if len(out1) == 1:
702
+ if "size" not in out1[0]:
703
+ out1[0]["size"] = None
704
+ return out1[0]
705
+ elif len(out1) > 1 or out:
706
+ return {"name": path, "size": 0, "type": "directory"}
707
+ else:
708
+ raise FileNotFoundError(path)
709
+
710
+ def checksum(self, path):
711
+ """Unique value for current version of file
712
+
713
+ If the checksum is the same from one moment to another, the contents
714
+ are guaranteed to be the same. If the checksum changes, the contents
715
+ *might* have changed.
716
+
717
+ This should normally be overridden; default will probably capture
718
+ creation/modification timestamp (which would be good) or maybe
719
+ access timestamp (which would be bad)
720
+ """
721
+ return int(tokenize(self.info(path)), 16)
722
+
723
+ def size(self, path):
724
+ """Size in bytes of file"""
725
+ return self.info(path).get("size", None)
726
+
727
+ def sizes(self, paths):
728
+ """Size in bytes of each file in a list of paths"""
729
+ return [self.size(p) for p in paths]
730
+
731
+ def isdir(self, path):
732
+ """Is this entry directory-like?"""
733
+ try:
734
+ return self.info(path)["type"] == "directory"
735
+ except OSError:
736
+ return False
737
+
738
+ def isfile(self, path):
739
+ """Is this entry file-like?"""
740
+ try:
741
+ return self.info(path)["type"] == "file"
742
+ except: # noqa: E722
743
+ return False
744
+
745
+ def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
746
+ """Get the contents of the file as a string.
747
+
748
+ Parameters
749
+ ----------
750
+ path: str
751
+ URL of file on this filesystem
752
+ encoding, errors, newline: same as `open`.
753
+ """
754
+ with self.open(
755
+ path,
756
+ mode="r",
757
+ encoding=encoding,
758
+ errors=errors,
759
+ newline=newline,
760
+ **kwargs,
761
+ ) as f:
762
+ return f.read()
763
+
764
+ def write_text(
765
+ self, path, value, encoding=None, errors=None, newline=None, **kwargs
766
+ ):
767
+ """Write the text to the given file.
768
+
769
+ An existing file will be overwritten.
770
+
771
+ Parameters
772
+ ----------
773
+ path: str
774
+ URL of file on this filesystem
775
+ value: str
776
+ Text to write.
777
+ encoding, errors, newline: same as `open`.
778
+ """
779
+ with self.open(
780
+ path,
781
+ mode="w",
782
+ encoding=encoding,
783
+ errors=errors,
784
+ newline=newline,
785
+ **kwargs,
786
+ ) as f:
787
+ return f.write(value)
788
+
789
+ def cat_file(self, path, start=None, end=None, **kwargs):
790
+ """Get the content of a file
791
+
792
+ Parameters
793
+ ----------
794
+ path: URL of file on this filesystem
795
+ start, end: int
796
+ Bytes limits of the read. If negative, backwards from end,
797
+ like usual python slices. Either can be None for start or
798
+ end of file, respectively
799
+ kwargs: passed to ``open()``.
800
+ """
801
+ # explicitly set buffering off?
802
+ with self.open(path, "rb", **kwargs) as f:
803
+ if start is not None:
804
+ if start >= 0:
805
+ f.seek(start)
806
+ else:
807
+ f.seek(max(0, f.size + start))
808
+ if end is not None:
809
+ if end < 0:
810
+ end = f.size + end
811
+ return f.read(end - f.tell())
812
+ return f.read()
813
+
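The slice-like semantics in action (in-memory sketch):

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/rng.bin", b"0123456789")
assert fs.cat_file("/rng.bin", start=2, end=5) == b"234"
assert fs.cat_file("/rng.bin", start=-3) == b"789"  # negative start: from the end
assert fs.cat_file("/rng.bin", end=-6) == b"0123"   # negative end, too
```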
814
+ def pipe_file(self, path, value, mode="overwrite", **kwargs):
815
+ """Set the bytes of given file"""
816
+ if mode == "create" and self.exists(path):
817
+ # non-atomic but simple way; or could use "xb" in open(), which is likely
818
+ # not as well supported
819
+ raise FileExistsError
820
+ with self.open(path, "wb", **kwargs) as f:
821
+ f.write(value)
822
+
823
+ def pipe(self, path, value=None, **kwargs):
824
+ """Put value into path
825
+
826
+ (counterpart to ``cat``)
827
+
828
+ Parameters
829
+ ----------
830
+ path: string or dict(str, bytes)
831
+ If a string, a single remote location to put ``value`` bytes; if a dict,
832
+ a mapping of {path: bytesvalue}.
833
+ value: bytes, optional
834
+ If using a single path, these are the bytes to put there. Ignored if
835
+ ``path`` is a dict
836
+ """
837
+ if isinstance(path, str):
838
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
839
+ elif isinstance(path, dict):
840
+ for k, v in path.items():
841
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
842
+ else:
843
+ raise ValueError("path must be str or dict")
844
+
845
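
Both calling forms in one short sketch:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe("/single.bin", b"one")                   # str path plus value
    fs.pipe({"/a.bin": b"two", "/b.bin": b"three"})  # dict of {path: bytes}
    assert fs.cat("/b.bin") == b"three"
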
+ def cat_ranges(
846
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
847
+ ):
848
+ """Get the contents of byte ranges from one or more files
849
+
850
+ Parameters
851
+ ----------
852
+ paths: list
853
+ A list of filepaths on this filesystem
854
+ starts, ends: int or list
855
+ Bytes limits of the read. If using a single int, the same value will be
856
+ used to read all the specified files.
857
+ """
858
+ if max_gap is not None:
859
+ raise NotImplementedError
860
+ if not isinstance(paths, list):
861
+ raise TypeError
862
+ if not isinstance(starts, list):
863
+ starts = [starts] * len(paths)
864
+ if not isinstance(ends, list):
865
+ ends = [ends] * len(paths)
866
+ if len(starts) != len(paths) or len(ends) != len(paths):
867
+ raise ValueError
868
+ out = []
869
+ for p, s, e in zip(paths, starts, ends):
870
+ try:
871
+ out.append(self.cat_file(p, s, e))
872
+ except Exception as e:
873
+ if on_error == "return":
874
+ out.append(e)
875
+ else:
876
+ raise
877
+ return out
878
+
879
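
A sketch showing the scalar broadcast and the default ``on_error="return"``
behaviour (failures come back in-place as exception instances):

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/x": b"HDR-xxxx", "/y": b"HDR-yyyy"})
    assert fs.cat_ranges(["/x", "/y"], 0, 3) == [b"HDR", b"HDR"]  # ints broadcast
    out = fs.cat_ranges(["/x", "/missing"], 0, 3)
    assert out[0] == b"HDR" and isinstance(out[1], FileNotFoundError)
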
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
880
+ """Fetch (potentially multiple) paths' contents
881
+
882
+ Parameters
883
+ ----------
884
+ recursive: bool
885
+ If True, assume the path(s) are directories, and get all the
886
+ contained files
887
+ on_error : "raise", "omit", "return"
888
+ If raise, an underlying exception will be raised (converted to KeyError
889
+ if the type is in self.missing_exceptions); if omit, keys with exception
890
+ will simply not be included in the output; if "return", all keys are
891
+ included in the output, but the value will be bytes or an exception
892
+ instance.
893
+ kwargs: passed to cat_file
894
+
895
+ Returns
896
+ -------
897
+ dict of {path: contents} if there are multiple paths
898
+ or the path has been otherwise expanded
899
+ """
900
+ paths = self.expand_path(path, recursive=recursive, **kwargs)
901
+ if (
902
+ len(paths) > 1
903
+ or isinstance(path, list)
904
+ or paths[0] != self._strip_protocol(path)
905
+ ):
906
+ out = {}
907
+ for path in paths:
908
+ try:
909
+ out[path] = self.cat_file(path, **kwargs)
910
+ except Exception as e:
911
+ if on_error == "raise":
912
+ raise
913
+ if on_error == "return":
914
+ out[path] = e
915
+ return out
916
+ else:
917
+ return self.cat_file(paths[0], **kwargs)
918
+
919
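
A sketch of the return-type rule: one concrete path yields bytes, while a
list, glob, or directory yields a dict keyed by expanded path:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/data/a": b"A", "/data/b": b"B"})
    assert fs.cat("/data/a") == b"A"  # single concrete path -> bytes
    assert fs.cat("/data/*") == {"/data/a": b"A", "/data/b": b"B"}  # glob -> dict
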
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
920
+ """Copy single remote file to local"""
921
+ from .implementations.local import LocalFileSystem
922
+
923
+ if isfilelike(lpath):
924
+ outfile = lpath
925
+ elif self.isdir(rpath):
926
+ os.makedirs(lpath, exist_ok=True)
927
+ return None
928
+
929
+ fs = LocalFileSystem(auto_mkdir=True)
930
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
931
+
932
+ with self.open(rpath, "rb", **kwargs) as f1:
933
+ if outfile is None:
934
+ outfile = open(lpath, "wb")
935
+
936
+ try:
937
+ callback.set_size(getattr(f1, "size", None))
938
+ data = True
939
+ while data:
940
+ data = f1.read(self.blocksize)
941
+ segment_len = outfile.write(data)
942
+ if segment_len is None:
943
+ segment_len = len(data)
944
+ callback.relative_update(segment_len)
945
+ finally:
946
+ if not isfilelike(lpath):
947
+ outfile.close()
948
+
949
+ def get(
950
+ self,
951
+ rpath,
952
+ lpath,
953
+ recursive=False,
954
+ callback=DEFAULT_CALLBACK,
955
+ maxdepth=None,
956
+ **kwargs,
957
+ ):
958
+ """Copy file(s) to local.
959
+
960
+ Copies a specific file or tree of files (if recursive=True). If lpath
961
+ ends with a "/", it will be assumed to be a directory, and target files
962
+ will go within. Can submit a list of paths, which may be glob-patterns
963
+ and will be expanded.
964
+
965
+ Calls get_file for each source.
966
+ """
967
+ if isinstance(lpath, list) and isinstance(rpath, list):
968
+ # No need to expand paths when both source and destination
969
+ # are provided as lists
970
+ rpaths = rpath
971
+ lpaths = lpath
972
+ else:
973
+ from .implementations.local import (
974
+ LocalFileSystem,
975
+ make_path_posix,
976
+ trailing_sep,
977
+ )
978
+
979
+ source_is_str = isinstance(rpath, str)
980
+ rpaths = self.expand_path(
981
+ rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
982
+ )
983
+ if source_is_str and (not recursive or maxdepth is not None):
984
+ # Non-recursive glob does not copy directories
985
+ rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
986
+ if not rpaths:
987
+ return
988
+
989
+ if isinstance(lpath, str):
990
+ lpath = make_path_posix(lpath)
991
+
992
+ source_is_file = len(rpaths) == 1
993
+ dest_is_dir = isinstance(lpath, str) and (
994
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
995
+ )
996
+
997
+ exists = source_is_str and (
998
+ (has_magic(rpath) and source_is_file)
999
+ or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
1000
+ )
1001
+ lpaths = other_paths(
1002
+ rpaths,
1003
+ lpath,
1004
+ exists=exists,
1005
+ flatten=not source_is_str,
1006
+ )
1007
+
1008
+ callback.set_size(len(lpaths))
1009
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1010
+ with callback.branched(rpath, lpath) as child:
1011
+ self.get_file(rpath, lpath, callback=child, **kwargs)
1012
+
1013
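
A sketch of the trailing-"/" rule for the local target (the source file lands
inside the directory rather than replacing it):

    import os
    import tempfile

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe_file("/remote/data.bin", b"payload")
    local = tempfile.mkdtemp()
    fs.get("/remote/data.bin", local + "/")  # trailing sep: copy *into* the dir
    assert os.path.exists(os.path.join(local, "data.bin"))
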
+ def put_file(
1014
+ self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
1015
+ ):
1016
+ """Copy single file to remote"""
1017
+ if mode == "create" and self.exists(rpath):
1018
+ raise FileExistsError
1019
+ if os.path.isdir(lpath):
1020
+ self.makedirs(rpath, exist_ok=True)
1021
+ return None
1022
+
1023
+ with open(lpath, "rb") as f1:
1024
+ size = f1.seek(0, 2)
1025
+ callback.set_size(size)
1026
+ f1.seek(0)
1027
+
1028
+ self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
1029
+ with self.open(rpath, "wb", **kwargs) as f2:
1030
+ while f1.tell() < size:
1031
+ data = f1.read(self.blocksize)
1032
+ segment_len = f2.write(data)
1033
+ if segment_len is None:
1034
+ segment_len = len(data)
1035
+ callback.relative_update(segment_len)
1036
+
1037
+ def put(
1038
+ self,
1039
+ lpath,
1040
+ rpath,
1041
+ recursive=False,
1042
+ callback=DEFAULT_CALLBACK,
1043
+ maxdepth=None,
1044
+ **kwargs,
1045
+ ):
1046
+ """Copy file(s) from local.
1047
+
1048
+ Copies a specific file or tree of files (if recursive=True). If rpath
1049
+ ends with a "/", it will be assumed to be a directory, and target files
1050
+ will go within.
1051
+
1052
+ Calls put_file for each source.
1053
+ """
1054
+ if isinstance(lpath, list) and isinstance(rpath, list):
1055
+ # No need to expand paths when both source and destination
1056
+ # are provided as lists
1057
+ rpaths = rpath
1058
+ lpaths = lpath
1059
+ else:
1060
+ from .implementations.local import (
1061
+ LocalFileSystem,
1062
+ make_path_posix,
1063
+ trailing_sep,
1064
+ )
1065
+
1066
+ source_is_str = isinstance(lpath, str)
1067
+ if source_is_str:
1068
+ lpath = make_path_posix(lpath)
1069
+ fs = LocalFileSystem()
1070
+ lpaths = fs.expand_path(
1071
+ lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
1072
+ )
1073
+ if source_is_str and (not recursive or maxdepth is not None):
1074
+ # Non-recursive glob does not copy directories
1075
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
1076
+ if not lpaths:
1077
+ return
1078
+
1079
+ source_is_file = len(lpaths) == 1
1080
+ dest_is_dir = isinstance(rpath, str) and (
1081
+ trailing_sep(rpath) or self.isdir(rpath)
1082
+ )
1083
+
1084
+ rpath = (
1085
+ self._strip_protocol(rpath)
1086
+ if isinstance(rpath, str)
1087
+ else [self._strip_protocol(p) for p in rpath]
1088
+ )
1089
+ exists = source_is_str and (
1090
+ (has_magic(lpath) and source_is_file)
1091
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
1092
+ )
1093
+ rpaths = other_paths(
1094
+ lpaths,
1095
+ rpath,
1096
+ exists=exists,
1097
+ flatten=not source_is_str,
1098
+ )
1099
+
1100
+ callback.set_size(len(rpaths))
1101
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1102
+ with callback.branched(lpath, rpath) as child:
1103
+ self.put_file(lpath, rpath, callback=child, **kwargs)
1104
+
1105
+ def head(self, path, size=1024):
1106
+ """Get the first ``size`` bytes from file"""
1107
+ with self.open(path, "rb") as f:
1108
+ return f.read(size)
1109
+
1110
+ def tail(self, path, size=1024):
1111
+ """Get the last ``size`` bytes from file"""
1112
+ with self.open(path, "rb") as f:
1113
+ f.seek(max(-size, -f.size), 2)
1114
+ return f.read()
1115
+
1116
+ def cp_file(self, path1, path2, **kwargs):
1117
+ raise NotImplementedError
1118
+
1119
+ def copy(
1120
+ self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
1121
+ ):
1122
+ """Copy within two locations in the filesystem
1123
+
1124
+ on_error : "raise", "ignore"
1125
+ If raise, any not-found exceptions will be raised; if ignore any
1126
+ not-found exceptions will cause the path to be skipped; defaults to
1127
+ raise unless recursive is true, where the default is ignore
1128
+ """
1129
+ if on_error is None and recursive:
1130
+ on_error = "ignore"
1131
+ elif on_error is None:
1132
+ on_error = "raise"
1133
+
1134
+ if isinstance(path1, list) and isinstance(path2, list):
1135
+ # No need to expand paths when both source and destination
1136
+ # are provided as lists
1137
+ paths1 = path1
1138
+ paths2 = path2
1139
+ else:
1140
+ from .implementations.local import trailing_sep
1141
+
1142
+ source_is_str = isinstance(path1, str)
1143
+ paths1 = self.expand_path(
1144
+ path1, recursive=recursive, maxdepth=maxdepth, **kwargs
1145
+ )
1146
+ if source_is_str and (not recursive or maxdepth is not None):
1147
+ # Non-recursive glob does not copy directories
1148
+ paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
1149
+ if not paths1:
1150
+ return
1151
+
1152
+ source_is_file = len(paths1) == 1
1153
+ dest_is_dir = isinstance(path2, str) and (
1154
+ trailing_sep(path2) or self.isdir(path2)
1155
+ )
1156
+
1157
+ exists = source_is_str and (
1158
+ (has_magic(path1) and source_is_file)
1159
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
1160
+ )
1161
+ paths2 = other_paths(
1162
+ paths1,
1163
+ path2,
1164
+ exists=exists,
1165
+ flatten=not source_is_str,
1166
+ )
1167
+
1168
+ for p1, p2 in zip(paths1, paths2):
1169
+ try:
1170
+ self.cp_file(p1, p2, **kwargs)
1171
+ except FileNotFoundError:
1172
+ if on_error == "raise":
1173
+ raise
1174
+
1175
+ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
1176
+ """Turn one or more globs or directories into a list of all matching paths
1177
+ to files or directories.
1178
+
1179
+ kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
1180
+ """
1181
+
1182
+ if maxdepth is not None and maxdepth < 1:
1183
+ raise ValueError("maxdepth must be at least 1")
1184
+
1185
+ if isinstance(path, (str, os.PathLike)):
1186
+ out = self.expand_path([path], recursive, maxdepth, **kwargs)
1187
+ else:
1188
+ out = set()
1189
+ path = [self._strip_protocol(p) for p in path]
1190
+ for p in path:
1191
+ if has_magic(p):
1192
+ bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
1193
+ out |= bit
1194
+ if recursive:
1195
+ # glob call above expanded one depth so if maxdepth is defined
1196
+ # then decrement it in expand_path call below. If it is zero
1197
+ # after decrementing then avoid expand_path call.
1198
+ if maxdepth is not None and maxdepth <= 1:
1199
+ continue
1200
+ out |= set(
1201
+ self.expand_path(
1202
+ list(bit),
1203
+ recursive=recursive,
1204
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
1205
+ **kwargs,
1206
+ )
1207
+ )
1208
+ continue
1209
+ elif recursive:
1210
+ rec = set(
1211
+ self.find(
1212
+ p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
1213
+ )
1214
+ )
1215
+ out |= rec
1216
+ if p not in out and (recursive is False or self.exists(p)):
1217
+ # should only check once, for the root
1218
+ out.add(p)
1219
+ if not out:
1220
+ raise FileNotFoundError(path)
1221
+ return sorted(out)
1222
+
1223
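
A sketch of how globs and ``recursive`` combine, assuming the in-memory
filesystem's implicit directories list as shown:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/tree/a.txt": b"", "/tree/sub/b.txt": b""})
    assert fs.expand_path("/tree/*.txt") == ["/tree/a.txt"]  # glob, no recursion
    assert fs.expand_path("/tree", recursive=True) == [
        "/tree", "/tree/a.txt", "/tree/sub", "/tree/sub/b.txt"
    ]  # root plus everything below it, sorted
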
+ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
1224
+ """Move file(s) from one location to another"""
1225
+ if path1 == path2:
1226
+ logger.debug("%s mv: The paths are the same, so no files were moved.", self)
1227
+ else:
1228
+ # explicitly raise exception to prevent data corruption
1229
+ self.copy(
1230
+ path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"
1231
+ )
1232
+ self.rm(path1, recursive=recursive)
1233
+
1234
+ def rm_file(self, path):
1235
+ """Delete a file"""
1236
+ self._rm(path)
1237
+
1238
+ def _rm(self, path):
1239
+ """Delete one file"""
1240
+ # this is the old name for the method, prefer rm_file
1241
+ raise NotImplementedError
1242
+
1243
+ def rm(self, path, recursive=False, maxdepth=None):
1244
+ """Delete files.
1245
+
1246
+ Parameters
1247
+ ----------
1248
+ path: str or list of str
1249
+ File(s) to delete.
1250
+ recursive: bool
1251
+ If file(s) are directories, recursively delete contents and then
1252
+ also remove the directory
1253
+ maxdepth: int or None
1254
+ Depth to pass to walk for finding files to delete, if recursive.
1255
+ If None, there will be no limit and infinite recursion may be
1256
+ possible.
1257
+ """
1258
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
1259
+ for p in reversed(path):
1260
+ self.rm_file(p)
1261
+
1262
+ @classmethod
1263
+ def _parent(cls, path):
1264
+ path = cls._strip_protocol(path)
1265
+ if "/" in path:
1266
+ parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
1267
+ return cls.root_marker + parent
1268
+ else:
1269
+ return cls.root_marker
1270
+
1271
+ def _open(
1272
+ self,
1273
+ path,
1274
+ mode="rb",
1275
+ block_size=None,
1276
+ autocommit=True,
1277
+ cache_options=None,
1278
+ **kwargs,
1279
+ ):
1280
+ """Return raw bytes-mode file-like from the file-system"""
1281
+ return AbstractBufferedFile(
1282
+ self,
1283
+ path,
1284
+ mode,
1285
+ block_size,
1286
+ autocommit,
1287
+ cache_options=cache_options,
1288
+ **kwargs,
1289
+ )
1290
+
1291
+ def open(
1292
+ self,
1293
+ path,
1294
+ mode="rb",
1295
+ block_size=None,
1296
+ cache_options=None,
1297
+ compression=None,
1298
+ **kwargs,
1299
+ ):
1300
+ """
1301
+ Return a file-like object from the filesystem
1302
+
1303
+ The resultant instance must function correctly in a context ``with``
1304
+ block.
1305
+
1306
+ Parameters
1307
+ ----------
1308
+ path: str
1309
+ Target file
1310
+ mode: str like 'rb', 'w'
1311
+ See builtin ``open()``
1312
+ Mode "x" (exclusive write) may be implemented by the backend. Even if
1313
+ it is, whether it is checked up front or on commit, and whether it is
1314
+ atomic is implementation-dependent.
1315
+ block_size: int
1316
+ Some indication of buffering - this is a value in bytes
1317
+ cache_options : dict, optional
1318
+ Extra arguments to pass through to the cache.
1319
+ compression: string or None
1320
+ If given, open file using compression codec. Can either be a compression
1321
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
1322
+ compression from the filename suffix.
1323
+ encoding, errors, newline: passed on to TextIOWrapper for text mode
1324
+ """
1325
+ import io
1326
+
1327
+ path = self._strip_protocol(path)
1328
+ if "b" not in mode:
1329
+ mode = mode.replace("t", "") + "b"
1330
+
1331
+ text_kwargs = {
1332
+ k: kwargs.pop(k)
1333
+ for k in ["encoding", "errors", "newline"]
1334
+ if k in kwargs
1335
+ }
1336
+ return io.TextIOWrapper(
1337
+ self.open(
1338
+ path,
1339
+ mode,
1340
+ block_size=block_size,
1341
+ cache_options=cache_options,
1342
+ compression=compression,
1343
+ **kwargs,
1344
+ ),
1345
+ **text_kwargs,
1346
+ )
1347
+ else:
1348
+ ac = kwargs.pop("autocommit", not self._intrans)
1349
+ f = self._open(
1350
+ path,
1351
+ mode=mode,
1352
+ block_size=block_size,
1353
+ autocommit=ac,
1354
+ cache_options=cache_options,
1355
+ **kwargs,
1356
+ )
1357
+ if compression is not None:
1358
+ from fsspec.compression import compr
1359
+ from fsspec.core import get_compression
1360
+
1361
+ compression = get_compression(path, compression)
1362
+ compress = compr[compression]
1363
+ f = compress(f, mode=mode[0])
1364
+
1365
+ if not ac and "r" not in mode:
1366
+ self.transaction.files.append(f)
1367
+ return f
1368
+
1369
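
A sketch of the two conveniences this method layers on top of ``_open``:
text mode via ``TextIOWrapper``, and transparent compression with
``compression="infer"`` (here gzip, guessed from the ".gz" suffix):

    import fsspec

    fs = fsspec.filesystem("memory")
    with fs.open("/log.txt.gz", "wt", compression="infer") as f:
        f.write("hello")
    with fs.open("/log.txt.gz", "rt", compression="infer") as f:
        assert f.read() == "hello"
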
+ def touch(self, path, truncate=True, **kwargs):
1370
+ """Create empty file, or update timestamp
1371
+
1372
+ Parameters
1373
+ ----------
1374
+ path: str
1375
+ file location
1376
+ truncate: bool
1377
+ If True, always set file size to 0; if False, update timestamp and
1378
+ leave file unchanged, if backend allows this
1379
+ """
1380
+ if truncate or not self.exists(path):
1381
+ with self.open(path, "wb", **kwargs):
1382
+ pass
1383
+ else:
1384
+ raise NotImplementedError # update timestamp, if possible
1385
+
1386
+ def ukey(self, path):
1387
+ """Hash of file properties, to tell if it has changed"""
1388
+ return sha256(str(self.info(path)).encode()).hexdigest()
1389
+
1390
+ def read_block(self, fn, offset, length, delimiter=None):
1391
+ """Read a block of bytes from
1392
+
1393
+ Starting at ``offset`` of the file, read ``length`` bytes. If
1394
+ ``delimiter`` is set then we ensure that the read starts and stops at
1395
+ delimiter boundaries that follow the locations ``offset`` and ``offset
1396
+ + length``. If ``offset`` is zero then we start at zero. The
1397
+ bytestring returned WILL include the end delimiter string.
1398
+
1399
+ If offset+length is beyond the eof, reads to eof.
1400
+
1401
+ Parameters
1402
+ ----------
1403
+ fn: string
1404
+ Path to filename
1405
+ offset: int
1406
+ Byte offset to start read
1407
+ length: int
1408
+ Number of bytes to read. If None, read to end.
1409
+ delimiter: bytes (optional)
1410
+ Ensure reading starts and stops at delimiter bytestring
1411
+
1412
+ Examples
1413
+ --------
1414
+ >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP
1415
+ b'Alice, 100\\nBo'
1416
+ >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP
1417
+ b'Alice, 100\\nBob, 200\\n'
1418
+
1419
+ Use ``length=None`` to read to the end of the file.
1420
+ >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP
1421
+ b'Alice, 100\\nBob, 200\\nCharlie, 300'
1422
+
1423
+ See Also
1424
+ --------
1425
+ :func:`fsspec.utils.read_block`
1426
+ """
1427
+ with self.open(fn, "rb") as f:
1428
+ size = f.size
1429
+ if length is None:
1430
+ length = size
1431
+ if size is not None and offset + length > size:
1432
+ length = size - offset
1433
+ return read_block(f, offset, length, delimiter)
1434
+
1435
+ def to_json(self, *, include_password: bool = True) -> str:
1436
+ """
1437
+ JSON representation of this filesystem instance.
1438
+
1439
+ Parameters
1440
+ ----------
1441
+ include_password: bool, default True
1442
+ Whether to include the password (if any) in the output.
1443
+
1444
+ Returns
1445
+ -------
1446
+ JSON string with keys ``cls`` (the python location of this class),
1447
+ ``protocol`` (text name of this class's protocol, first one in case of
1448
+ multiple), ``args`` (positional args, usually empty), and all other
1449
+ keyword arguments as their own keys.
1450
+
1451
+ Warnings
1452
+ --------
1453
+ Serialized filesystems may contain sensitive information which has been
1454
+ passed to the constructor, such as passwords and tokens. Make sure you
1455
+ store and send them in a secure environment!
1456
+ """
1457
+ from .json import FilesystemJSONEncoder
1458
+
1459
+ return json.dumps(
1460
+ self,
1461
+ cls=type(
1462
+ "_FilesystemJSONEncoder",
1463
+ (FilesystemJSONEncoder,),
1464
+ {"include_password": include_password},
1465
+ ),
1466
+ )
1467
+
1468
+ @staticmethod
1469
+ def from_json(blob: str) -> AbstractFileSystem:
1470
+ """
1471
+ Recreate a filesystem instance from JSON representation.
1472
+
1473
+ See ``.to_json()`` for the expected structure of the input.
1474
+
1475
+ Parameters
1476
+ ----------
1477
+ blob: str
1478
+
1479
+ Returns
1480
+ -------
1481
+ file system instance, not necessarily of this particular class.
1482
+
1483
+ Warnings
1484
+ --------
1485
+ This can import arbitrary modules (as determined by the ``cls`` key).
1486
+ Make sure you haven't installed any modules that may execute malicious code
1487
+ at import time.
1488
+ """
1489
+ from .json import FilesystemJSONDecoder
1490
+
1491
+ return json.loads(blob, cls=FilesystemJSONDecoder)
1492
+
1493
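
A round-trip sketch; because instances are cached on their constructor
arguments, deserializing equal options hands back the same object:

    import fsspec

    fs = fsspec.filesystem("memory")
    blob = fs.to_json()
    assert fsspec.AbstractFileSystem.from_json(blob) is fs
    # to_json(include_password=False) would drop a "password" storage option
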
+ def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
1494
+ """
1495
+ JSON-serializable dictionary representation of this filesystem instance.
1496
+
1497
+ Parameters
1498
+ ----------
1499
+ include_password: bool, default True
1500
+ Whether to include the password (if any) in the output.
1501
+
1502
+ Returns
1503
+ -------
1504
+ Dictionary with keys ``cls`` (the python location of this class),
1505
+ ``protocol`` (text name of this class's protocol, first one in case of
1506
+ multiple), ``args`` (positional args, usually empty), and all other
1507
+ keyword arguments as their own keys.
1508
+
1509
+ Warnings
1510
+ --------
1511
+ Serialized filesystems may contain sensitive information which has been
1512
+ passed to the constructor, such as passwords and tokens. Make sure you
1513
+ store and send them in a secure environment!
1514
+ """
1515
+ from .json import FilesystemJSONEncoder
1516
+
1517
+ json_encoder = FilesystemJSONEncoder()
1518
+
1519
+ cls = type(self)
1520
+ proto = self.protocol
1521
+
1522
+ storage_options = dict(self.storage_options)
1523
+ if not include_password:
1524
+ storage_options.pop("password", None)
1525
+
1526
+ return dict(
1527
+ cls=f"{cls.__module__}:{cls.__name__}",
1528
+ protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
1529
+ args=json_encoder.make_serializable(self.storage_args),
1530
+ **json_encoder.make_serializable(storage_options),
1531
+ )
1532
+
1533
+ @staticmethod
1534
+ def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
1535
+ """
1536
+ Recreate a filesystem instance from dictionary representation.
1537
+
1538
+ See ``.to_dict()`` for the expected structure of the input.
1539
+
1540
+ Parameters
1541
+ ----------
1542
+ dct: Dict[str, Any]
1543
+
1544
+ Returns
1545
+ -------
1546
+ file system instance, not necessarily of this particular class.
1547
+
1548
+ Warnings
1549
+ --------
1550
+ This can import arbitrary modules (as determined by the ``cls`` key).
1551
+ Make sure you haven't installed any modules that may execute malicious code
1552
+ at import time.
1553
+ """
1554
+ from .json import FilesystemJSONDecoder
1555
+
1556
+ json_decoder = FilesystemJSONDecoder()
1557
+
1558
+ dct = dict(dct) # Defensive copy
1559
+
1560
+ cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
1561
+ if cls is None:
1562
+ raise ValueError("Not a serialized AbstractFileSystem")
1563
+
1564
+ dct.pop("cls", None)
1565
+ dct.pop("protocol", None)
1566
+
1567
+ return cls(
1568
+ *json_decoder.unmake_serializable(dct.pop("args", ())),
1569
+ **json_decoder.unmake_serializable(dct),
1570
+ )
1571
+
1572
+ def _get_pyarrow_filesystem(self):
1573
+ """
1574
+ Make a version of the FS instance which will be acceptable to pyarrow
1575
+ """
1576
+ # all instances already also derive from pyarrow
1577
+ return self
1578
+
1579
+ def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
1580
+ """Create key/value store based on this file-system
1581
+
1582
+ Makes a MutableMapping interface to the FS at the given root path.
1583
+ See ``fsspec.mapping.FSMap`` for further details.
1584
+ """
1585
+ from .mapping import FSMap
1586
+
1587
+ return FSMap(
1588
+ root,
1589
+ self,
1590
+ check=check,
1591
+ create=create,
1592
+ missing_exceptions=missing_exceptions,
1593
+ )
1594
+
1595
+ @classmethod
1596
+ def clear_instance_cache(cls):
1597
+ """
1598
+ Clear the cache of filesystem instances.
1599
+
1600
+ Notes
1601
+ -----
1602
+ Unless overridden by setting the ``cachable`` class attribute to False,
1603
+ the filesystem class stores a reference to newly created instances. This
1604
+ prevents Python's normal rules around garbage collection from working,
1605
+ since the instances refcount will not drop to zero until
1606
+ ``clear_instance_cache`` is called.
1607
+ """
1608
+ cls._cache.clear()
1609
+
1610
+ def created(self, path):
1611
+ """Return the created timestamp of a file as a datetime.datetime"""
1612
+ raise NotImplementedError
1613
+
1614
+ def modified(self, path):
1615
+ """Return the modified timestamp of a file as a datetime.datetime"""
1616
+ raise NotImplementedError
1617
+
1618
+ def tree(
1619
+ self,
1620
+ path: str = "/",
1621
+ recursion_limit: int = 2,
1622
+ max_display: int = 25,
1623
+ display_size: bool = False,
1624
+ prefix: str = "",
1625
+ is_last: bool = True,
1626
+ first: bool = True,
1627
+ indent_size: int = 4,
1628
+ ) -> str:
1629
+ """
1630
+ Return a tree-like structure of the filesystem starting from the given path as a string.
1631
+
1632
+ Parameters
1633
+ ----------
1634
+ path: Root path to start traversal from
1635
+ recursion_limit: Maximum depth of directory traversal
1636
+ max_display: Maximum number of items to display per directory
1637
+ display_size: Whether to display file sizes
1638
+ prefix: Current line prefix for visual tree structure
1639
+ is_last: Whether current item is last in its level
1640
+ first: Whether this is the first call (displays root path)
1641
+ indent_size: Number of spaces per indentation level
1642
+
1643
+ Returns
1644
+ -------
1645
+ str: A string representing the tree structure.
1646
+
1647
+ Example
1648
+ -------
1649
+ >>> from fsspec import filesystem
1650
+
1651
+ >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
1652
+ >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
1653
+ >>> print(tree)
1654
+ """
1655
+
1656
+ def format_bytes(n: int) -> str:
1657
+ """Format bytes as text."""
1658
+ for prefix, k in (
1659
+ ("P", 2**50),
1660
+ ("T", 2**40),
1661
+ ("G", 2**30),
1662
+ ("M", 2**20),
1663
+ ("k", 2**10),
1664
+ ):
1665
+ if n >= 0.9 * k:
1666
+ return f"{n / k:.2f} {prefix}b"
1667
+ return f"{n}B"
1668
+
1669
+ result = []
1670
+
1671
+ if first:
1672
+ result.append(path)
1673
+
1674
+ if recursion_limit:
1675
+ indent = " " * indent_size
1676
+ contents = self.ls(path, detail=True)
1677
+ contents.sort(
1678
+ key=lambda x: (x.get("type") != "directory", x.get("name", ""))
1679
+ )
1680
+
1681
+ if max_display is not None and len(contents) > max_display:
1682
+ displayed_contents = contents[:max_display]
1683
+ remaining_count = len(contents) - max_display
1684
+ else:
1685
+ displayed_contents = contents
1686
+ remaining_count = 0
1687
+
1688
+ for i, item in enumerate(displayed_contents):
1689
+ is_last_item = (i == len(displayed_contents) - 1) and (
1690
+ remaining_count == 0
1691
+ )
1692
+
1693
+ branch = (
1694
+ "└" + ("─" * (indent_size - 2))
1695
+ if is_last_item
1696
+ else "├" + ("─" * (indent_size - 2))
1697
+ )
1698
+ branch += " "
1699
+ new_prefix = prefix + (
1700
+ indent if is_last_item else "│" + " " * (indent_size - 1)
1701
+ )
1702
+
1703
+ name = os.path.basename(item.get("name", ""))
1704
+
1705
+ if display_size and item.get("type") == "directory":
1706
+ sub_contents = self.ls(item.get("name", ""), detail=True)
1707
+ num_files = sum(
1708
+ 1 for sub_item in sub_contents if sub_item.get("type") == "file"
1709
+ )
1710
+ num_folders = sum(
1711
+ 1
1712
+ for sub_item in sub_contents
1713
+ if sub_item.get("type") == "directory"
1714
+ )
1715
+
1716
+ if num_files == 0 and num_folders == 0:
1717
+ size = " (empty folder)"
1718
+ elif num_files == 0:
1719
+ size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
1720
+ elif num_folders == 0:
1721
+ size = f" ({num_files} file{'s' if num_files > 1 else ''})"
1722
+ else:
1723
+ size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
1724
+ elif display_size and item.get("type") == "file":
1725
+ size = f" ({format_bytes(item.get('size', 0))})"
1726
+ else:
1727
+ size = ""
1728
+
1729
+ result.append(f"{prefix}{branch}{name}{size}")
1730
+
1731
+ if item.get("type") == "directory" and recursion_limit > 0:
1732
+ result.append(
1733
+ self.tree(
1734
+ path=item.get("name", ""),
1735
+ recursion_limit=recursion_limit - 1,
1736
+ max_display=max_display,
1737
+ display_size=display_size,
1738
+ prefix=new_prefix,
1739
+ is_last=is_last_item,
1740
+ first=False,
1741
+ indent_size=indent_size,
1742
+ )
1743
+ )
1744
+
1745
+ if remaining_count > 0:
1746
+ more_message = f"{remaining_count} more item(s) not displayed."
1747
+ result.append(
1748
+ f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
1749
+ )
1750
+
1751
+ return "\n".join(_ for _ in result if _)
1752
+
1753
+ # ------------------------------------------------------------------------
1754
+ # Aliases
1755
+
1756
+ def read_bytes(self, path, start=None, end=None, **kwargs):
1757
+ """Alias of `AbstractFileSystem.cat_file`."""
1758
+ return self.cat_file(path, start=start, end=end, **kwargs)
1759
+
1760
+ def write_bytes(self, path, value, **kwargs):
1761
+ """Alias of `AbstractFileSystem.pipe_file`."""
1762
+ self.pipe_file(path, value, **kwargs)
1763
+
1764
+ def makedir(self, path, create_parents=True, **kwargs):
1765
+ """Alias of `AbstractFileSystem.mkdir`."""
1766
+ return self.mkdir(path, create_parents=create_parents, **kwargs)
1767
+
1768
+ def mkdirs(self, path, exist_ok=False):
1769
+ """Alias of `AbstractFileSystem.makedirs`."""
1770
+ return self.makedirs(path, exist_ok=exist_ok)
1771
+
1772
+ def listdir(self, path, detail=True, **kwargs):
1773
+ """Alias of `AbstractFileSystem.ls`."""
1774
+ return self.ls(path, detail=detail, **kwargs)
1775
+
1776
+ def cp(self, path1, path2, **kwargs):
1777
+ """Alias of `AbstractFileSystem.copy`."""
1778
+ return self.copy(path1, path2, **kwargs)
1779
+
1780
+ def move(self, path1, path2, **kwargs):
1781
+ """Alias of `AbstractFileSystem.mv`."""
1782
+ return self.mv(path1, path2, **kwargs)
1783
+
1784
+ def stat(self, path, **kwargs):
1785
+ """Alias of `AbstractFileSystem.info`."""
1786
+ return self.info(path, **kwargs)
1787
+
1788
+ def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
1789
+ """Alias of `AbstractFileSystem.du`."""
1790
+ return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
1791
+
1792
+ def rename(self, path1, path2, **kwargs):
1793
+ """Alias of `AbstractFileSystem.mv`."""
1794
+ return self.mv(path1, path2, **kwargs)
1795
+
1796
+ def delete(self, path, recursive=False, maxdepth=None):
1797
+ """Alias of `AbstractFileSystem.rm`."""
1798
+ return self.rm(path, recursive=recursive, maxdepth=maxdepth)
1799
+
1800
+ def upload(self, lpath, rpath, recursive=False, **kwargs):
1801
+ """Alias of `AbstractFileSystem.put`."""
1802
+ return self.put(lpath, rpath, recursive=recursive, **kwargs)
1803
+
1804
+ def download(self, rpath, lpath, recursive=False, **kwargs):
1805
+ """Alias of `AbstractFileSystem.get`."""
1806
+ return self.get(rpath, lpath, recursive=recursive, **kwargs)
1807
+
1808
+ def sign(self, path, expiration=100, **kwargs):
1809
+ """Create a signed URL representing the given path
1810
+
1811
+ Some implementations allow temporary URLs to be generated, as a
1812
+ way of delegating credentials.
1813
+
1814
+ Parameters
1815
+ ----------
1816
+ path : str
1817
+ The path on the filesystem
1818
+ expiration : int
1819
+ Number of seconds to enable the URL for (if supported)
1820
+
1821
+ Returns
1822
+ -------
1823
+ URL : str
1824
+ The signed URL
1825
+
1826
+ Raises
1827
+ ------
1828
+ NotImplementedError : if method is not implemented for a filesystem
1829
+ """
1830
+ raise NotImplementedError("Sign is not implemented for this filesystem")
1831
+
1832
+ def _isfilestore(self):
1833
+ # Originally inherited from pyarrow DaskFileSystem. Keeping this
1834
+ # here for backwards compatibility as long as pyarrow uses its
1835
+ # legacy fsspec-compatible filesystems and thus accepts fsspec
1836
+ # filesystems as well
1837
+ return False
1838
+
1839
+
1840
+ class AbstractBufferedFile(io.IOBase):
1841
+ """Convenient class to derive from to provide buffering
1842
+
1843
+ In the case that the backend does not provide a pythonic file-like object
1844
+ already, this class contains much of the logic to build one. The only
1845
+ methods that need to be overridden are ``_upload_chunk``,
1846
+ ``_initiate_upload`` and ``_fetch_range``.
1847
+ """
1848
+
1849
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
1850
+ _details = None
1851
+
1852
+ def __init__(
1853
+ self,
1854
+ fs,
1855
+ path,
1856
+ mode="rb",
1857
+ block_size="default",
1858
+ autocommit=True,
1859
+ cache_type="readahead",
1860
+ cache_options=None,
1861
+ size=None,
1862
+ **kwargs,
1863
+ ):
1864
+ """
1865
+ Template for files with buffered reading and writing
1866
+
1867
+ Parameters
1868
+ ----------
1869
+ fs: instance of FileSystem
1870
+ path: str
1871
+ location in file-system
1872
+ mode: str
1873
+ Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
1874
+ systems may be read-only, and some may not support append.
1875
+ block_size: int
1876
+ Buffer size for reading or writing, 'default' for class default
1877
+ autocommit: bool
1878
+ Whether to write to final destination; may only impact what
1879
+ happens when file is being closed.
1880
+ cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
1881
+ Caching policy in read mode. See the definitions in ``core``.
1882
+ cache_options : dict
1883
+ Additional options passed to the constructor for the cache specified
1884
+ by `cache_type`.
1885
+ size: int
1886
+ If given and in read mode, suppresses having to look up the file size
1887
+ kwargs:
1888
+ Gets stored as self.kwargs
1889
+ """
1890
+ from .core import caches
1891
+
1892
+ self.path = path
1893
+ self.fs = fs
1894
+ self.mode = mode
1895
+ self.blocksize = (
1896
+ self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
1897
+ )
1898
+ self.loc = 0
1899
+ self.autocommit = autocommit
1900
+ self.end = None
1901
+ self.start = None
1902
+ self.closed = False
1903
+
1904
+ if cache_options is None:
1905
+ cache_options = {}
1906
+
1907
+ if "trim" in kwargs:
1908
+ warnings.warn(
1909
+ "Passing 'trim' to control the cache behavior has been deprecated. "
1910
+ "Specify it within the 'cache_options' argument instead.",
1911
+ FutureWarning,
1912
+ )
1913
+ cache_options["trim"] = kwargs.pop("trim")
1914
+
1915
+ self.kwargs = kwargs
1916
+
1917
+ if mode not in {"ab", "rb", "wb", "xb"}:
1918
+ raise NotImplementedError("File mode not supported")
1919
+ if mode == "rb":
1920
+ if size is not None:
1921
+ self.size = size
1922
+ else:
1923
+ self.size = self.details["size"]
1924
+ self.cache = caches[cache_type](
1925
+ self.blocksize, self._fetch_range, self.size, **cache_options
1926
+ )
1927
+ else:
1928
+ self.buffer = io.BytesIO()
1929
+ self.offset = None
1930
+ self.forced = False
1931
+ self.location = None
1932
+
1933
+ @property
1934
+ def details(self):
1935
+ if self._details is None:
1936
+ self._details = self.fs.info(self.path)
1937
+ return self._details
1938
+
1939
+ @details.setter
1940
+ def details(self, value):
1941
+ self._details = value
1942
+ self.size = value["size"]
1943
+
1944
+ @property
1945
+ def full_name(self):
1946
+ return _unstrip_protocol(self.path, self.fs)
1947
+
1948
+ @property
1949
+ def closed(self):
1950
+ # get around this attr being read-only in IOBase
1951
+ # use getattr here, since this can be called during del
1952
+ return getattr(self, "_closed", True)
1953
+
1954
+ @closed.setter
1955
+ def closed(self, c):
1956
+ self._closed = c
1957
+
1958
+ def __hash__(self):
1959
+ if "w" in self.mode:
1960
+ return id(self)
1961
+ else:
1962
+ return int(tokenize(self.details), 16)
1963
+
1964
+ def __eq__(self, other):
1965
+ """Files are equal if they have the same checksum, only in read mode"""
1966
+ if self is other:
1967
+ return True
1968
+ return (
1969
+ isinstance(other, type(self))
1970
+ and self.mode == "rb"
1971
+ and other.mode == "rb"
1972
+ and hash(self) == hash(other)
1973
+ )
1974
+
1975
+ def commit(self):
1976
+ """Move from temp to final destination"""
1977
+
1978
+ def discard(self):
1979
+ """Throw away temporary file"""
1980
+
1981
+ def info(self):
1982
+ """File information about this path"""
1983
+ if self.readable():
1984
+ return self.details
1985
+ else:
1986
+ raise ValueError("Info not available while writing")
1987
+
1988
+ def tell(self):
1989
+ """Current file location"""
1990
+ return self.loc
1991
+
1992
+ def seek(self, loc, whence=0):
1993
+ """Set current file location
1994
+
1995
+ Parameters
1996
+ ----------
1997
+ loc: int
1998
+ byte location
1999
+ whence: {0, 1, 2}
2000
+ from start of file, current location or end of file, resp.
2001
+ """
2002
+ loc = int(loc)
2003
+ if not self.mode == "rb":
2004
+ raise OSError(ESPIPE, "Seek only available in read mode")
2005
+ if whence == 0:
2006
+ nloc = loc
2007
+ elif whence == 1:
2008
+ nloc = self.loc + loc
2009
+ elif whence == 2:
2010
+ nloc = self.size + loc
2011
+ else:
2012
+ raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
2013
+ if nloc < 0:
2014
+ raise ValueError("Seek before start of file")
2015
+ self.loc = nloc
2016
+ return self.loc
2017
+
2018
+ def write(self, data):
2019
+ """
2020
+ Write data to buffer.
2021
+
2022
+ Buffer only sent on flush() or if buffer is greater than
2023
+ or equal to blocksize.
2024
+
2025
+ Parameters
2026
+ ----------
2027
+ data: bytes
2028
+ Set of bytes to be written.
2029
+ """
2030
+ if not self.writable():
2031
+ raise ValueError("File not in write mode")
2032
+ if self.closed:
2033
+ raise ValueError("I/O operation on closed file.")
2034
+ if self.forced:
2035
+ raise ValueError("This file has been force-flushed, can only close")
2036
+ out = self.buffer.write(data)
2037
+ self.loc += out
2038
+ if self.buffer.tell() >= self.blocksize:
2039
+ self.flush()
2040
+ return out
2041
+
2042
+ def flush(self, force=False):
2043
+ """
2044
+ Write buffered data to backend store.
2045
+
2046
+ Writes the current buffer, if it is larger than the block-size, or if
2047
+ the file is being closed.
2048
+
2049
+ Parameters
2050
+ ----------
2051
+ force: bool
2052
+ When closing, write the last block even if it is smaller than
2053
+ blocks are allowed to be. Disallows further writing to this file.
2054
+ """
2055
+
2056
+ if self.closed:
2057
+ raise ValueError("Flush on closed file")
2058
+ if force and self.forced:
2059
+ raise ValueError("Force flush cannot be called more than once")
2060
+ if force:
2061
+ self.forced = True
2062
+
2063
+ if self.readable():
2064
+ # no-op to flush on read-mode
2065
+ return
2066
+
2067
+ if not force and self.buffer.tell() < self.blocksize:
2068
+ # Defer write on small block
2069
+ return
2070
+
2071
+ if self.offset is None:
2072
+ # Initialize a multipart upload
2073
+ self.offset = 0
2074
+ try:
2075
+ self._initiate_upload()
2076
+ except:
2077
+ self.closed = True
2078
+ raise
2079
+
2080
+ if self._upload_chunk(final=force) is not False:
2081
+ self.offset += self.buffer.seek(0, 2)
2082
+ self.buffer = io.BytesIO()
2083
+
2084
+ def _upload_chunk(self, final=False):
2085
+ """Write one part of a multi-block file upload
2086
+
2087
+ Parameters
2088
+ ==========
2089
+ final: bool
2090
+ This is the last block, so should complete file, if
2091
+ self.autocommit is True.
2092
+ """
2093
+ # may not yet have been initialized; may need to call _initiate_upload
2094
+
2095
+ def _initiate_upload(self):
2096
+ """Create remote file/upload"""
2097
+ pass
2098
+
2099
+ def _fetch_range(self, start, end):
2100
+ """Get the specified set of bytes from remote"""
2101
+ return self.fs.cat_file(self.path, start=start, end=end)
2102
+
2103
+ def read(self, length=-1):
2104
+ """
2105
+ Return data from cache, or fetch pieces as necessary
2106
+
2107
+ Parameters
2108
+ ----------
2109
+ length: int (-1)
2110
+ Number of bytes to read; if <0, all remaining bytes.
2111
+ """
2112
+ length = -1 if length is None else int(length)
2113
+ if self.mode != "rb":
2114
+ raise ValueError("File not in read mode")
2115
+ if length < 0:
2116
+ length = self.size - self.loc
2117
+ if self.closed:
2118
+ raise ValueError("I/O operation on closed file.")
2119
+ if length == 0:
2120
+ # don't even bother calling fetch
2121
+ return b""
2122
+ out = self.cache._fetch(self.loc, self.loc + length)
2123
+
2124
+ logger.debug(
2125
+ "%s read: %i - %i %s",
2126
+ self,
2127
+ self.loc,
2128
+ self.loc + length,
2129
+ self.cache._log_stats(),
2130
+ )
2131
+ self.loc += len(out)
2132
+ return out
2133
+
2134
+ def readinto(self, b):
2135
+ """mirrors builtin file's readinto method
2136
+
2137
+ https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
2138
+ """
2139
+ out = memoryview(b).cast("B")
2140
+ data = self.read(out.nbytes)
2141
+ out[: len(data)] = data
2142
+ return len(data)
2143
+
2144
+ def readuntil(self, char=b"\n", blocks=None):
2145
+ """Return data between current position and first occurrence of char
2146
+
2147
+ char is included in the output, except if the end of the file is
2148
+ encountered first.
2149
+
2150
+ Parameters
2151
+ ----------
2152
+ char: bytes
2153
+ Thing to find
2154
+ blocks: None or int
2155
+ How much to read in each go. Defaults to file blocksize - which may
2156
+ mean a new read on every call.
2157
+ """
2158
+ out = []
2159
+ while True:
2160
+ start = self.tell()
2161
+ part = self.read(blocks or self.blocksize)
2162
+ if len(part) == 0:
2163
+ break
2164
+ found = part.find(char)
2165
+ if found > -1:
2166
+ out.append(part[: found + len(char)])
2167
+ self.seek(start + found + len(char))
2168
+ break
2169
+ out.append(part)
2170
+ return b"".join(out)
2171
+
2172
+ def readline(self):
2173
+ """Read until and including the first occurrence of newline character
2174
+
2175
+ Note that, because of character encoding, this is not necessarily a
2176
+ true line ending.
2177
+ """
2178
+ return self.readuntil(b"\n")
2179
+
2180
+ def __next__(self):
2181
+ out = self.readline()
2182
+ if out:
2183
+ return out
2184
+ raise StopIteration
2185
+
2186
+ def __iter__(self):
2187
+ return self
2188
+
2189
+ def readlines(self):
2190
+ """Return all data, split by the newline character, including the newline character"""
2191
+ data = self.read()
2192
+ lines = data.split(b"\n")
2193
+ out = [l + b"\n" for l in lines[:-1]]
2194
+ if data.endswith(b"\n"):
2195
+ return out
2196
+ else:
2197
+ return out + [lines[-1]]
2198
+ # return list(self) ???
2199
+
2200
+ def readinto1(self, b):
2201
+ return self.readinto(b)
2202
+
2203
+ def close(self):
2204
+ """Close file
2205
+
2206
+ Finalizes writes, discards cache
2207
+ """
2208
+ if getattr(self, "_unclosable", False):
2209
+ return
2210
+ if self.closed:
2211
+ return
2212
+ try:
2213
+ if self.mode == "rb":
2214
+ self.cache = None
2215
+ else:
2216
+ if not self.forced:
2217
+ self.flush(force=True)
2218
+
2219
+ if self.fs is not None:
2220
+ self.fs.invalidate_cache(self.path)
2221
+ self.fs.invalidate_cache(self.fs._parent(self.path))
2222
+ finally:
2223
+ self.closed = True
2224
+
2225
+ def readable(self):
2226
+ """Whether opened for reading"""
2227
+ return "r" in self.mode and not self.closed
2228
+
2229
+ def seekable(self):
2230
+ """Whether is seekable (only in read mode)"""
2231
+ return self.readable()
2232
+
2233
+ def writable(self):
2234
+ """Whether opened for writing"""
2235
+ return self.mode in {"wb", "ab", "xb"} and not self.closed
2236
+
2237
+ def __reduce__(self):
2238
+ if self.mode != "rb":
2239
+ raise RuntimeError("Pickling a writeable file is not supported")
2240
+
2241
+ return reopen, (
2242
+ self.fs,
2243
+ self.path,
2244
+ self.mode,
2245
+ self.blocksize,
2246
+ self.loc,
2247
+ self.size,
2248
+ self.autocommit,
2249
+ self.cache.name if self.cache else "none",
2250
+ self.kwargs,
2251
+ )
2252
+
2253
+ def __del__(self):
2254
+ if not self.closed:
2255
+ self.close()
2256
+
2257
+ def __str__(self):
2258
+ return f"<File-like object {type(self.fs).__name__}, {self.path}>"
2259
+
2260
+ __repr__ = __str__
2261
+
2262
+ def __enter__(self):
2263
+ return self
2264
+
2265
+ def __exit__(self, *args):
2266
+ self.close()
2267
+
2268
+
2269
+ def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
2270
+ file = fs.open(
2271
+ path,
2272
+ mode=mode,
2273
+ block_size=blocksize,
2274
+ autocommit=autocommit,
2275
+ cache_type=cache_type,
2276
+ size=size,
2277
+ **kwargs,
2278
+ )
2279
+ if loc > 0:
2280
+ file.seek(loc)
2281
+ return file
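
To make the division of labour in ``AbstractBufferedFile`` concrete, here is a
minimal, illustrative subclass backed by a plain dict; ``STORE`` and
``DictFile`` are inventions for this sketch, and only the three documented
hooks are overridden:

    from fsspec.spec import AbstractBufferedFile

    STORE = {}  # stand-in backend: path -> bytes

    class DictFile(AbstractBufferedFile):
        def _fetch_range(self, start, end):
            # read side: hand the cache the requested byte range
            return STORE[self.path][start:end]

        def _initiate_upload(self):
            # write side: called once, just before the first chunk is flushed
            self._chunks = []

        def _upload_chunk(self, final=False):
            # write side: stash the current buffer; assemble on the final chunk
            self._chunks.append(self.buffer.getvalue())
            if final:
                STORE[self.path] = b"".join(self._chunks)
            return True

    STORE["/x"] = b"hello world"
    r = DictFile(fs=None, path="/x", mode="rb", size=len(STORE["/x"]))
    assert r.read(5) == b"hello"

    w = DictFile(fs=None, path="/y", mode="wb")
    w.write(b"hi there")
    w.close()  # flush(force=True) drives the two upload hooks
    assert STORE["/y"] == b"hi there"
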
venv/lib/python3.13/site-packages/fsspec/transaction.py ADDED
@@ -0,0 +1,90 @@
1
+ from collections import deque
2
+
3
+
4
+ class Transaction:
5
+ """Filesystem transaction write context
6
+
7
+ Gathers files for deferred commit or discard, so that several write
8
+ operations can be finalized semi-atomically. This works by having this
9
+ instance as the ``.transaction`` attribute of the given filesystem
10
+ """
11
+
12
+ def __init__(self, fs, **kwargs):
13
+ """
14
+ Parameters
15
+ ----------
16
+ fs: FileSystem instance
17
+ """
18
+ self.fs = fs
19
+ self.files = deque()
20
+
21
+ def __enter__(self):
22
+ self.start()
23
+ return self
24
+
25
+ def __exit__(self, exc_type, exc_val, exc_tb):
26
+ """End transaction and commit, if exit is not due to exception"""
27
+ # only commit if there was no exception
28
+ self.complete(commit=exc_type is None)
29
+ if self.fs:
30
+ self.fs._intrans = False
31
+ self.fs._transaction = None
32
+ self.fs = None
33
+
34
+ def start(self):
35
+ """Start a transaction on this FileSystem"""
36
+ self.files = deque() # clean up after previous failed completions
37
+ self.fs._intrans = True
38
+
39
+ def complete(self, commit=True):
40
+ """Finish transaction: commit or discard all deferred files"""
41
+ while self.files:
42
+ f = self.files.popleft()
43
+ if commit:
44
+ f.commit()
45
+ else:
46
+ f.discard()
47
+ self.fs._intrans = False
48
+ self.fs._transaction = None
49
+ self.fs = None
50
+
51
+
52
+ class FileActor:
53
+ def __init__(self):
54
+ self.files = []
55
+
56
+ def commit(self):
57
+ for f in self.files:
58
+ f.commit()
59
+ self.files.clear()
60
+
61
+ def discard(self):
62
+ for f in self.files:
63
+ f.discard()
64
+ self.files.clear()
65
+
66
+ def append(self, f):
67
+ self.files.append(f)
68
+
69
+
70
+ class DaskTransaction(Transaction):
71
+ def __init__(self, fs):
72
+ """
73
+ Parameters
74
+ ----------
75
+ fs: FileSystem instance
76
+ """
77
+ import distributed
78
+
79
+ super().__init__(fs)
80
+ client = distributed.default_client()
81
+ self.files = client.submit(FileActor, actor=True).result()
82
+
83
+ def complete(self, commit=True):
84
+ """Finish transaction: commit or discard all deferred files"""
85
+ if commit:
86
+ self.files.commit().result()
87
+ else:
88
+ self.files.discard().result()
89
+ self.fs._intrans = False
90
+ self.fs = None
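
The deferred-commit pattern in use, via the filesystem's ``transaction``
attribute (writes inside the block are committed together on clean exit and
discarded on exception); a sketch with the in-memory filesystem:

    import fsspec

    fs = fsspec.filesystem("memory")
    with fs.transaction:
        with fs.open("/staged.txt", "wb") as f:
            f.write(b"all or nothing")
        # the write is deferred until the transaction exits cleanly
    assert fs.cat("/staged.txt") == b"all or nothing"
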
venv/lib/python3.13/site-packages/hf_xet/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .hf_xet import *
2
+
3
+ __doc__ = hf_xet.__doc__
4
+ if hasattr(hf_xet, "__all__"):
5
+ __all__ = hf_xet.__all__
venv/lib/python3.13/site-packages/idna-3.11.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
1
+ pip
venv/lib/python3.13/site-packages/idna-3.11.dist-info/METADATA ADDED
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: idna
3
+ Version: 3.11
4
+ Summary: Internationalized Domain Names in Applications (IDNA)
5
+ Author-email: Kim Davies <kim+pypi@gumleaf.org>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/x-rst
8
+ License-Expression: BSD-3-Clause
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: System Administrators
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Programming Language :: Python :: Implementation :: CPython
24
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
25
+ Classifier: Topic :: Internet :: Name Service (DNS)
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Classifier: Topic :: Utilities
28
+ License-File: LICENSE.md
29
+ Requires-Dist: ruff >= 0.6.2 ; extra == "all"
30
+ Requires-Dist: mypy >= 1.11.2 ; extra == "all"
31
+ Requires-Dist: pytest >= 8.3.2 ; extra == "all"
32
+ Requires-Dist: flake8 >= 7.1.1 ; extra == "all"
33
+ Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
34
+ Project-URL: Issue tracker, https://github.com/kjd/idna/issues
35
+ Project-URL: Source, https://github.com/kjd/idna
36
+ Provides-Extra: all
37
+
38
+ Internationalized Domain Names in Applications (IDNA)
39
+ =====================================================
40
+
41
+ Support for `Internationalized Domain Names in
42
+ Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_
43
+ and `Unicode IDNA Compatibility Processing
44
+ <https://unicode.org/reports/tr46/>`_.
45
+
46
+ The latest versions of these standards supplied here provide
47
+ more comprehensive language coverage and reduce the potential of
48
+ allowing domains with known security vulnerabilities. This library
49
+ is a suitable replacement for the “encodings.idna”
50
+ module that comes with the Python standard library, but which
51
+ only supports an older superseded IDNA specification from 2003.
52
+
53
+ Basic functions are simply executed:
54
+
55
+ .. code-block:: pycon
56
+
57
+ >>> import idna
58
+ >>> idna.encode('ドメイン.テスト')
59
+ b'xn--eckwd4c7c.xn--zckzah'
60
+ >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
61
+ ドメイン.テスト
62
+
63
+
64
+ Installation
65
+ ------------
66
+
67
+ This package is available for installation from PyPI via the
68
+ typical mechanisms, such as:
69
+
70
+ .. code-block:: bash
71
+
72
+ $ python3 -m pip install idna
73
+
74
+
75
+ Usage
76
+ -----
77
+
78
+ For typical usage, the ``encode`` and ``decode`` functions will take a
79
+ domain name argument and perform a conversion to ASCII compatible encoding
80
+ (known as A-labels), or to Unicode strings (known as U-labels)
81
+ respectively.
82
+
83
+ .. code-block:: pycon
84
+
85
+ >>> import idna
86
+ >>> idna.encode('ドメイン.テスト')
87
+ b'xn--eckwd4c7c.xn--zckzah'
88
+ >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
89
+ ドメイン.テスト
90
+
91
+ Conversions can be applied at a per-label basis using the ``ulabel`` or
92
+ ``alabel`` functions if necessary:
93
+
94
+ .. code-block:: pycon
95
+
96
+ >>> idna.alabel('测试')
97
+ b'xn--0zwm56d'
98
+
99
+
100
+ Compatibility Mapping (UTS #46)
101
+ +++++++++++++++++++++++++++++++
102
+
103
+ This library provides support for `Unicode IDNA Compatibility
104
+ Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from
105
+ different potential ways a user may input a domain prior to performing the IDNA
106
+ conversion operations. This functionality, known as a
107
+ `mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the
108
+ specification to be a local user-interface issue distinct from IDNA
109
+ conversion functionality.
110
+
111
+ For example, “Königsgäßchen” is not a permissible label as *LATIN
112
+ CAPITAL LETTER K* is not allowed (nor are capital letters in general).
113
+ UTS 46 will convert this into lower case prior to applying the IDNA
114
+ conversion.
115
+
116
+ .. code-block:: pycon
117
+
118
+ >>> import idna
119
+ >>> idna.encode('Königsgäßchen')
120
+ ...
121
+ idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
122
+ >>> idna.encode('Königsgäßchen', uts46=True)
123
+ b'xn--knigsgchen-b4a3dun'
124
+ >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
125
+ königsgäßchen
126
+
127
+
128
+ Exceptions
129
+ ----------
130
+
131
+ All errors raised during the conversion following the specification
132
+ should raise an exception derived from the ``idna.IDNAError`` base
133
+ class.
134
+
135
+ More specific exceptions that may be generated as ``idna.IDNABidiError``
136
+ when the error reflects an illegal combination of left-to-right and
137
+ right-to-left characters in a label; ``idna.InvalidCodepoint`` when
138
+ a specific codepoint is an illegal character in an IDN label (i.e.
139
+ INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
140
+ illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ
141
+ but the contextual requirements are not satisfied.)
142
+
143
+ Building and Diagnostics
144
+ ------------------------
145
+
146
+ The IDNA and UTS 46 functionality relies upon pre-calculated lookup
147
+ tables for performance. These tables are derived from computing against
148
+ eligibility criteria in the respective standards using the command-line
149
+ script ``tools/idna-data``.
150
+
151
+ This tool will fetch relevant codepoint data from the Unicode repository
152
+ and perform the required calculations to identify eligibility. There are
153
+ three main modes:
154
+
155
+ * ``idna-data make-libdata``. Generates ``idnadata.py`` and
156
+ ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
157
+ UTS 46 conversions. Implementers who wish to track this library against
158
+ a different Unicode version may use this tool to manually generate a
159
+ different version of the ``idnadata.py`` and ``uts46data.py`` files.
160
+
161
+ * ``idna-data make-table``. Generate a table of the IDNA disposition
162
+ (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
163
+ B.1 of RFC 5892 and the pre-computed tables published by `IANA
164
+ <https://www.iana.org/>`_.
165
+
166
+ * ``idna-data U+0061``. Prints debugging output on the various
167
+ properties associated with an individual Unicode codepoint (in this
168
+ case, U+0061), that are used to assess the IDNA and UTS 46 status of a
169
+ codepoint. This is helpful in debugging or analysis.
170
+
171
+ The tool accepts a number of arguments, described using ``idna-data
172
+ -h``. Most notably, the ``--version`` argument allows the specification
173
+ of the version of Unicode to be used in computing the table data. For
174
+ example, ``idna-data --version 9.0.0 make-libdata`` will generate
175
+ library data against Unicode 9.0.0.
176
+
177
+
178
+ Additional Notes
179
+ ----------------
180
+
181
+ * **Packages**. The latest tagged release version is published in the
182
+ `Python Package Index <https://pypi.org/project/idna/>`_.
183
+
184
+ * **Version support**. This library supports Python 3.8 and higher.
185
+ As this library serves as a low-level toolkit for a variety of
186
+ applications, many of which strive for broad compatibility with older
187
+ Python versions, there is no rush to remove older interpreter support.
188
+ Support for older versions is likely to be removed from new releases
189
+ as automated tests can no longer easily be run, i.e. once the Python
190
+ version is officially end-of-life.
191
+
192
+ * **Testing**. The library has a test suite based on each rule of the
193
+ IDNA specification, as well as tests that are provided as part of the
194
+ Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
195
+ <https://unicode.org/reports/tr46/>`_.
196
+
197
+ * **Emoji**. Supporting emoji domains in this library is an occasional
198
+ request. Encoding of symbols like emoji is expressly prohibited by
199
+ the technical standard IDNA 2008, and emoji domains are being phased
200
+ out across the domain industry due to associated security risks. For
201
+ now, applications that need to support these non-compliant labels
202
+ may wish to consider trying the encode/decode operation in this library
203
+ first, and then falling back to ``encodings.idna``, as sketched after
205
+ this list. See `the GitHub project
+ <https://github.com/kjd/idna/issues/18>`_ for more discussion.
205
+
206
+ * **Transitional processing**. Unicode 16.0.0 removed transitional
207
+ processing, so the ``transitional`` argument to the ``encode()`` method
208
+ no longer has any effect and will be removed at a later date.
209
+
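+ For the fallback approach mentioned under **Emoji** above, a minimal
+ sketch; the helper name is hypothetical and not part of this library's
+ API, and it operates on a single label:
+
+ .. code-block:: python
+
+     import encodings.idna
+
+     import idna
+
+     def label_to_ascii(label: str) -> bytes:
+         try:
+             # Strict IDNA 2008 (with UTS 46 mapping) first.
+             return idna.encode(label, uts46=True)
+         except idna.IDNAError:
+             # Legacy IDNA 2003 rules, which permit some labels
+             # (such as emoji) that IDNA 2008 rejects.
+             return encodings.idna.ToASCII(label)
+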
venv/lib/python3.13/site-packages/idna-3.11.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
1
+ idna-3.11.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ idna-3.11.dist-info/METADATA,sha256=fCwSww9SuiN8TIHllFSASUQCW55hAs8dzKnr9RaEEbA,8378
3
+ idna-3.11.dist-info/RECORD,,
4
+ idna-3.11.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
5
+ idna-3.11.dist-info/licenses/LICENSE.md,sha256=t6M2q_OwThgOwGXN0W5wXQeeHMehT5EKpukYfza5zYc,1541
6
+ idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868
7
+ idna/__pycache__/__init__.cpython-313.pyc,,
8
+ idna/__pycache__/codec.cpython-313.pyc,,
9
+ idna/__pycache__/compat.cpython-313.pyc,,
10
+ idna/__pycache__/core.cpython-313.pyc,,
11
+ idna/__pycache__/idnadata.cpython-313.pyc,,
12
+ idna/__pycache__/intranges.cpython-313.pyc,,
13
+ idna/__pycache__/package_data.cpython-313.pyc,,
14
+ idna/__pycache__/uts46data.cpython-313.pyc,,
15
+ idna/codec.py,sha256=M2SGWN7cs_6B32QmKTyTN6xQGZeYQgQ2wiX3_DR6loE,3438
16
+ idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316
17
+ idna/core.py,sha256=P26_XVycuMTZ1R2mNK1ZREVzM5mvTzdabBXfyZVU1Lc,13246
18
+ idna/idnadata.py,sha256=SG8jhaGE53iiD6B49pt2pwTv_UvClciWE-N54oR2p4U,79623
19
+ idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898
20
+ idna/package_data.py,sha256=_CUavOxobnbyNG2FLyHoN8QHP3QM9W1tKuw7eq9QwBk,21
21
+ idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ idna/uts46data.py,sha256=H9J35VkD0F9L9mKOqjeNGd2A-Va6FlPoz6Jz4K7h-ps,243725
venv/lib/python3.13/site-packages/idna-3.11.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.12.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
venv/lib/python3.13/site-packages/packaging/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ __title__ = "packaging"
6
+ __summary__ = "Core utilities for Python packages"
7
+ __uri__ = "https://github.com/pypa/packaging"
8
+
9
+ __version__ = "25.0"
10
+
11
+ __author__ = "Donald Stufft and individual contributors"
12
+ __email__ = "donald@stufft.io"
13
+
14
+ __license__ = "BSD-2-Clause or Apache-2.0"
15
+ __copyright__ = f"2014 {__author__}"
venv/lib/python3.13/site-packages/packaging/_elffile.py ADDED
@@ -0,0 +1,109 @@
1
+ """
2
+ ELF file parser.
3
+
4
+ This provides a class ``ELFFile`` that parses an ELF executable in a similar
5
+ interface to ``ZipFile``. Only the read interface is implemented.
6
+
7
+ Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
8
+ ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import enum
14
+ import os
15
+ import struct
16
+ from typing import IO
17
+
18
+
19
+ class ELFInvalid(ValueError):
20
+ pass
21
+
22
+
23
+ class EIClass(enum.IntEnum):
24
+ C32 = 1
25
+ C64 = 2
26
+
27
+
28
+ class EIData(enum.IntEnum):
29
+ Lsb = 1
30
+ Msb = 2
31
+
32
+
33
+ class EMachine(enum.IntEnum):
34
+ I386 = 3
35
+ S390 = 22
36
+ Arm = 40
37
+ X8664 = 62
38
+ AArc64 = 183
39
+
40
+
41
+ class ELFFile:
42
+ """
43
+ Representation of an ELF executable.
44
+ """
45
+
46
+ def __init__(self, f: IO[bytes]) -> None:
47
+ self._f = f
48
+
49
+ try:
50
+ ident = self._read("16B")
51
+ except struct.error as e:
52
+ raise ELFInvalid("unable to parse identification") from e
53
+ magic = bytes(ident[:4])
54
+ if magic != b"\x7fELF":
55
+ raise ELFInvalid(f"invalid magic: {magic!r}")
56
+
57
+ self.capacity = ident[4] # Format for program header (bitness).
58
+ self.encoding = ident[5] # Data structure encoding (endianness).
59
+
60
+ try:
61
+ # e_fmt: Format for program header.
62
+ # p_fmt: Format for section header.
63
+ # p_idx: Indexes to find p_type, p_offset, and p_filesz.
64
+ e_fmt, self._p_fmt, self._p_idx = {
65
+ (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
66
+ (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
67
+ (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
68
+ (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
69
+ }[(self.capacity, self.encoding)]
70
+ except KeyError as e:
71
+ raise ELFInvalid(
72
+ f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})"
73
+ ) from e
74
+
75
+ try:
76
+ (
77
+ _,
78
+ self.machine, # Architecture type.
79
+ _,
80
+ _,
81
+ self._e_phoff, # Offset of program header.
82
+ _,
83
+ self.flags, # Processor-specific flags.
84
+ _,
85
+ self._e_phentsize, # Size of section.
86
+ self._e_phnum, # Number of sections.
87
+ ) = self._read(e_fmt)
88
+ except struct.error as e:
89
+ raise ELFInvalid("unable to parse machine and section information") from e
90
+
91
+ def _read(self, fmt: str) -> tuple[int, ...]:
92
+ return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
93
+
94
+ @property
95
+ def interpreter(self) -> str | None:
96
+ """
97
+ The path recorded in the ``PT_INTERP`` section header.
98
+ """
99
+ for index in range(self._e_phnum):
100
+ self._f.seek(self._e_phoff + self._e_phentsize * index)
101
+ try:
102
+ data = self._read(self._p_fmt)
103
+ except struct.error:
104
+ continue
105
+ if data[self._p_idx[0]] != 3: # Not PT_INTERP.
106
+ continue
107
+ self._f.seek(data[self._p_idx[1]])
108
+ return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
109
+ return None
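+
+
+ # Illustrative sketch (not part of the upstream module): exercise the
+ # read-only interface on the running interpreter, mirroring how
+ # packaging._musllinux drives this class.
+ if __name__ == "__main__":  # pragma: no cover
+     import sys
+
+     try:
+         with open(sys.executable, "rb") as f:
+             elf = ELFFile(f)
+             print("capacity:", elf.capacity)  # 1 = 32-bit, 2 = 64-bit
+             print("machine:", elf.machine)  # raw e_machine value, e.g. 62 for x86-64
+             print("interpreter:", elf.interpreter)  # e.g. /lib64/ld-linux-x86-64.so.2
+     except ELFInvalid as exc:
+         print("not a valid ELF file:", exc)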
venv/lib/python3.13/site-packages/packaging/_manylinux.py ADDED
@@ -0,0 +1,262 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import contextlib
5
+ import functools
6
+ import os
7
+ import re
8
+ import sys
9
+ import warnings
10
+ from typing import Generator, Iterator, NamedTuple, Sequence
11
+
12
+ from ._elffile import EIClass, EIData, ELFFile, EMachine
13
+
14
+ EF_ARM_ABIMASK = 0xFF000000
15
+ EF_ARM_ABI_VER5 = 0x05000000
16
+ EF_ARM_ABI_FLOAT_HARD = 0x00000400
17
+
18
+
19
+ # `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
20
+ # as the type for `path` until then.
21
+ @contextlib.contextmanager
22
+ def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
23
+ try:
24
+ with open(path, "rb") as f:
25
+ yield ELFFile(f)
26
+ except (OSError, TypeError, ValueError):
27
+ yield None
28
+
29
+
30
+ def _is_linux_armhf(executable: str) -> bool:
31
+ # hard-float ABI can be detected from the ELF header of the running
32
+ # process
33
+ # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
34
+ with _parse_elf(executable) as f:
35
+ return (
36
+ f is not None
37
+ and f.capacity == EIClass.C32
38
+ and f.encoding == EIData.Lsb
39
+ and f.machine == EMachine.Arm
40
+ and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
41
+ and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
42
+ )
43
+
44
+
45
+ def _is_linux_i686(executable: str) -> bool:
46
+ with _parse_elf(executable) as f:
47
+ return (
48
+ f is not None
49
+ and f.capacity == EIClass.C32
50
+ and f.encoding == EIData.Lsb
51
+ and f.machine == EMachine.I386
52
+ )
53
+
54
+
55
+ def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
56
+ if "armv7l" in archs:
57
+ return _is_linux_armhf(executable)
58
+ if "i686" in archs:
59
+ return _is_linux_i686(executable)
60
+ allowed_archs = {
61
+ "x86_64",
62
+ "aarch64",
63
+ "ppc64",
64
+ "ppc64le",
65
+ "s390x",
66
+ "loongarch64",
67
+ "riscv64",
68
+ }
69
+ return any(arch in allowed_archs for arch in archs)
70
+
71
+
72
+ # If glibc ever changes its major version, we need to know what the last
73
+ # minor version was, so we can build the complete list of all versions.
74
+ # For now, guess what the highest minor version might be, assume it will
75
+ # be 50 for testing. Once this actually happens, update the dictionary
76
+ # with the actual value.
77
+ _LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
78
+
79
+
80
+ class _GLibCVersion(NamedTuple):
81
+ major: int
82
+ minor: int
83
+
84
+
85
+ def _glibc_version_string_confstr() -> str | None:
86
+ """
87
+ Primary implementation of glibc_version_string using os.confstr.
88
+ """
89
+ # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
90
+ # to be broken or missing. This strategy is used in the standard library
91
+ # platform module.
92
+ # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
93
+ try:
94
+ # Should be a string like "glibc 2.17".
95
+ version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
96
+ assert version_string is not None
97
+ _, version = version_string.rsplit()
98
+ except (AssertionError, AttributeError, OSError, ValueError):
99
+ # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
100
+ return None
101
+ return version
102
+
103
+
104
+ def _glibc_version_string_ctypes() -> str | None:
105
+ """
106
+ Fallback implementation of glibc_version_string using ctypes.
107
+ """
108
+ try:
109
+ import ctypes
110
+ except ImportError:
111
+ return None
112
+
113
+ # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
114
+ # manpage says, "If filename is NULL, then the returned handle is for the
115
+ # main program". This way we can let the linker do the work to figure out
116
+ # which libc our process is actually using.
117
+ #
118
+ # We must also handle the special case where the executable is not a
119
+ # dynamically linked executable. This can occur when using musl libc,
120
+ # for example. In this situation, dlopen() will error, leading to an
121
+ # OSError. Interestingly, at least in the case of musl, there is no
122
+ # errno set on the OSError. The single string argument used to construct
123
+ # OSError comes from libc itself and is therefore not portable to
124
+ # hard code here. In any case, failure to call dlopen() means we
125
+ # can proceed, so we bail on our attempt.
126
+ try:
127
+ process_namespace = ctypes.CDLL(None)
128
+ except OSError:
129
+ return None
130
+
131
+ try:
132
+ gnu_get_libc_version = process_namespace.gnu_get_libc_version
133
+ except AttributeError:
134
+ # Symbol doesn't exist -> therefore, we are not linked to
135
+ # glibc.
136
+ return None
137
+
138
+ # Call gnu_get_libc_version, which returns a string like "2.5"
139
+ gnu_get_libc_version.restype = ctypes.c_char_p
140
+ version_str: str = gnu_get_libc_version()
141
+ # py2 / py3 compatibility:
142
+ if not isinstance(version_str, str):
143
+ version_str = version_str.decode("ascii")
144
+
145
+ return version_str
146
+
147
+
148
+ def _glibc_version_string() -> str | None:
149
+ """Returns glibc version string, or None if not using glibc."""
150
+ return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
151
+
152
+
153
+ def _parse_glibc_version(version_str: str) -> tuple[int, int]:
154
+ """Parse glibc version.
155
+
156
+ We use a regexp instead of str.split because we want to discard any
157
+ random junk that might come after the minor version -- this might happen
158
+ in patched/forked versions of glibc (e.g. Linaro's version of glibc
159
+ uses version strings like "2.20-2014.11"). See gh-3588.
160
+ """
161
+ m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
162
+ if not m:
163
+ warnings.warn(
164
+ f"Expected glibc version with 2 components major.minor, got: {version_str}",
165
+ RuntimeWarning,
166
+ stacklevel=2,
167
+ )
168
+ return -1, -1
169
+ return int(m.group("major")), int(m.group("minor"))
170
+
171
+
172
+ @functools.lru_cache
173
+ def _get_glibc_version() -> tuple[int, int]:
174
+ version_str = _glibc_version_string()
175
+ if version_str is None:
176
+ return (-1, -1)
177
+ return _parse_glibc_version(version_str)
178
+
179
+
180
+ # From PEP 513, PEP 600
181
+ def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
182
+ sys_glibc = _get_glibc_version()
183
+ if sys_glibc < version:
184
+ return False
185
+ # Check for presence of _manylinux module.
186
+ try:
187
+ import _manylinux
188
+ except ImportError:
189
+ return True
190
+ if hasattr(_manylinux, "manylinux_compatible"):
191
+ result = _manylinux.manylinux_compatible(version[0], version[1], arch)
192
+ if result is not None:
193
+ return bool(result)
194
+ return True
195
+ if version == _GLibCVersion(2, 5):
196
+ if hasattr(_manylinux, "manylinux1_compatible"):
197
+ return bool(_manylinux.manylinux1_compatible)
198
+ if version == _GLibCVersion(2, 12):
199
+ if hasattr(_manylinux, "manylinux2010_compatible"):
200
+ return bool(_manylinux.manylinux2010_compatible)
201
+ if version == _GLibCVersion(2, 17):
202
+ if hasattr(_manylinux, "manylinux2014_compatible"):
203
+ return bool(_manylinux.manylinux2014_compatible)
204
+ return True
205
+
206
+
207
+ _LEGACY_MANYLINUX_MAP = {
208
+ # CentOS 7 w/ glibc 2.17 (PEP 599)
209
+ (2, 17): "manylinux2014",
210
+ # CentOS 6 w/ glibc 2.12 (PEP 571)
211
+ (2, 12): "manylinux2010",
212
+ # CentOS 5 w/ glibc 2.5 (PEP 513)
213
+ (2, 5): "manylinux1",
214
+ }
215
+
216
+
217
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
218
+ """Generate manylinux tags compatible to the current platform.
219
+
220
+ :param archs: Sequence of compatible architectures.
221
+ The first one shall be the closest to the actual architecture and be the part of
222
+ platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
223
+ The ``linux_`` prefix is assumed as a prerequisite for the current platform to
224
+ be manylinux-compatible.
225
+
226
+ :returns: An iterator of compatible manylinux tags.
227
+ """
228
+ if not _have_compatible_abi(sys.executable, archs):
229
+ return
230
+ # Oldest glibc to be supported regardless of architecture is (2, 17).
231
+ too_old_glibc2 = _GLibCVersion(2, 16)
232
+ if set(archs) & {"x86_64", "i686"}:
233
+ # On x86/i686 also oldest glibc to be supported is (2, 5).
234
+ too_old_glibc2 = _GLibCVersion(2, 4)
235
+ current_glibc = _GLibCVersion(*_get_glibc_version())
236
+ glibc_max_list = [current_glibc]
237
+ # We can assume compatibility across glibc major versions.
238
+ # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
239
+ #
240
+ # Build a list of maximum glibc versions so that we can
241
+ # output the canonical list of all glibc from current_glibc
242
+ # down to too_old_glibc2, including all intermediary versions.
243
+ for glibc_major in range(current_glibc.major - 1, 1, -1):
244
+ glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
245
+ glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
246
+ for arch in archs:
247
+ for glibc_max in glibc_max_list:
248
+ if glibc_max.major == too_old_glibc2.major:
249
+ min_minor = too_old_glibc2.minor
250
+ else:
251
+ # For other glibc major versions oldest supported is (x, 0).
252
+ min_minor = -1
253
+ for glibc_minor in range(glibc_max.minor, min_minor, -1):
254
+ glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
255
+ tag = "manylinux_{}_{}".format(*glibc_version)
256
+ if _is_compatible(arch, glibc_version):
257
+ yield f"{tag}_{arch}"
258
+ # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
259
+ if glibc_version in _LEGACY_MANYLINUX_MAP:
260
+ legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
261
+ if _is_compatible(arch, glibc_version):
262
+ yield f"{legacy_tag}_{arch}"
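+
+
+ # Illustrative sketch (not part of the upstream module): print the
+ # manylinux tags the current interpreter is compatible with; prints
+ # nothing when no compatible glibc is detected.
+ if __name__ == "__main__":  # pragma: no cover
+     import sysconfig
+
+     plat = sysconfig.get_platform()  # e.g. "linux-x86_64"
+     arch = re.sub(r"[.-]", "_", plat.split("-", 1)[-1])
+     for tag in platform_tags([arch]):
+         print(tag)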
venv/lib/python3.13/site-packages/packaging/_musllinux.py ADDED
@@ -0,0 +1,85 @@
1
+ """PEP 656 support.
2
+
3
+ This module implements logic to detect if the currently running Python is
4
+ linked against musl, and what musl version is used.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import functools
10
+ import re
11
+ import subprocess
12
+ import sys
13
+ from typing import Iterator, NamedTuple, Sequence
14
+
15
+ from ._elffile import ELFFile
16
+
17
+
18
+ class _MuslVersion(NamedTuple):
19
+ major: int
20
+ minor: int
21
+
22
+
23
+ def _parse_musl_version(output: str) -> _MuslVersion | None:
24
+ lines = [n for n in (n.strip() for n in output.splitlines()) if n]
25
+ if len(lines) < 2 or lines[0][:4] != "musl":
26
+ return None
27
+ m = re.match(r"Version (\d+)\.(\d+)", lines[1])
28
+ if not m:
29
+ return None
30
+ return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
31
+
32
+
33
+ @functools.lru_cache
34
+ def _get_musl_version(executable: str) -> _MuslVersion | None:
35
+ """Detect currently-running musl runtime version.
36
+
37
+ This is done by checking the specified executable's dynamic linking
38
+ information, and invoking the loader to parse its output for a version
39
+ string. If the loader is musl, the output would be something like::
40
+
41
+ musl libc (x86_64)
42
+ Version 1.2.2
43
+ Dynamic Program Loader
44
+ """
45
+ try:
46
+ with open(executable, "rb") as f:
47
+ ld = ELFFile(f).interpreter
48
+ except (OSError, TypeError, ValueError):
49
+ return None
50
+ if ld is None or "musl" not in ld:
51
+ return None
52
+ proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
53
+ return _parse_musl_version(proc.stderr)
54
+
55
+
56
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
57
+ """Generate musllinux tags compatible to the current platform.
58
+
59
+ :param archs: Sequence of compatible architectures.
60
+ The first one shall be the closest to the actual architecture and be the part of
61
+ platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
62
+ The ``linux_`` prefix is assumed as a prerequisite for the current platform to
63
+ be musllinux-compatible.
64
+
65
+ :returns: An iterator of compatible musllinux tags.
66
+ """
67
+ sys_musl = _get_musl_version(sys.executable)
68
+ if sys_musl is None: # Python not dynamically linked against musl.
69
+ return
70
+ for arch in archs:
71
+ for minor in range(sys_musl.minor, -1, -1):
72
+ yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
73
+
74
+
75
+ if __name__ == "__main__": # pragma: no cover
76
+ import sysconfig
77
+
78
+ plat = sysconfig.get_platform()
79
+ assert plat.startswith("linux-"), "not linux"
80
+
81
+ print("plat:", plat)
82
+ print("musl:", _get_musl_version(sys.executable))
83
+ print("tags:", end=" ")
84
+ for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
85
+ print(t, end="\n ")
venv/lib/python3.13/site-packages/packaging/_parser.py ADDED
@@ -0,0 +1,353 @@
1
+ """Handwritten parser of dependency specifiers.
2
+
3
+ The docstring for each __parse_* function contains EBNF-inspired grammar representing
4
+ the implementation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ from typing import NamedTuple, Sequence, Tuple, Union
11
+
12
+ from ._tokenizer import DEFAULT_RULES, Tokenizer
13
+
14
+
15
+ class Node:
16
+ def __init__(self, value: str) -> None:
17
+ self.value = value
18
+
19
+ def __str__(self) -> str:
20
+ return self.value
21
+
22
+ def __repr__(self) -> str:
23
+ return f"<{self.__class__.__name__}('{self}')>"
24
+
25
+ def serialize(self) -> str:
26
+ raise NotImplementedError
27
+
28
+
29
+ class Variable(Node):
30
+ def serialize(self) -> str:
31
+ return str(self)
32
+
33
+
34
+ class Value(Node):
35
+ def serialize(self) -> str:
36
+ return f'"{self}"'
37
+
38
+
39
+ class Op(Node):
40
+ def serialize(self) -> str:
41
+ return str(self)
42
+
43
+
44
+ MarkerVar = Union[Variable, Value]
45
+ MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
46
+ MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
47
+ MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
48
+
49
+
50
+ class ParsedRequirement(NamedTuple):
51
+ name: str
52
+ url: str
53
+ extras: list[str]
54
+ specifier: str
55
+ marker: MarkerList | None
56
+
57
+
58
+ # --------------------------------------------------------------------------------------
59
+ # Recursive descent parser for dependency specifier
60
+ # --------------------------------------------------------------------------------------
61
+ def parse_requirement(source: str) -> ParsedRequirement:
62
+ return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
63
+
64
+
65
+ def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
66
+ """
67
+ requirement = WS? IDENTIFIER WS? extras WS? requirement_details
68
+ """
69
+ tokenizer.consume("WS")
70
+
71
+ name_token = tokenizer.expect(
72
+ "IDENTIFIER", expected="package name at the start of dependency specifier"
73
+ )
74
+ name = name_token.text
75
+ tokenizer.consume("WS")
76
+
77
+ extras = _parse_extras(tokenizer)
78
+ tokenizer.consume("WS")
79
+
80
+ url, specifier, marker = _parse_requirement_details(tokenizer)
81
+ tokenizer.expect("END", expected="end of dependency specifier")
82
+
83
+ return ParsedRequirement(name, url, extras, specifier, marker)
84
+
85
+
86
+ def _parse_requirement_details(
87
+ tokenizer: Tokenizer,
88
+ ) -> tuple[str, str, MarkerList | None]:
89
+ """
90
+ requirement_details = AT URL (WS requirement_marker?)?
91
+ | specifier WS? (requirement_marker)?
92
+ """
93
+
94
+ specifier = ""
95
+ url = ""
96
+ marker = None
97
+
98
+ if tokenizer.check("AT"):
99
+ tokenizer.read()
100
+ tokenizer.consume("WS")
101
+
102
+ url_start = tokenizer.position
103
+ url = tokenizer.expect("URL", expected="URL after @").text
104
+ if tokenizer.check("END", peek=True):
105
+ return (url, specifier, marker)
106
+
107
+ tokenizer.expect("WS", expected="whitespace after URL")
108
+
109
+ # The input might end after whitespace.
110
+ if tokenizer.check("END", peek=True):
111
+ return (url, specifier, marker)
112
+
113
+ marker = _parse_requirement_marker(
114
+ tokenizer, span_start=url_start, after="URL and whitespace"
115
+ )
116
+ else:
117
+ specifier_start = tokenizer.position
118
+ specifier = _parse_specifier(tokenizer)
119
+ tokenizer.consume("WS")
120
+
121
+ if tokenizer.check("END", peek=True):
122
+ return (url, specifier, marker)
123
+
124
+ marker = _parse_requirement_marker(
125
+ tokenizer,
126
+ span_start=specifier_start,
127
+ after=(
128
+ "version specifier"
129
+ if specifier
130
+ else "name and no valid version specifier"
131
+ ),
132
+ )
133
+
134
+ return (url, specifier, marker)
135
+
136
+
137
+ def _parse_requirement_marker(
138
+ tokenizer: Tokenizer, *, span_start: int, after: str
139
+ ) -> MarkerList:
140
+ """
141
+ requirement_marker = SEMICOLON marker WS?
142
+ """
143
+
144
+ if not tokenizer.check("SEMICOLON"):
145
+ tokenizer.raise_syntax_error(
146
+ f"Expected end or semicolon (after {after})",
147
+ span_start=span_start,
148
+ )
149
+ tokenizer.read()
150
+
151
+ marker = _parse_marker(tokenizer)
152
+ tokenizer.consume("WS")
153
+
154
+ return marker
155
+
156
+
157
+ def _parse_extras(tokenizer: Tokenizer) -> list[str]:
158
+ """
159
+ extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
160
+ """
161
+ if not tokenizer.check("LEFT_BRACKET", peek=True):
162
+ return []
163
+
164
+ with tokenizer.enclosing_tokens(
165
+ "LEFT_BRACKET",
166
+ "RIGHT_BRACKET",
167
+ around="extras",
168
+ ):
169
+ tokenizer.consume("WS")
170
+ extras = _parse_extras_list(tokenizer)
171
+ tokenizer.consume("WS")
172
+
173
+ return extras
174
+
175
+
176
+ def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
177
+ """
178
+ extras_list = identifier (wsp* ',' wsp* identifier)*
179
+ """
180
+ extras: list[str] = []
181
+
182
+ if not tokenizer.check("IDENTIFIER"):
183
+ return extras
184
+
185
+ extras.append(tokenizer.read().text)
186
+
187
+ while True:
188
+ tokenizer.consume("WS")
189
+ if tokenizer.check("IDENTIFIER", peek=True):
190
+ tokenizer.raise_syntax_error("Expected comma between extra names")
191
+ elif not tokenizer.check("COMMA"):
192
+ break
193
+
194
+ tokenizer.read()
195
+ tokenizer.consume("WS")
196
+
197
+ extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
198
+ extras.append(extra_token.text)
199
+
200
+ return extras
201
+
202
+
203
+ def _parse_specifier(tokenizer: Tokenizer) -> str:
204
+ """
205
+ specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
206
+ | WS? version_many WS?
207
+ """
208
+ with tokenizer.enclosing_tokens(
209
+ "LEFT_PARENTHESIS",
210
+ "RIGHT_PARENTHESIS",
211
+ around="version specifier",
212
+ ):
213
+ tokenizer.consume("WS")
214
+ parsed_specifiers = _parse_version_many(tokenizer)
215
+ tokenizer.consume("WS")
216
+
217
+ return parsed_specifiers
218
+
219
+
220
+ def _parse_version_many(tokenizer: Tokenizer) -> str:
221
+ """
222
+ version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
223
+ """
224
+ parsed_specifiers = ""
225
+ while tokenizer.check("SPECIFIER"):
226
+ span_start = tokenizer.position
227
+ parsed_specifiers += tokenizer.read().text
228
+ if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
229
+ tokenizer.raise_syntax_error(
230
+ ".* suffix can only be used with `==` or `!=` operators",
231
+ span_start=span_start,
232
+ span_end=tokenizer.position + 1,
233
+ )
234
+ if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
235
+ tokenizer.raise_syntax_error(
236
+ "Local version label can only be used with `==` or `!=` operators",
237
+ span_start=span_start,
238
+ span_end=tokenizer.position,
239
+ )
240
+ tokenizer.consume("WS")
241
+ if not tokenizer.check("COMMA"):
242
+ break
243
+ parsed_specifiers += tokenizer.read().text
244
+ tokenizer.consume("WS")
245
+
246
+ return parsed_specifiers
247
+
248
+
249
+ # --------------------------------------------------------------------------------------
250
+ # Recursive descent parser for marker expression
251
+ # --------------------------------------------------------------------------------------
252
+ def parse_marker(source: str) -> MarkerList:
253
+ return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
254
+
255
+
256
+ def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
257
+ retval = _parse_marker(tokenizer)
258
+ tokenizer.expect("END", expected="end of marker expression")
259
+ return retval
260
+
261
+
262
+ def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
263
+ """
264
+ marker = marker_atom (BOOLOP marker_atom)+
265
+ """
266
+ expression = [_parse_marker_atom(tokenizer)]
267
+ while tokenizer.check("BOOLOP"):
268
+ token = tokenizer.read()
269
+ expr_right = _parse_marker_atom(tokenizer)
270
+ expression.extend((token.text, expr_right))
271
+ return expression
272
+
273
+
274
+ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
275
+ """
276
+ marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
277
+ | WS? marker_item WS?
278
+ """
279
+
280
+ tokenizer.consume("WS")
281
+ if tokenizer.check("LEFT_PARENTHESIS", peek=True):
282
+ with tokenizer.enclosing_tokens(
283
+ "LEFT_PARENTHESIS",
284
+ "RIGHT_PARENTHESIS",
285
+ around="marker expression",
286
+ ):
287
+ tokenizer.consume("WS")
288
+ marker: MarkerAtom = _parse_marker(tokenizer)
289
+ tokenizer.consume("WS")
290
+ else:
291
+ marker = _parse_marker_item(tokenizer)
292
+ tokenizer.consume("WS")
293
+ return marker
294
+
295
+
296
+ def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
297
+ """
298
+ marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
299
+ """
300
+ tokenizer.consume("WS")
301
+ marker_var_left = _parse_marker_var(tokenizer)
302
+ tokenizer.consume("WS")
303
+ marker_op = _parse_marker_op(tokenizer)
304
+ tokenizer.consume("WS")
305
+ marker_var_right = _parse_marker_var(tokenizer)
306
+ tokenizer.consume("WS")
307
+ return (marker_var_left, marker_op, marker_var_right)
308
+
309
+
310
+ def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
311
+ """
312
+ marker_var = VARIABLE | QUOTED_STRING
313
+ """
314
+ if tokenizer.check("VARIABLE"):
315
+ return process_env_var(tokenizer.read().text.replace(".", "_"))
316
+ elif tokenizer.check("QUOTED_STRING"):
317
+ return process_python_str(tokenizer.read().text)
318
+ else:
319
+ tokenizer.raise_syntax_error(
320
+ message="Expected a marker variable or quoted string"
321
+ )
322
+
323
+
324
+ def process_env_var(env_var: str) -> Variable:
325
+ if env_var in ("platform_python_implementation", "python_implementation"):
326
+ return Variable("platform_python_implementation")
327
+ else:
328
+ return Variable(env_var)
329
+
330
+
331
+ def process_python_str(python_str: str) -> Value:
332
+ value = ast.literal_eval(python_str)
333
+ return Value(str(value))
334
+
335
+
336
+ def _parse_marker_op(tokenizer: Tokenizer) -> Op:
337
+ """
338
+ marker_op = IN | NOT IN | OP
339
+ """
340
+ if tokenizer.check("IN"):
341
+ tokenizer.read()
342
+ return Op("in")
343
+ elif tokenizer.check("NOT"):
344
+ tokenizer.read()
345
+ tokenizer.expect("WS", expected="whitespace after 'not'")
346
+ tokenizer.expect("IN", expected="'in' after 'not'")
347
+ return Op("not in")
348
+ elif tokenizer.check("OP"):
349
+ return Op(tokenizer.read().text)
350
+ else:
351
+ return tokenizer.raise_syntax_error(
352
+ "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
353
+ )
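+
+
+ # Illustrative sketch (not part of the upstream module): parse a dependency
+ # specifier and inspect the resulting ParsedRequirement.
+ if __name__ == "__main__":  # pragma: no cover
+     req = parse_requirement('idna[codec] >=3.0, <4 ; python_version >= "3.8"')
+     print(req.name)  # idna
+     print(req.extras)  # ['codec']
+     print(req.specifier)  # >=3.0,<4
+     print(req.marker)  # parsed marker structure, or None if absent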
venv/lib/python3.13/site-packages/packaging/_structures.py ADDED
@@ -0,0 +1,61 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+
6
+ class InfinityType:
7
+ def __repr__(self) -> str:
8
+ return "Infinity"
9
+
10
+ def __hash__(self) -> int:
11
+ return hash(repr(self))
12
+
13
+ def __lt__(self, other: object) -> bool:
14
+ return False
15
+
16
+ def __le__(self, other: object) -> bool:
17
+ return False
18
+
19
+ def __eq__(self, other: object) -> bool:
20
+ return isinstance(other, self.__class__)
21
+
22
+ def __gt__(self, other: object) -> bool:
23
+ return True
24
+
25
+ def __ge__(self, other: object) -> bool:
26
+ return True
27
+
28
+ def __neg__(self: object) -> "NegativeInfinityType":
29
+ return NegativeInfinity
30
+
31
+
32
+ Infinity = InfinityType()
33
+
34
+
35
+ class NegativeInfinityType:
36
+ def __repr__(self) -> str:
37
+ return "-Infinity"
38
+
39
+ def __hash__(self) -> int:
40
+ return hash(repr(self))
41
+
42
+ def __lt__(self, other: object) -> bool:
43
+ return True
44
+
45
+ def __le__(self, other: object) -> bool:
46
+ return True
47
+
48
+ def __eq__(self, other: object) -> bool:
49
+ return isinstance(other, self.__class__)
50
+
51
+ def __gt__(self, other: object) -> bool:
52
+ return False
53
+
54
+ def __ge__(self, other: object) -> bool:
55
+ return False
56
+
57
+ def __neg__(self: object) -> InfinityType:
58
+ return Infinity
59
+
60
+
61
+ NegativeInfinity = NegativeInfinityType()
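+
+
+ # Illustrative sketch (not part of the upstream module): the sentinels
+ # sort below and above every other value, which is how packaging.version
+ # uses them when building comparison keys.
+ if __name__ == "__main__":  # pragma: no cover
+     assert NegativeInfinity < 0 < Infinity
+     assert -Infinity == NegativeInfinity
+     print(sorted([Infinity, 3, NegativeInfinity, 7]))  # [-Infinity, 3, 7, Infinity]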
venv/lib/python3.13/site-packages/packaging/_tokenizer.py ADDED
@@ -0,0 +1,195 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Iterator, NoReturn
7
+
8
+ from .specifiers import Specifier
9
+
10
+
11
+ @dataclass
12
+ class Token:
13
+ name: str
14
+ text: str
15
+ position: int
16
+
17
+
18
+ class ParserSyntaxError(Exception):
19
+ """The provided source text could not be parsed correctly."""
20
+
21
+ def __init__(
22
+ self,
23
+ message: str,
24
+ *,
25
+ source: str,
26
+ span: tuple[int, int],
27
+ ) -> None:
28
+ self.span = span
29
+ self.message = message
30
+ self.source = source
31
+
32
+ super().__init__()
33
+
34
+ def __str__(self) -> str:
35
+ marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
36
+ return "\n ".join([self.message, self.source, marker])
37
+
38
+
39
+ DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
40
+ "LEFT_PARENTHESIS": r"\(",
41
+ "RIGHT_PARENTHESIS": r"\)",
42
+ "LEFT_BRACKET": r"\[",
43
+ "RIGHT_BRACKET": r"\]",
44
+ "SEMICOLON": r";",
45
+ "COMMA": r",",
46
+ "QUOTED_STRING": re.compile(
47
+ r"""
48
+ (
49
+ ('[^']*')
50
+ |
51
+ ("[^"]*")
52
+ )
53
+ """,
54
+ re.VERBOSE,
55
+ ),
56
+ "OP": r"(===|==|~=|!=|<=|>=|<|>)",
57
+ "BOOLOP": r"\b(or|and)\b",
58
+ "IN": r"\bin\b",
59
+ "NOT": r"\bnot\b",
60
+ "VARIABLE": re.compile(
61
+ r"""
62
+ \b(
63
+ python_version
64
+ |python_full_version
65
+ |os[._]name
66
+ |sys[._]platform
67
+ |platform_(release|system)
68
+ |platform[._](version|machine|python_implementation)
69
+ |python_implementation
70
+ |implementation_(name|version)
71
+ |extras?
72
+ |dependency_groups
73
+ )\b
74
+ """,
75
+ re.VERBOSE,
76
+ ),
77
+ "SPECIFIER": re.compile(
78
+ Specifier._operator_regex_str + Specifier._version_regex_str,
79
+ re.VERBOSE | re.IGNORECASE,
80
+ ),
81
+ "AT": r"\@",
82
+ "URL": r"[^ \t]+",
83
+ "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
84
+ "VERSION_PREFIX_TRAIL": r"\.\*",
85
+ "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
86
+ "WS": r"[ \t]+",
87
+ "END": r"$",
88
+ }
89
+
90
+
91
+ class Tokenizer:
92
+ """Context-sensitive token parsing.
93
+
94
+ Provides methods to examine the input stream to check whether the next token
95
+ matches.
96
+ """
97
+
98
+ def __init__(
99
+ self,
100
+ source: str,
101
+ *,
102
+ rules: dict[str, str | re.Pattern[str]],
103
+ ) -> None:
104
+ self.source = source
105
+ self.rules: dict[str, re.Pattern[str]] = {
106
+ name: re.compile(pattern) for name, pattern in rules.items()
107
+ }
108
+ self.next_token: Token | None = None
109
+ self.position = 0
110
+
111
+ def consume(self, name: str) -> None:
112
+ """Move beyond provided token name, if at current position."""
113
+ if self.check(name):
114
+ self.read()
115
+
116
+ def check(self, name: str, *, peek: bool = False) -> bool:
117
+ """Check whether the next token has the provided name.
118
+
119
+ By default, if the check succeeds, the token *must* be read before
120
+ another check. If `peek` is set to `True`, the token is not loaded and
121
+ would need to be checked again.
122
+ """
123
+ assert self.next_token is None, (
124
+ f"Cannot check for {name!r}, already have {self.next_token!r}"
125
+ )
126
+ assert name in self.rules, f"Unknown token name: {name!r}"
127
+
128
+ expression = self.rules[name]
129
+
130
+ match = expression.match(self.source, self.position)
131
+ if match is None:
132
+ return False
133
+ if not peek:
134
+ self.next_token = Token(name, match[0], self.position)
135
+ return True
136
+
137
+ def expect(self, name: str, *, expected: str) -> Token:
138
+ """Expect a certain token name next, failing with a syntax error otherwise.
139
+
140
+ The token is *not* read.
141
+ """
142
+ if not self.check(name):
143
+ raise self.raise_syntax_error(f"Expected {expected}")
144
+ return self.read()
145
+
146
+ def read(self) -> Token:
147
+ """Consume the next token and return it."""
148
+ token = self.next_token
149
+ assert token is not None
150
+
151
+ self.position += len(token.text)
152
+ self.next_token = None
153
+
154
+ return token
155
+
156
+ def raise_syntax_error(
157
+ self,
158
+ message: str,
159
+ *,
160
+ span_start: int | None = None,
161
+ span_end: int | None = None,
162
+ ) -> NoReturn:
163
+ """Raise ParserSyntaxError at the given position."""
164
+ span = (
165
+ self.position if span_start is None else span_start,
166
+ self.position if span_end is None else span_end,
167
+ )
168
+ raise ParserSyntaxError(
169
+ message,
170
+ source=self.source,
171
+ span=span,
172
+ )
173
+
174
+ @contextlib.contextmanager
175
+ def enclosing_tokens(
176
+ self, open_token: str, close_token: str, *, around: str
177
+ ) -> Iterator[None]:
178
+ if self.check(open_token):
179
+ open_position = self.position
180
+ self.read()
181
+ else:
182
+ open_position = None
183
+
184
+ yield
185
+
186
+ if open_position is None:
187
+ return
188
+
189
+ if not self.check(close_token):
190
+ self.raise_syntax_error(
191
+ f"Expected matching {close_token} for {open_token}, after {around}",
192
+ span_start=open_position,
193
+ )
194
+
195
+ self.read()
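+
+
+ # Illustrative sketch (not part of the upstream module): walk the tokens
+ # of a simple marker expression using the default rules.
+ if __name__ == "__main__":  # pragma: no cover
+     tok = Tokenizer('os_name == "posix"', rules=DEFAULT_RULES)
+     for name in ("VARIABLE", "WS", "OP", "WS", "QUOTED_STRING"):
+         print(tok.expect(name, expected=name))
+     tok.expect("END", expected="end of input")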
venv/lib/python3.13/site-packages/packaging/markers.py ADDED
@@ -0,0 +1,362 @@
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ from __future__ import annotations
6
+
7
+ import operator
8
+ import os
9
+ import platform
10
+ import sys
11
+ from typing import AbstractSet, Any, Callable, Literal, TypedDict, Union, cast
12
+
13
+ from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
14
+ from ._parser import parse_marker as _parse_marker
15
+ from ._tokenizer import ParserSyntaxError
16
+ from .specifiers import InvalidSpecifier, Specifier
17
+ from .utils import canonicalize_name
18
+
19
+ __all__ = [
20
+ "EvaluateContext",
21
+ "InvalidMarker",
22
+ "Marker",
23
+ "UndefinedComparison",
24
+ "UndefinedEnvironmentName",
25
+ "default_environment",
26
+ ]
27
+
28
+ Operator = Callable[[str, Union[str, AbstractSet[str]]], bool]
29
+ EvaluateContext = Literal["metadata", "lock_file", "requirement"]
30
+ MARKERS_ALLOWING_SET = {"extras", "dependency_groups"}
31
+
32
+
33
+ class InvalidMarker(ValueError):
34
+ """
35
+ An invalid marker was found, users should refer to PEP 508.
36
+ """
37
+
38
+
39
+ class UndefinedComparison(ValueError):
40
+ """
41
+ An invalid operation was attempted on a value that doesn't support it.
42
+ """
43
+
44
+
45
+ class UndefinedEnvironmentName(ValueError):
46
+ """
47
+ A name was attempted to be used that does not exist inside of the
48
+ environment.
49
+ """
50
+
51
+
52
+ class Environment(TypedDict):
53
+ implementation_name: str
54
+ """The implementation's identifier, e.g. ``'cpython'``."""
55
+
56
+ implementation_version: str
57
+ """
58
+ The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
59
+ ``'7.3.13'`` for PyPy3.10 v7.3.13.
60
+ """
61
+
62
+ os_name: str
63
+ """
64
+ The value of :py:data:`os.name`. The name of the operating system dependent module
65
+ imported, e.g. ``'posix'``.
66
+ """
67
+
68
+ platform_machine: str
69
+ """
70
+ Returns the machine type, e.g. ``'i386'``.
71
+
72
+ An empty string if the value cannot be determined.
73
+ """
74
+
75
+ platform_release: str
76
+ """
77
+ The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
78
+
79
+ An empty string if the value cannot be determined.
80
+ """
81
+
82
+ platform_system: str
83
+ """
84
+ The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
85
+
86
+ An empty string if the value cannot be determined.
87
+ """
88
+
89
+ platform_version: str
90
+ """
91
+ The system's release version, e.g. ``'#3 on degas'``.
92
+
93
+ An empty string if the value cannot be determined.
94
+ """
95
+
96
+ python_full_version: str
97
+ """
98
+ The Python version as string ``'major.minor.patchlevel'``.
99
+
100
+ Note that unlike the Python :py:data:`sys.version`, this value will always include
101
+ the patchlevel (it defaults to 0).
102
+ """
103
+
104
+ platform_python_implementation: str
105
+ """
106
+ A string identifying the Python implementation, e.g. ``'CPython'``.
107
+ """
108
+
109
+ python_version: str
110
+ """The Python version as string ``'major.minor'``."""
111
+
112
+ sys_platform: str
113
+ """
114
+ This string contains a platform identifier that can be used to append
115
+ platform-specific components to :py:data:`sys.path`, for instance.
116
+
117
+ For Unix systems, except on Linux and AIX, this is the lowercased OS name as
118
+ returned by ``uname -s`` with the first part of the version as returned by
119
+ ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
120
+ was built.
121
+ """
122
+
123
+
124
+ def _normalize_extra_values(results: Any) -> Any:
125
+ """
126
+ Normalize extra values.
127
+ """
128
+ if isinstance(results[0], tuple):
129
+ lhs, op, rhs = results[0]
130
+ if isinstance(lhs, Variable) and lhs.value == "extra":
131
+ normalized_extra = canonicalize_name(rhs.value)
132
+ rhs = Value(normalized_extra)
133
+ elif isinstance(rhs, Variable) and rhs.value == "extra":
134
+ normalized_extra = canonicalize_name(lhs.value)
135
+ lhs = Value(normalized_extra)
136
+ results[0] = lhs, op, rhs
137
+ return results
138
+
139
+
140
+ def _format_marker(
141
+ marker: list[str] | MarkerAtom | str, first: bool | None = True
142
+ ) -> str:
143
+ assert isinstance(marker, (list, tuple, str))
144
+
145
+ # Sometimes we have a structure like [[...]] which is a single item list
146
+ # where the single item is itself its own list. In that case we want to skip
147
+ # the rest of this function so that we don't get extraneous () on the
148
+ # outside.
149
+ if (
150
+ isinstance(marker, list)
151
+ and len(marker) == 1
152
+ and isinstance(marker[0], (list, tuple))
153
+ ):
154
+ return _format_marker(marker[0])
155
+
156
+ if isinstance(marker, list):
157
+ inner = (_format_marker(m, first=False) for m in marker)
158
+ if first:
159
+ return " ".join(inner)
160
+ else:
161
+ return "(" + " ".join(inner) + ")"
162
+ elif isinstance(marker, tuple):
163
+ return " ".join([m.serialize() for m in marker])
164
+ else:
165
+ return marker
166
+
167
+
168
+ _operators: dict[str, Operator] = {
169
+ "in": lambda lhs, rhs: lhs in rhs,
170
+ "not in": lambda lhs, rhs: lhs not in rhs,
171
+ "<": operator.lt,
172
+ "<=": operator.le,
173
+ "==": operator.eq,
174
+ "!=": operator.ne,
175
+ ">=": operator.ge,
176
+ ">": operator.gt,
177
+ }
178
+
179
+
180
+ def _eval_op(lhs: str, op: Op, rhs: str | AbstractSet[str]) -> bool:
181
+ if isinstance(rhs, str):
182
+ try:
183
+ spec = Specifier("".join([op.serialize(), rhs]))
184
+ except InvalidSpecifier:
185
+ pass
186
+ else:
187
+ return spec.contains(lhs, prereleases=True)
188
+
189
+ oper: Operator | None = _operators.get(op.serialize())
190
+ if oper is None:
191
+ raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
192
+
193
+ return oper(lhs, rhs)
194
+
195
+
196
+ def _normalize(
197
+ lhs: str, rhs: str | AbstractSet[str], key: str
198
+ ) -> tuple[str, str | AbstractSet[str]]:
199
+ # PEP 685 – Comparison of extra names for optional distribution dependencies
200
+ # https://peps.python.org/pep-0685/
201
+ # > When comparing extra names, tools MUST normalize the names being
202
+ # > compared using the semantics outlined in PEP 503 for names
203
+ if key == "extra":
204
+ assert isinstance(rhs, str), "extra value must be a string"
205
+ return (canonicalize_name(lhs), canonicalize_name(rhs))
206
+ if key in MARKERS_ALLOWING_SET:
207
+ if isinstance(rhs, str): # pragma: no cover
208
+ return (canonicalize_name(lhs), canonicalize_name(rhs))
209
+ else:
210
+ return (canonicalize_name(lhs), {canonicalize_name(v) for v in rhs})
211
+
212
+ # other environment markers don't have such standards
213
+ return lhs, rhs
214
+
215
+
216
+ def _evaluate_markers(
217
+ markers: MarkerList, environment: dict[str, str | AbstractSet[str]]
218
+ ) -> bool:
219
+ groups: list[list[bool]] = [[]]
220
+
221
+ for marker in markers:
222
+ assert isinstance(marker, (list, tuple, str))
223
+
224
+ if isinstance(marker, list):
225
+ groups[-1].append(_evaluate_markers(marker, environment))
226
+ elif isinstance(marker, tuple):
227
+ lhs, op, rhs = marker
228
+
229
+ if isinstance(lhs, Variable):
230
+ environment_key = lhs.value
231
+ lhs_value = environment[environment_key]
232
+ rhs_value = rhs.value
233
+ else:
234
+ lhs_value = lhs.value
235
+ environment_key = rhs.value
236
+ rhs_value = environment[environment_key]
237
+ assert isinstance(lhs_value, str), "lhs must be a string"
238
+ lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
239
+ groups[-1].append(_eval_op(lhs_value, op, rhs_value))
240
+ else:
241
+ assert marker in ["and", "or"]
242
+ if marker == "or":
243
+ groups.append([])
244
+
245
+ return any(all(item) for item in groups)
246
+
247
+
248
+ def format_full_version(info: sys._version_info) -> str:
249
+ version = f"{info.major}.{info.minor}.{info.micro}"
250
+ kind = info.releaselevel
251
+ if kind != "final":
252
+ version += kind[0] + str(info.serial)
253
+ return version
254
+
255
+
256
+ def default_environment() -> Environment:
257
+ iver = format_full_version(sys.implementation.version)
258
+ implementation_name = sys.implementation.name
259
+ return {
260
+ "implementation_name": implementation_name,
261
+ "implementation_version": iver,
262
+ "os_name": os.name,
263
+ "platform_machine": platform.machine(),
264
+ "platform_release": platform.release(),
265
+ "platform_system": platform.system(),
266
+ "platform_version": platform.version(),
267
+ "python_full_version": platform.python_version(),
268
+ "platform_python_implementation": platform.python_implementation(),
269
+ "python_version": ".".join(platform.python_version_tuple()[:2]),
270
+ "sys_platform": sys.platform,
271
+ }
272
+
273
+
274
+ class Marker:
275
+ def __init__(self, marker: str) -> None:
276
+ # Note: We create a Marker object without calling this constructor in
277
+ # packaging.requirements.Requirement. If any additional logic is
278
+ # added here, make sure to mirror/adapt Requirement.
279
+ try:
280
+ self._markers = _normalize_extra_values(_parse_marker(marker))
281
+ # The attribute `_markers` can be described in terms of a recursive type:
282
+ # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
283
+ #
284
+ # For example, the following expression:
285
+ # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
286
+ #
287
+ # is parsed into:
288
+ # [
289
+ # (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
290
+ # 'and',
291
+ # [
292
+ # (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
293
+ # 'or',
294
+ # (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
295
+ # ]
296
+ # ]
297
+ except ParserSyntaxError as e:
298
+ raise InvalidMarker(str(e)) from e
299
+
300
+ def __str__(self) -> str:
301
+ return _format_marker(self._markers)
302
+
303
+ def __repr__(self) -> str:
304
+ return f"<Marker('{self}')>"
305
+
306
+ def __hash__(self) -> int:
307
+ return hash((self.__class__.__name__, str(self)))
308
+
309
+ def __eq__(self, other: Any) -> bool:
310
+ if not isinstance(other, Marker):
311
+ return NotImplemented
312
+
313
+ return str(self) == str(other)
314
+
315
+ def evaluate(
316
+ self,
317
+ environment: dict[str, str] | None = None,
318
+ context: EvaluateContext = "metadata",
319
+ ) -> bool:
320
+ """Evaluate a marker.
321
+
322
+ Return the boolean from evaluating the given marker against the
323
+ environment. environment is an optional argument to override all or
324
+ part of the determined environment. The *context* parameter specifies what
325
+ context the markers are being evaluated for, which influences what markers
326
+ are considered valid. Acceptable values are "metadata" (for core metadata;
327
+ default), "lock_file", and "requirement" (i.e. all other situations).
328
+
329
+ The environment is determined from the current Python process.
330
+ """
331
+ current_environment = cast(
332
+ "dict[str, str | AbstractSet[str]]", default_environment()
333
+ )
334
+ if context == "lock_file":
335
+ current_environment.update(
336
+ extras=frozenset(), dependency_groups=frozenset()
337
+ )
338
+ elif context == "metadata":
339
+ current_environment["extra"] = ""
340
+ if environment is not None:
341
+ current_environment.update(environment)
342
+ # The API used to allow setting extra to None. We need to handle this
343
+ # case for backwards compatibility.
344
+ if "extra" in current_environment and current_environment["extra"] is None:
345
+ current_environment["extra"] = ""
346
+
347
+ return _evaluate_markers(
348
+ self._markers, _repair_python_full_version(current_environment)
349
+ )
350
+
351
+
352
+ def _repair_python_full_version(
353
+ env: dict[str, str | AbstractSet[str]],
354
+ ) -> dict[str, str | AbstractSet[str]]:
355
+ """
356
+ Work around platform.python_version() returning something that is not PEP 440
357
+ compliant for non-tagged Python builds.
358
+ """
359
+ python_full_version = cast(str, env["python_full_version"])
360
+ if python_full_version.endswith("+"):
361
+ env["python_full_version"] = f"{python_full_version}local"
362
+ return env
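+
+
+ # Illustrative sketch (not part of the upstream module): evaluate a marker
+ # against the running interpreter and against a partially overridden
+ # environment.
+ if __name__ == "__main__":  # pragma: no cover
+     marker = Marker('python_version >= "3.9" and os_name == "posix"')
+     print(marker.evaluate())  # uses default_environment()
+     print(marker.evaluate({"os_name": "nt"}))  # override a single variable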
venv/lib/python3.13/site-packages/packaging/metadata.py ADDED
@@ -0,0 +1,862 @@
1
+ from __future__ import annotations
2
+
3
+ import email.feedparser
4
+ import email.header
5
+ import email.message
6
+ import email.parser
7
+ import email.policy
8
+ import pathlib
9
+ import sys
10
+ import typing
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Generic,
15
+ Literal,
16
+ TypedDict,
17
+ cast,
18
+ )
19
+
20
+ from . import licenses, requirements, specifiers, utils
21
+ from . import version as version_module
22
+ from .licenses import NormalizedLicenseExpression
23
+
24
+ T = typing.TypeVar("T")
25
+
26
+
27
+ if sys.version_info >= (3, 11): # pragma: no cover
28
+ ExceptionGroup = ExceptionGroup
29
+ else: # pragma: no cover
30
+
31
+ class ExceptionGroup(Exception):
32
+ """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
33
+
34
+ If :external:exc:`ExceptionGroup` is already defined by Python itself,
35
+ that version is used instead.
36
+ """
37
+
38
+ message: str
39
+ exceptions: list[Exception]
40
+
41
+ def __init__(self, message: str, exceptions: list[Exception]) -> None:
42
+ self.message = message
43
+ self.exceptions = exceptions
44
+
45
+ def __repr__(self) -> str:
46
+ return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
47
+
48
+
49
+ class InvalidMetadata(ValueError):
50
+ """A metadata field contains invalid data."""
51
+
52
+ field: str
53
+ """The name of the field that contains invalid data."""
54
+
55
+ def __init__(self, field: str, message: str) -> None:
56
+ self.field = field
57
+ super().__init__(message)
58
+
59
+
60
+ # The RawMetadata class attempts to make as few assumptions about the underlying
61
+ # serialization formats as possible. The idea is that as long as a serialization
62
+ # formats offer some very basic primitives in *some* way then we can support
63
+ # serializing to and from that format.
64
+ class RawMetadata(TypedDict, total=False):
65
+ """A dictionary of raw core metadata.
66
+
67
+ Each field in core metadata maps to a key of this dictionary (when data is
68
+ provided). The key is lower-case and underscores are used instead of dashes
69
+ compared to the equivalent core metadata field. Any core metadata field that
70
+ can be specified multiple times or can hold multiple values in a single
71
+ field have a key with a plural name. See :class:`Metadata` whose attributes
72
+ match the keys of this dictionary.
73
+
74
+ Core metadata fields that can be specified multiple times are stored as a
75
+ list or dict depending on which is appropriate for the field. Any fields
76
+ which hold multiple values in a single field are stored as a list.
77
+
78
+ """
79
+
80
+ # Metadata 1.0 - PEP 241
81
+ metadata_version: str
82
+ name: str
83
+ version: str
84
+ platforms: list[str]
85
+ summary: str
86
+ description: str
87
+ keywords: list[str]
88
+ home_page: str
89
+ author: str
90
+ author_email: str
91
+ license: str
92
+
93
+ # Metadata 1.1 - PEP 314
94
+ supported_platforms: list[str]
95
+ download_url: str
96
+ classifiers: list[str]
97
+ requires: list[str]
98
+ provides: list[str]
99
+ obsoletes: list[str]
100
+
101
+ # Metadata 1.2 - PEP 345
102
+ maintainer: str
103
+ maintainer_email: str
104
+ requires_dist: list[str]
105
+ provides_dist: list[str]
106
+ obsoletes_dist: list[str]
107
+ requires_python: str
108
+ requires_external: list[str]
109
+ project_urls: dict[str, str]
110
+
111
+ # Metadata 2.0
112
+ # PEP 426 attempted to completely revamp the metadata format
113
+ # but got stuck without ever being able to build consensus on
114
+ # it and ultimately ended up withdrawn.
115
+ #
116
+ # However, a number of tools had started emitting METADATA with
117
+ # `2.0` Metadata-Version, so for historical reasons, this version
118
+ # was skipped.
119
+
120
+ # Metadata 2.1 - PEP 566
121
+ description_content_type: str
122
+ provides_extra: list[str]
123
+
124
+ # Metadata 2.2 - PEP 643
125
+ dynamic: list[str]
126
+
127
+ # Metadata 2.3 - PEP 685
128
+ # No new fields were added in PEP 685, just some edge case were
129
+ # tightened up to provide better interoptability.
130
+
131
+ # Metadata 2.4 - PEP 639
132
+ license_expression: str
133
+ license_files: list[str]
134
+
135
+
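+ # A small, hypothetical RawMetadata value, as ``parse_email`` below would
+ # produce it (a sketch only; the field values are made up):
+ #
+ #     raw: RawMetadata = {
+ #         "metadata_version": "2.1",
+ #         "name": "example-package",
+ #         "version": "1.0.0",
+ #         "keywords": ["example", "demo"],
+ #         "project_urls": {"Homepage": "https://example.com"},
+ #     }
+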
+ _STRING_FIELDS = {
+     "author",
+     "author_email",
+     "description",
+     "description_content_type",
+     "download_url",
+     "home_page",
+     "license",
+     "license_expression",
+     "maintainer",
+     "maintainer_email",
+     "metadata_version",
+     "name",
+     "requires_python",
+     "summary",
+     "version",
+ }
+
+ _LIST_FIELDS = {
+     "classifiers",
+     "dynamic",
+     "license_files",
+     "obsoletes",
+     "obsoletes_dist",
+     "platforms",
+     "provides",
+     "provides_dist",
+     "provides_extra",
+     "requires",
+     "requires_dist",
+     "requires_external",
+     "supported_platforms",
+ }
+
+ _DICT_FIELDS = {
+     "project_urls",
+ }
+
+
+ def _parse_keywords(data: str) -> list[str]:
+     """Split a string of comma-separated keywords into a list of keywords."""
+     return [k.strip() for k in data.split(",")]
+
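+ # For example (a hypothetical value; whitespace around each keyword is
+ # stripped):
+ #
+ #     >>> _parse_keywords("packaging, metadata ,pep-566")
+ #     ['packaging', 'metadata', 'pep-566']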
+
+ def _parse_project_urls(data: list[str]) -> dict[str, str]:
+     """Parse a list of label/URL string pairings separated by a comma."""
+     urls = {}
+     for pair in data:
+         # Our logic is slightly tricky here as we want to try and do
+         # *something* reasonable with malformed data.
+         #
+         # The main thing that we have to worry about is data that does
+         # not have a ',' at all to split the label from the value. There
+         # isn't a singular right answer here, and we will fail validation
+         # later on (if the caller is validating) so it doesn't *really*
+         # matter, but since the missing value has to be an empty str
+         # and our return value is dict[str, str], if we let the key
+         # be the missing value, then they'd have multiple '' values that
+         # overwrite each other in an accumulating dict.
+         #
+         # The other potential issue is that it's possible to have the
+         # same label multiple times in the metadata, with no solid "right"
+         # answer with what to do in that case. As such, we'll do the only
+         # thing we can, which is treat the field as unparseable and add it
+         # to our list of unparsed fields.
+         parts = [p.strip() for p in pair.split(",", 1)]
+         parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
+
+         # TODO: The spec doesn't say anything about if the keys should be
+         #       considered case sensitive or not... logically they should
+         #       be case-preserving and case-insensitive, but doing that
+         #       would open up more cases where we might have duplicate
+         #       entries.
+         label, url = parts
+         if label in urls:
+             # The label already exists in our set of urls, so this field
+             # is unparseable, and we can just add the whole thing to our
+             # unparseable data and stop processing it.
+             raise KeyError("duplicate labels in project urls")
+         urls[label] = url
+
+     return urls
+
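+ # For example (hypothetical labels and URLs): each "label, URL" string becomes
+ # one mapping entry, while a duplicate label raises KeyError so the caller can
+ # file the whole field under its unparsed data.
+ #
+ #     >>> _parse_project_urls(["Homepage, https://example.com",
+ #     ...                      "Tracker, https://example.com/issues"])
+ #     {'Homepage': 'https://example.com', 'Tracker': 'https://example.com/issues'}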
+
+ def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
+     """Get the body of the message."""
+     # If our source is a str, then our caller has managed encodings for us,
+     # and we don't need to deal with it.
+     if isinstance(source, str):
+         payload = msg.get_payload()
+         assert isinstance(payload, str)
+         return payload
+     # If our source is a bytes, then we're managing the encoding and we need
+     # to deal with it.
+     else:
+         bpayload = msg.get_payload(decode=True)
+         assert isinstance(bpayload, bytes)
+         try:
+             return bpayload.decode("utf8", "strict")
+         except UnicodeDecodeError as exc:
+             raise ValueError("payload in an invalid encoding") from exc
+
+
+ # The various parse_FORMAT functions here are intended to be as lenient as
+ # possible in their parsing, while still returning a correctly typed
+ # RawMetadata.
+ #
+ # To aid in this, we also generally want to do as little touching of the
+ # data as possible, except where there are possibly some historic holdovers
+ # that make valid data awkward to work with.
+ #
+ # While this is a lower-level, intermediate format than our ``Metadata``
+ # class, some light touch ups can make a massive difference in usability.
+
+ # Map METADATA fields to RawMetadata.
+ _EMAIL_TO_RAW_MAPPING = {
+     "author": "author",
+     "author-email": "author_email",
+     "classifier": "classifiers",
+     "description": "description",
+     "description-content-type": "description_content_type",
+     "download-url": "download_url",
+     "dynamic": "dynamic",
+     "home-page": "home_page",
+     "keywords": "keywords",
+     "license": "license",
+     "license-expression": "license_expression",
+     "license-file": "license_files",
+     "maintainer": "maintainer",
+     "maintainer-email": "maintainer_email",
+     "metadata-version": "metadata_version",
+     "name": "name",
+     "obsoletes": "obsoletes",
+     "obsoletes-dist": "obsoletes_dist",
+     "platform": "platforms",
+     "project-url": "project_urls",
+     "provides": "provides",
+     "provides-dist": "provides_dist",
+     "provides-extra": "provides_extra",
+     "requires": "requires",
+     "requires-dist": "requires_dist",
+     "requires-external": "requires_external",
+     "requires-python": "requires_python",
+     "summary": "summary",
+     "supported-platform": "supported_platforms",
+     "version": "version",
+ }
+ _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
+
+
+ def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
+     """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
+
+     This function returns a two-item tuple of dicts. The first dict is of
+     recognized fields from the core metadata specification. Fields that can be
+     parsed and translated into Python's built-in types are converted
+     appropriately. All other fields are left as-is. Fields that are allowed to
+     appear multiple times are stored as lists.
+
+     The second dict contains all other fields from the metadata. This includes
+     any unrecognized fields. It also includes any fields which are expected to
+     be parsed into a built-in type but were not formatted appropriately. Finally,
+     any fields that are expected to appear only once but are repeated are
+     included in this dict.
+
+     """
+     raw: dict[str, str | list[str] | dict[str, str]] = {}
+     unparsed: dict[str, list[str]] = {}
+
+     if isinstance(data, str):
+         parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
+     else:
+         parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
+
+     # We have to wrap parsed.keys() in a set, because in the case of multiple
+     # values for a key (a list), the key will appear multiple times in the
+     # list of keys, but we're avoiding that by using get_all().
+     for name in frozenset(parsed.keys()):
+         # Header names in RFC are case insensitive, so we'll normalize to all
+         # lower case to make comparisons easier.
+         name = name.lower()
+
+         # We use get_all() here, even for fields that aren't multiple use,
+         # because otherwise someone could have e.g. two Name fields, and we
+         # would just silently ignore it rather than doing something about it.
+         headers = parsed.get_all(name) or []
+
+         # The way the email module works when parsing bytes is that it
+         # unconditionally decodes the bytes as ascii using the surrogateescape
+         # handler. When you pull that data back out (such as with get_all() ),
+         # it looks to see if the str has any surrogate escapes, and if it does
+         # it wraps it in a Header object instead of returning the string.
+         #
+         # As such, we'll look for those Header objects, and fix up the encoding.
+         value = []
+         # Flag if we have run into any issues processing the headers, thus
+         # signalling that the data belongs in 'unparsed'.
+         valid_encoding = True
+         for h in headers:
+             # It's unclear if this can return more types than just a Header or
+             # a str, so we'll just assert here to make sure.
+             assert isinstance(h, (email.header.Header, str))
+
+             # If it's a header object, we need to do our little dance to get
+             # the real data out of it. In cases where there is invalid data
+             # we're going to end up with mojibake, but there's no obvious, good
+             # way around that without reimplementing parts of the Header object
+             # ourselves.
+             #
+             # That should be fine since, if mojibake happens, this key is
+             # going into the unparsed dict anyways.
+             if isinstance(h, email.header.Header):
+                 # The Header object stores its data as chunks, and each chunk
+                 # can be independently encoded, so we'll need to check each
+                 # of them.
+                 chunks: list[tuple[bytes, str | None]] = []
+                 for bin, encoding in email.header.decode_header(h):
+                     try:
+                         bin.decode("utf8", "strict")
+                     except UnicodeDecodeError:
+                         # Enable mojibake.
+                         encoding = "latin1"
+                         valid_encoding = False
+                     else:
+                         encoding = "utf8"
+                     chunks.append((bin, encoding))
+
+                 # Turn our chunks back into a Header object, then let that
+                 # Header object do the right thing to turn them into a
+                 # string for us.
+                 value.append(str(email.header.make_header(chunks)))
+             # This is already a string, so just add it.
+             else:
+                 value.append(h)
+
+         # We've processed all of our values to get them into a list of str,
+         # but we may have mojibake data, in which case this is an unparsed
+         # field.
+         if not valid_encoding:
+             unparsed[name] = value
+             continue
+
+         raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
+         if raw_name is None:
+             # This is a bit of a weird situation, we've encountered a key that
+             # we don't know what it means, so we don't know whether it's meant
+             # to be a list or not.
+             #
+             # Since we can't really tell one way or another, we'll just leave it
+             # as a list, even though it may be a single item list, because that's
+             # what makes the most sense for email headers.
+             unparsed[name] = value
+             continue
+
+         # If this is one of our string fields, then we'll check to see if our
+         # value is a list of a single item. If it is then we'll assume that
+         # it was emitted as a single string, and unwrap the str from inside
+         # the list.
+         #
+         # If it's any other kind of data, then we haven't the faintest clue
+         # what we should parse it as, and we have to just add it to our list
+         # of unparsed stuff.
+         if raw_name in _STRING_FIELDS and len(value) == 1:
+             raw[raw_name] = value[0]
+         # If this is one of our list of string fields, then we can just assign
+         # the value, since email *only* has strings, and our get_all() call
+         # above ensures that this is a list.
+         elif raw_name in _LIST_FIELDS:
+             raw[raw_name] = value
+         # Special Case: Keywords
+         # The keywords field is implemented in the metadata spec as a str,
+         # but it conceptually is a list of strings, and is serialized using
+         # ", ".join(keywords), so we'll do some light data massaging to turn
+         # this into what it logically is.
+         elif raw_name == "keywords" and len(value) == 1:
+             raw[raw_name] = _parse_keywords(value[0])
+         # Special Case: Project-URL
+         # Project URLs are implemented in the metadata spec as a list of
+         # specially-formatted strings that represent a key and a value, which
+         # is fundamentally a mapping, however the email format doesn't support
+         # mappings in a sane way, so it was crammed into a list of strings
+         # instead.
+         #
+         # We will do a little light data massaging to turn this into a map as
+         # it logically should be.
+         elif raw_name == "project_urls":
+             try:
+                 raw[raw_name] = _parse_project_urls(value)
+             except KeyError:
+                 unparsed[name] = value
+         # Nothing that we've done has managed to parse this, so it'll just
+         # throw it in our unparseable data and move on.
+         else:
+             unparsed[name] = value
+
+     # We need to support getting the Description from the message payload in
+     # addition to getting it from the headers. This does mean, though, there
+     # is the possibility of it being set both ways, in which case we put both
+     # in 'unparsed' since we don't know which is right.
+     try:
+         payload = _get_payload(parsed, data)
+     except ValueError:
+         unparsed.setdefault("description", []).append(
+             parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
+         )
+     else:
+         if payload:
+             # Check to see if we've already got a description, if so then both
+             # it, and this body move to unparseable.
+             if "description" in raw:
+                 description_header = cast(str, raw.pop("description"))
+                 unparsed.setdefault("description", []).extend(
+                     [description_header, payload]
+                 )
+             elif "description" in unparsed:
+                 unparsed["description"].append(payload)
+             else:
+                 raw["description"] = payload
+
+     # We need to cast our `raw` to a RawMetadata because a TypedDict only
+     # supports literal key names while we compute ours dynamically; the way
+     # this function is implemented guarantees that only valid key names end
+     # up in the dict.
+     return cast(RawMetadata, raw), unparsed
+
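+ # For example (a minimal, hypothetical METADATA document): well-formed fields
+ # land in ``raw`` while unrecognized or malformed ones land in ``unparsed``.
+ #
+ #     raw, unparsed = parse_email(
+ #         "Metadata-Version: 2.1\nName: example-package\nVersion: 1.0.0\n"
+ #     )
+ #     assert raw["name"] == "example-package"
+ #     assert unparsed == {}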
+
+ _NOT_FOUND = object()
+
+
+ # Keep the two values in sync.
+ _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+ _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
+
+ _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
+
+
+ class _Validator(Generic[T]):
+     """Validate a metadata field.
+
+     All _process_*() methods correspond to a core metadata field. The method is
+     called with the field's raw value. If the raw value is valid it is returned
+     in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
+     If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
+     as appropriate).
+     """
+
+     name: str
+     raw_name: str
+     added: _MetadataVersion
+
+     def __init__(
+         self,
+         *,
+         added: _MetadataVersion = "1.0",
+     ) -> None:
+         self.added = added
+
+     def __set_name__(self, _owner: Metadata, name: str) -> None:
+         self.name = name
+         self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
+
+     def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
+         # With Python 3.8, the caching can be replaced with functools.cached_property().
+         # No need to check the cache as attribute lookup will resolve into the
+         # instance's __dict__ before __get__ is called.
+         cache = instance.__dict__
+         value = instance._raw.get(self.name)
+
+         # To make the _process_* methods easier, we'll check if the value is None
+         # and if this field is NOT a required attribute, and if both of those
+         # things are true, we'll skip the converter. This will mean that the
+         # converters never have to deal with the None union.
+         if self.name in _REQUIRED_ATTRS or value is not None:
+             try:
+                 converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
+             except AttributeError:
+                 pass
+             else:
+                 value = converter(value)
+
+         cache[self.name] = value
+         try:
+             del instance._raw[self.name]  # type: ignore[misc]
+         except KeyError:
+             pass
+
+         return cast(T, value)
+
+     def _invalid_metadata(
+         self, msg: str, cause: Exception | None = None
+     ) -> InvalidMetadata:
+         exc = InvalidMetadata(
+             self.raw_name, msg.format_map({"field": repr(self.raw_name)})
+         )
+         exc.__cause__ = cause
+         return exc
+
+     def _process_metadata_version(self, value: str) -> _MetadataVersion:
+         # Implicitly makes Metadata-Version required.
+         if value not in _VALID_METADATA_VERSIONS:
+             raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
+         return cast(_MetadataVersion, value)
+
+     def _process_name(self, value: str) -> str:
+         if not value:
+             raise self._invalid_metadata("{field} is a required field")
+         # Validate the name as a side-effect.
+         try:
+             utils.canonicalize_name(value, validate=True)
+         except utils.InvalidName as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return value
+
+     def _process_version(self, value: str) -> version_module.Version:
+         if not value:
+             raise self._invalid_metadata("{field} is a required field")
+         try:
+             return version_module.parse(value)
+         except version_module.InvalidVersion as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_summary(self, value: str) -> str:
+         """Check the field contains no newlines."""
+         if "\n" in value:
+             raise self._invalid_metadata("{field} must be a single line")
+         return value
+
+     def _process_description_content_type(self, value: str) -> str:
+         content_types = {"text/plain", "text/x-rst", "text/markdown"}
+         message = email.message.EmailMessage()
+         message["content-type"] = value
+
+         content_type, parameters = (
+             # Defaults to `text/plain` if parsing failed.
+             message.get_content_type().lower(),
+             message["content-type"].params,
+         )
+         # Check if content-type is valid or defaulted to `text/plain` and thus was
+         # not parseable.
+         if content_type not in content_types or content_type not in value.lower():
+             raise self._invalid_metadata(
+                 f"{{field}} must be one of {list(content_types)}, not {value!r}"
+             )
+
+         charset = parameters.get("charset", "UTF-8")
+         if charset != "UTF-8":
+             raise self._invalid_metadata(
+                 f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
+             )
+
+         markdown_variants = {"GFM", "CommonMark"}
+         variant = parameters.get("variant", "GFM")  # Use an acceptable default.
+         if content_type == "text/markdown" and variant not in markdown_variants:
+             raise self._invalid_metadata(
+                 f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
+                 f"not {variant!r}",
+             )
+         return value
+
+     def _process_dynamic(self, value: list[str]) -> list[str]:
+         for dynamic_field in map(str.lower, value):
+             if dynamic_field in {"name", "version", "metadata-version"}:
+                 raise self._invalid_metadata(
+                     f"{dynamic_field!r} is not allowed as a dynamic field"
+                 )
+             elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
+                 raise self._invalid_metadata(
+                     f"{dynamic_field!r} is not a valid dynamic field"
+                 )
+         return list(map(str.lower, value))
+
+     def _process_provides_extra(
+         self,
+         value: list[str],
+     ) -> list[utils.NormalizedName]:
+         normalized_names = []
+         try:
+             for name in value:
+                 normalized_names.append(utils.canonicalize_name(name, validate=True))
+         except utils.InvalidName as exc:
+             raise self._invalid_metadata(
+                 f"{name!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return normalized_names
+
+     def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
+         try:
+             return specifiers.SpecifierSet(value)
+         except specifiers.InvalidSpecifier as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_requires_dist(
+         self,
+         value: list[str],
+     ) -> list[requirements.Requirement]:
+         reqs = []
+         try:
+             for req in value:
+                 reqs.append(requirements.Requirement(req))
+         except requirements.InvalidRequirement as exc:
+             raise self._invalid_metadata(
+                 f"{req!r} is invalid for {{field}}", cause=exc
+             ) from exc
+         else:
+             return reqs
+
+     def _process_license_expression(
+         self, value: str
+     ) -> NormalizedLicenseExpression | None:
+         try:
+             return licenses.canonicalize_license_expression(value)
+         except ValueError as exc:
+             raise self._invalid_metadata(
+                 f"{value!r} is invalid for {{field}}", cause=exc
+             ) from exc
+
+     def _process_license_files(self, value: list[str]) -> list[str]:
+         paths = []
+         for path in value:
+             if ".." in path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, "
+                     "parent directory indicators are not allowed"
+                 )
+             if "*" in path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must be resolved"
+                 )
+             if (
+                 pathlib.PurePosixPath(path).is_absolute()
+                 or pathlib.PureWindowsPath(path).is_absolute()
+             ):
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must be relative"
+                 )
+             if pathlib.PureWindowsPath(path).as_posix() != path:
+                 raise self._invalid_metadata(
+                     f"{path!r} is invalid for {{field}}, paths must use '/' delimiter"
+                 )
+             paths.append(path)
+         return paths
+
+
+ class Metadata:
+     """Representation of distribution metadata.
+
+     Compared to :class:`RawMetadata`, this class provides objects representing
+     metadata fields instead of only using built-in types. Any invalid metadata
+     will cause :exc:`InvalidMetadata` to be raised (with a
+     :py:attr:`~BaseException.__cause__` attribute as appropriate).
+     """
+
+     _raw: RawMetadata
+
+     @classmethod
+     def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
+         """Create an instance from :class:`RawMetadata`.
+
+         If *validate* is true, all metadata will be validated. All exceptions
+         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+         """
+         ins = cls()
+         ins._raw = data.copy()  # Mutations occur due to caching enriched values.
+
+         if validate:
+             exceptions: list[Exception] = []
+             try:
+                 metadata_version = ins.metadata_version
+                 metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
+             except InvalidMetadata as metadata_version_exc:
+                 exceptions.append(metadata_version_exc)
+                 metadata_version = None
+
+             # Check the fields that are present as well as the required
+             # fields (so their absence can be reported).
+             fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
+             # Remove fields that have already been checked.
+             fields_to_check -= {"metadata_version"}
+
+             for key in fields_to_check:
+                 try:
+                     if metadata_version:
+                         # Can't use getattr() as that triggers descriptor protocol which
+                         # will fail due to no value for the instance argument.
+                         try:
+                             field_metadata_version = cls.__dict__[key].added
+                         except KeyError:
+                             exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
+                             exceptions.append(exc)
+                             continue
+                         field_age = _VALID_METADATA_VERSIONS.index(
+                             field_metadata_version
+                         )
+                         if field_age > metadata_age:
+                             field = _RAW_TO_EMAIL_MAPPING[key]
+                             exc = InvalidMetadata(
+                                 field,
+                                 f"{field} introduced in metadata version "
+                                 f"{field_metadata_version}, not {metadata_version}",
+                             )
+                             exceptions.append(exc)
+                             continue
+                     getattr(ins, key)
+                 except InvalidMetadata as exc:
+                     exceptions.append(exc)
+
+             if exceptions:
+                 raise ExceptionGroup("invalid metadata", exceptions)
+
+         return ins
+
+     @classmethod
+     def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
+         """Parse metadata from email headers.
+
+         If *validate* is true, the metadata will be validated. All exceptions
+         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
+         """
+         raw, unparsed = parse_email(data)
+
+         if validate:
+             exceptions: list[Exception] = []
+             for unparsed_key in unparsed:
+                 if unparsed_key in _EMAIL_TO_RAW_MAPPING:
+                     message = f"{unparsed_key!r} has invalid data"
+                 else:
+                     message = f"unrecognized field: {unparsed_key!r}"
+                 exceptions.append(InvalidMetadata(unparsed_key, message))
+
+             if exceptions:
+                 raise ExceptionGroup("unparsed", exceptions)
+
+         try:
+             return cls.from_raw(raw, validate=validate)
+         except ExceptionGroup as exc_group:
+             raise ExceptionGroup(
+                 "invalid or unparsed metadata", exc_group.exceptions
+             ) from None
+
+     metadata_version: _Validator[_MetadataVersion] = _Validator()
+     """:external:ref:`core-metadata-metadata-version`
+     (required; validated to be a valid metadata version)"""
+     # `name` is not normalized/typed to NormalizedName so as to provide access to
+     # the original/raw name.
+     name: _Validator[str] = _Validator()
+     """:external:ref:`core-metadata-name`
+     (required; validated using :func:`~packaging.utils.canonicalize_name` and its
+     *validate* parameter)"""
+     version: _Validator[version_module.Version] = _Validator()
+     """:external:ref:`core-metadata-version` (required)"""
+     dynamic: _Validator[list[str] | None] = _Validator(
+         added="2.2",
+     )
+     """:external:ref:`core-metadata-dynamic`
+     (validated against core metadata field names and lowercased)"""
+     platforms: _Validator[list[str] | None] = _Validator()
+     """:external:ref:`core-metadata-platform`"""
+     supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-supported-platform`"""
+     summary: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
+     description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
+     """:external:ref:`core-metadata-description`"""
+     description_content_type: _Validator[str | None] = _Validator(added="2.1")
+     """:external:ref:`core-metadata-description-content-type` (validated)"""
+     keywords: _Validator[list[str] | None] = _Validator()
+     """:external:ref:`core-metadata-keywords`"""
+     home_page: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-home-page`"""
+     download_url: _Validator[str | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-download-url`"""
+     author: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-author`"""
+     author_email: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-author-email`"""
+     maintainer: _Validator[str | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-maintainer`"""
+     maintainer_email: _Validator[str | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-maintainer-email`"""
+     license: _Validator[str | None] = _Validator()
+     """:external:ref:`core-metadata-license`"""
+     license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
+         added="2.4"
+     )
+     """:external:ref:`core-metadata-license-expression`"""
+     license_files: _Validator[list[str] | None] = _Validator(added="2.4")
+     """:external:ref:`core-metadata-license-file`"""
+     classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
+     """:external:ref:`core-metadata-classifier`"""
+     requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
+         added="1.2"
+     )
+     """:external:ref:`core-metadata-requires-dist`"""
+     requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
+         added="1.2"
+     )
+     """:external:ref:`core-metadata-requires-python`"""
+     # Because `Requires-External` allows for non-PEP 440 version specifiers, we
+     # don't do any processing on the values.
+     requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-requires-external`"""
+     project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-project-url`"""
+     # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
+     # regardless of metadata version.
+     provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
+         added="2.1",
+     )
+     """:external:ref:`core-metadata-provides-extra`"""
+     provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-provides-dist`"""
+     obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
+     """:external:ref:`core-metadata-obsoletes-dist`"""
+     requires: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Requires`` (deprecated)"""
+     provides: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Provides`` (deprecated)"""
+     obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
+     """``Obsoletes`` (deprecated)"""
venv/lib/python3.13/site-packages/packaging/py.typed ADDED
File without changes
venv/lib/python3.13/site-packages/packaging/requirements.py ADDED
@@ -0,0 +1,91 @@
+ # This file is dual licensed under the terms of the Apache License, Version
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
+ # for complete details.
+ from __future__ import annotations
+
+ from typing import Any, Iterator
+
+ from ._parser import parse_requirement as _parse_requirement
+ from ._tokenizer import ParserSyntaxError
+ from .markers import Marker, _normalize_extra_values
+ from .specifiers import SpecifierSet
+ from .utils import canonicalize_name
+
+
+ class InvalidRequirement(ValueError):
+     """
+     An invalid requirement was found, users should refer to PEP 508.
+     """
+
+
+ class Requirement:
+     """Parse a requirement.
+
+     Parse a given requirement string into its parts, such as name, specifier,
+     URL, and extras. Raises InvalidRequirement on a badly-formed requirement
+     string.
+     """
+
+     # TODO: Can we test whether something is contained within a requirement?
+     #       If so how do we do that? Do we need to test against the _name_ of
+     #       the thing as well as the version? What about the markers?
+     # TODO: Can we normalize the name and extra name?
+
+     def __init__(self, requirement_string: str) -> None:
+         try:
+             parsed = _parse_requirement(requirement_string)
+         except ParserSyntaxError as e:
+             raise InvalidRequirement(str(e)) from e
+
+         self.name: str = parsed.name
+         self.url: str | None = parsed.url or None
+         self.extras: set[str] = set(parsed.extras or [])
+         self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
+         self.marker: Marker | None = None
+         if parsed.marker is not None:
+             self.marker = Marker.__new__(Marker)
+             self.marker._markers = _normalize_extra_values(parsed.marker)
+
+     def _iter_parts(self, name: str) -> Iterator[str]:
+         yield name
+
+         if self.extras:
+             formatted_extras = ",".join(sorted(self.extras))
+             yield f"[{formatted_extras}]"
+
+         if self.specifier:
+             yield str(self.specifier)
+
+         if self.url:
+             yield f"@ {self.url}"
+             if self.marker:
+                 yield " "
+
+         if self.marker:
+             yield f"; {self.marker}"
+
+     def __str__(self) -> str:
+         return "".join(self._iter_parts(self.name))
+
+     def __repr__(self) -> str:
+         return f"<Requirement('{self}')>"
+
+     def __hash__(self) -> int:
+         return hash(
+             (
+                 self.__class__.__name__,
+                 *self._iter_parts(canonicalize_name(self.name)),
+             )
+         )
+
+     def __eq__(self, other: Any) -> bool:
+         if not isinstance(other, Requirement):
+             return NotImplemented
+
+         return (
+             canonicalize_name(self.name) == canonicalize_name(other.name)
+             and self.extras == other.extras
+             and self.specifier == other.specifier
+             and self.url == other.url
+             and self.marker == other.marker
+         )
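+
+
+ # For example (a hypothetical requirement string):
+ #
+ #     req = Requirement('example-package[extra]>=1.0; python_version >= "3.9"')
+ #     assert req.name == "example-package"
+ #     assert req.extras == {"extra"}
+ #     assert str(req.specifier) == ">=1.0"
+ #     assert str(req) == 'example-package[extra]>=1.0; python_version >= "3.9"'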