diff --git a/.gitattributes b/.gitattributes index b0ee5bbefa67d1e28604e4fb174a8a92f2582909..e183c02fc2620bdc2b51f1ef7cf385394bbbbbf8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -325,3 +325,6 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/ .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/crypto.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/SSL.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/networkx/algorithms/isomorphism/tests/__pycache__/test_vf2pp_helpers.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/networkx/utils/__pycache__/backends.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/httptools/parser/url_parser.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/httptools/parser/parser.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/METADATA b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..8ac541d1b185dcf0ddecd0a43668bb3f124346e7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/METADATA @@ -0,0 +1,159 @@ +Metadata-Version: 2.1 +Name: compressed-tensors +Version: 0.9.1 +Summary: Library for utilization of compressed safetensors of neural network models +Home-page: https://github.com/neuralmagic/compressed-tensors +Author: Neuralmagic, Inc. 
+Author-email: support@neuralmagic.com +License: Apache 2.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: torch>=1.7.0 +Requires-Dist: transformers +Requires-Dist: pydantic>=2.0 +Provides-Extra: accelerate +Requires-Dist: accelerate; extra == "accelerate" +Provides-Extra: dev +Requires-Dist: black==22.12.0; extra == "dev" +Requires-Dist: isort==5.8.0; extra == "dev" +Requires-Dist: wheel>=0.36.2; extra == "dev" +Requires-Dist: flake8>=3.8.3; extra == "dev" +Requires-Dist: pytest>=6.0.0; extra == "dev" +Requires-Dist: nbconvert>=7.16.3; extra == "dev" + +# compressed-tensors + +The `compressed-tensors` library extends the [safetensors](https://github.com/huggingface/safetensors) format, providing a versatile and efficient way to store and manage compressed tensor data. This library supports various quantization and sparsity schemes, making it a unified format for handling different model optimizations like GPTQ, AWQ, SmoothQuant, INT8, FP8, SparseGPT, and more. + +## Why `compressed-tensors`? + +As model compression becomes increasingly important for efficient deployment of LLMs, the landscape of quantization and compression techniques has become increasingly fragmented. +Each method often comes with its own storage format and loading procedures, making it challenging to work with multiple techniques or switch between them. +`compressed-tensors` addresses this by providing a single, extensible format that can represent a wide variety of compression schemes. + +* **Unified Checkpoint Format**: Supports various compression schemes in a single, consistent format. +* **Wide Compatibility**: Works with popular quantization methods like GPTQ, SmoothQuant, and FP8. See [llm-compressor](https://github.com/vllm-project/llm-compressor) +* **Flexible Quantization Support**: + * Weight-only quantization (e.g., W4A16, W8A16, WnA16) + * Activation quantization (e.g., W8A8) + * KV cache quantization + * Non-uniform schemes (different layers can be quantized in different ways!) +* **Sparsity Support**: Handles both unstructured and semi-structured (e.g., 2:4) sparsity patterns. +* **Open-Source Integration**: Designed to work seamlessly with Hugging Face models and PyTorch. + +This allows developers and researchers to easily experiment with composing different quantization methods, simplify model deployment pipelines, and reduce the overhead of supporting multiple compression formats in inference engines. + +## Installation + +### From [PyPI](https://pypi.org/project/compressed-tensors) + +Stable release: +```bash +pip install compressed-tensors +``` + +Nightly release: +```bash +pip install compressed-tensors-nightly +``` + +### From Source + +```bash +git clone https://github.com/neuralmagic/compressed-tensors +cd compressed-tensors +pip install -e . +``` + +## Getting started + +### Saving/Loading Compressed Tensors (Bitmask Compression) + +The function `save_compressed` uses the `compression_format` argument to apply compression to tensors. +The function `load_compressed` reverses the process: converts the compressed weights on disk to decompressed weights in device memory. 
+ +```python +from compressed_tensors import save_compressed, load_compressed, BitmaskConfig +from torch import Tensor +from typing import Dict + +# the example BitmaskConfig method efficiently compresses +# tensors with large number of zero entries +compression_config = BitmaskConfig() + +tensors: Dict[str, Tensor] = {"tensor_1": Tensor( + [[0.0, 0.0, 0.0], + [1.0, 1.0, 1.0]] +)} +# compress tensors using BitmaskConfig compression format (save them efficiently on disk) +save_compressed(tensors, "model.safetensors", compression_format=compression_config.format) + +# decompress tensors (load_compressed returns a generator for memory efficiency) +decompressed_tensors = {} +for tensor_name, tensor in load_compressed("model.safetensors", compression_config = compression_config): + decompressed_tensors[tensor_name] = tensor +``` + +## Saving/Loading Compressed Models (Bitmask Compression) + +We can apply bitmask compression to a whole model. For more detailed example see `example` directory. +```python +from compressed_tensors import save_compressed_model, load_compressed, BitmaskConfig +from transformers import AutoModelForCausalLM + +model_name = "neuralmagic/llama2.c-stories110M-pruned50" +model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto") + +original_state_dict = model.state_dict() + +compression_config = BitmaskConfig() + +# save compressed model weights +save_compressed_model(model, "compressed_model.safetensors", compression_format=compression_config.format) + +# load compressed model weights (`dict` turns generator into a dictionary) +state_dict = dict(load_compressed("compressed_model.safetensors", compression_config)) +``` + +For more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb). + + +## Saving a Compressed Model with PTQ + +We can use compressed-tensors to run basic post training quantization (PTQ) and save the quantized model compressed on disk + +```python +model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" +model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", torch_dtype="auto") + +config = QuantizationConfig.parse_file("./examples/bit_packing/int4_config.json") +config.quantization_status = QuantizationStatus.CALIBRATION +apply_quantization_config(model, config) + +dataset = load_dataset("ptb_text_only")["train"] +tokenizer = AutoTokenizer.from_pretrained(model_name) + +def tokenize_function(examples): + return tokenizer(examples["sentence"], padding=False, truncation=True, max_length=1024) + +tokenized_dataset = dataset.map(tokenize_function, batched=True) +data_loader = DataLoader(tokenized_dataset, batch_size=1, collate_fn=DefaultDataCollator()) + +with torch.no_grad(): + for idx, sample in tqdm(enumerate(data_loader), desc="Running calibration"): + sample = {key: value.to(device) for key,value in sample.items()} + _ = model(**sample) + + if idx >= 512: + break + +model.apply(freeze_module_quantization) +model.apply(compress_quantized_weights) + +output_dir = "./ex_llama1.1b_w4a16_packed_quantize" +compressor = ModelCompressor(quantization_config=config) +compressed_state_dict = compressor.compress(model) +model.save_pretrained(output_dir, state_dict=compressed_state_dict) +``` + +For more in-depth tutorial on quantization compression, refer to the [notebook](./examples/quantize_and_pack_int4.ipynb). 
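+
+The PTQ snippet above omits its imports. A minimal sketch of what it appears to assume is shown below; the exact `compressed_tensors` module paths are an assumption inferred from the package layout, not part of the upstream README.
+
+```python
+# Hypothetical import preamble for the PTQ example above.
+# NOTE: the compressed_tensors import paths are assumed from the package
+# layout listed in RECORD; verify them against the installed version.
+import torch
+from tqdm import tqdm
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator
+
+from compressed_tensors.compressors import ModelCompressor
+from compressed_tensors.quantization import (
+    QuantizationConfig,
+    QuantizationStatus,
+    apply_quantization_config,
+    compress_quantized_weights,
+    freeze_module_quantization,
+)
+
+device = "cuda:0"  # the calibration loop moves each batch to this device
+```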
diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/RECORD b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..22a80a5f1ff88dc0c0c0c509fff18d59f19ada8c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/RECORD @@ -0,0 +1,100 @@ +compressed_tensors-0.9.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +compressed_tensors-0.9.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357 +compressed_tensors-0.9.1.dist-info/METADATA,sha256=LTzdui2DBwsv09xaTEsW2X66rd775Jf2lY9v9hs_WJg,6782 +compressed_tensors-0.9.1.dist-info/RECORD,, +compressed_tensors-0.9.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92 +compressed_tensors-0.9.1.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19 +compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812 +compressed_tensors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/__pycache__/base.cpython-311.pyc,, +compressed_tensors/__pycache__/version.cpython-311.pyc,, +compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875 +compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825 +compressed_tensors/compressors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/compressors/__pycache__/base.cpython-311.pyc,, +compressed_tensors/compressors/__pycache__/helpers.cpython-311.pyc,, +compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743 +compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431 +compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666 +compressed_tensors/compressors/model_compressors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/compressors/model_compressors/__pycache__/model_compressor.cpython-311.pyc,, +compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=3WyzAW2Rm_uLprxwO2QH6FR76W6Mk4r2yedayaSZHhw,18396 +compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714 +compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc,, +compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc,, +compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc,, +compressed_tensors/compressors/quantized_compressors/base.py,sha256=LVqSSqSjGi8LB-X13zC_0AFHc8BobGQVC0zjInDhOWE,7217 +compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=fahmPJFz49rVS7q705uQwZ0kUtdP46GuXR7nPr6uIqI,4943 +compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=OO5dceCfNVuY8A23kBg6z2wk-zGUVqR_MyLvObvT7pk,7741 +compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737 +compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc,, +compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc,, 
+compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc,, +compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc,, +compressed_tensors/compressors/sparse_compressors/base.py,sha256=9e841MQWr0j8m33ejDw_jP5_BIpQ5099x9_pvuZ-Nr0,5944 +compressed_tensors/compressors/sparse_compressors/dense.py,sha256=lSKNWRx6H7aUqaJj1j4qbXk8Gkm1UohbnvW1Rvq6Ra4,1284 +compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=_g139pe4iAFn5jvGIEk4v-qMoyx9ID6E88vriPSNYV4,8604 +compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=7zSr9bqkpuH1ivQpxtYBNxXIoElal7Jo1nSKpZN_IFk,5633 +compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675 +compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/marlin_24.cpython-311.pyc,, +compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=BMIQWTLlnUvxy14iEJegtiP75WHJeOVojey9mKOK1hE,9427 +compressed_tensors/config/__init__.py,sha256=8sOoZ6xvYSC79mBvEtO8l6xk4PC80d29AnnJiGMrY2M,737 +compressed_tensors/config/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/config/__pycache__/base.cpython-311.pyc,, +compressed_tensors/config/__pycache__/dense.cpython-311.pyc,, +compressed_tensors/config/__pycache__/sparse_24_bitmask.cpython-311.pyc,, +compressed_tensors/config/__pycache__/sparse_bitmask.cpython-311.pyc,, +compressed_tensors/config/base.py,sha256=R3iUmFf1MslEjin5LgwQbmfJHIsS7Uw0UIxfn780uqY,3479 +compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317 +compressed_tensors/config/sparse_24_bitmask.py,sha256=Lhj39zT2V1hxftprvxvneyhv45ShlXOKd75DBbDTyTE,1401 +compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308 +compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617 +compressed_tensors/linear/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/linear/__pycache__/compressed_linear.cpython-311.pyc,, +compressed_tensors/linear/compressed_linear.py,sha256=MJa-UfoKhIkdUWRD1shrXXri2cOwR5GK0a4t4bNYosM,3268 +compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760 +compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc,, +compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc,, +compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772 +compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc,, +compressed_tensors/quantization/lifecycle/apply.py,sha256=XS4M6N1opKBybhkuQsS338QVb_CKMhUM5TUKrqoNQ0k,16517 +compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296 
+compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVVBdhhSAlgZqFC26vZARxE0ko,12961 +compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944 +compressed_tensors/quantization/lifecycle/initialize.py,sha256=hymYtayTSumm8KCYAYPY267aWmlsJpt8oQFiRblk8qE,7452 +compressed_tensors/quantization/quant_args.py,sha256=jwC__lSmuiJ2qSJYYZGgWgQNbZu6YhhS0e-qugrTNXE,9058 +compressed_tensors/quantization/quant_config.py,sha256=vx06wBo91p4LCb3Vzd-2eCTUeIf_Sz2ZXRP263eQyjQ,10385 +compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010 +compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656 +compressed_tensors/quantization/utils/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/quantization/utils/__pycache__/helpers.cpython-311.pyc,, +compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonNj4hkTYL_mXrMs2LtAD8,14100 +compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658 +compressed_tensors/registry/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/registry/__pycache__/registry.cpython-311.pyc,, +compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896 +compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808 +compressed_tensors/utils/__pycache__/__init__.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/helpers.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/offload.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/permutations_24.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/permute.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/safetensors_load.cpython-311.pyc,, +compressed_tensors/utils/__pycache__/semi_structured_conversions.cpython-311.pyc,, +compressed_tensors/utils/helpers.py,sha256=xQHZXwIAAybC8mMTiAtWSOeggZMT1JOC6_wcDvlo2yk,10320 +compressed_tensors/utils/offload.py,sha256=cMmzd9IdlNbs29CReHj1PPSLUM6OWaT5YumlLT5eP3w,13845 +compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530 +compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355 +compressed_tensors/utils/safetensors_load.py,sha256=fBuoHVPoBt1mkvqFJ60zQIASX_4nhl0-6QfFS27NY8I,11430 +compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489 +compressed_tensors/version.py,sha256=cPIrNBysxeJxrC4lzqGpVUu_oM56xF851VDnvn1gsew,1585 diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..79d5c89a71989389294854aa34e329701325f8b0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.45.1) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a05844b986341fe812005800f6def048765cb58 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/compressed_tensors-0.9.1.dist-info/top_level.txt @@ -0,0 +1 @@ +compressed_tensors diff --git 
a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..79a03ca513a7c94562cb1f8d83aa8bfc736ba0c2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2015 MagicStack Inc. http://magic.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/METADATA b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..57c5f5c27548ff066a3641292a6688101873ae4a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/METADATA @@ -0,0 +1,133 @@ +Metadata-Version: 2.1 +Name: httptools +Version: 0.6.4 +Summary: A collection of framework independent HTTP protocol utils. +Home-page: https://github.com/MagicStack/httptools +Author: Yury Selivanov +Author-email: yury@magic.io +License: MIT +Platform: macOS +Platform: POSIX +Platform: Windows +Classifier: License :: OSI Approved :: MIT License +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: Python :: 3 +Classifier: Operating System :: POSIX +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Environment :: Web Environment +Classifier: Development Status :: 5 - Production/Stable +Requires-Python: >=3.8.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Provides-Extra: test +Requires-Dist: Cython>=0.29.24; extra == "test" + +![Tests](https://github.com/MagicStack/httptools/workflows/Tests/badge.svg) + +httptools is a Python binding for the nodejs HTTP parser. + +The package is available on PyPI: `pip install httptools`. + + +# APIs + +httptools contains two classes `httptools.HttpRequestParser`, +`httptools.HttpResponseParser` (fulfilled through +[llhttp](https://github.com/nodejs/llhttp)) and a function for +parsing URLs `httptools.parse_url` (through +[http-parse](https://github.com/nodejs/http-parser) for now). +See unittests for examples. 
+ + +```python + +class HttpRequestParser: + + def __init__(self, protocol): + """HttpRequestParser + + protocol -- a Python object with the following methods + (all optional): + + - on_message_begin() + - on_url(url: bytes) + - on_header(name: bytes, value: bytes) + - on_headers_complete() + - on_body(body: bytes) + - on_message_complete() + - on_chunk_header() + - on_chunk_complete() + - on_status(status: bytes) + """ + + def get_http_version(self) -> str: + """Return an HTTP protocol version.""" + + def should_keep_alive(self) -> bool: + """Return ``True`` if keep-alive mode is preferred.""" + + def should_upgrade(self) -> bool: + """Return ``True`` if the parsed request is a valid Upgrade request. + The method exposes a flag set just before on_headers_complete. + Calling this method earlier will only yield `False`. + """ + + def feed_data(self, data: bytes): + """Feed data to the parser. + + Will eventually trigger callbacks on the ``protocol`` + object. + + On HTTP upgrade, this method will raise an + ``HttpParserUpgrade`` exception, with its sole argument + set to the offset of the non-HTTP data in ``data``. + """ + + def get_method(self) -> bytes: + """Return HTTP request method (GET, HEAD, etc)""" + + +class HttpResponseParser: + + """Has all methods except ``get_method()`` that + HttpRequestParser has.""" + + def get_status_code(self) -> int: + """Return the status code of the HTTP response""" + + +def parse_url(url: bytes): + """Parse URL strings into a structured Python object. + + Returns an instance of ``httptools.URL`` class with the + following attributes: + + - schema: bytes + - host: bytes + - port: int + - path: bytes + - query: bytes + - fragment: bytes + - userinfo: bytes + """ +``` + + +# Development + +1. Clone this repository with + `git clone --recursive git@github.com:MagicStack/httptools.git` + +2. Create a virtual environment with Python 3: + `python3 -m venv envname` + +3. Activate the environment with `source envname/bin/activate` + +4. Install development requirements with `pip install -e .[test]` + +5. Run `make` and `make test`. + + +# License + +MIT. 
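+
+The API listing above documents the callback protocol but points to the unittests for examples; a minimal usage sketch (not from the upstream README) is shown below. It feeds one request to `HttpRequestParser` and collects the parsed pieces via callbacks.
+
+```python
+import httptools
+
+
+class Protocol:
+    """Collects what the parser reports through its optional callbacks."""
+
+    def __init__(self):
+        self.url = None
+        self.headers = {}
+        self.body = b""
+        self.complete = False
+
+    def on_url(self, url: bytes):
+        self.url = url
+
+    def on_header(self, name: bytes, value: bytes):
+        self.headers[name] = value
+
+    def on_body(self, body: bytes):
+        self.body += body
+
+    def on_message_complete(self):
+        self.complete = True
+
+
+proto = Protocol()
+parser = httptools.HttpRequestParser(proto)
+parser.feed_data(b"GET /ping HTTP/1.1\r\nHost: example.com\r\n\r\n")
+
+print(parser.get_method())  # b'GET'
+print(proto.url)            # b'/ping'
+print(proto.headers)        # {b'Host': b'example.com'}
+print(proto.complete)       # True
+```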
diff --git a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/RECORD b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..5361ec9371cd5e0cfc76f26ca24d68130b45d01d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/RECORD @@ -0,0 +1,21 @@ +httptools-0.6.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +httptools-0.6.4.dist-info/LICENSE,sha256=9Fc-fLdnZ0X7W402-lSKqT45HPtoct2s1lEwxF6mqS0,1093 +httptools-0.6.4.dist-info/METADATA,sha256=U4VRvayKRJbdHp18Fagk4_L7RvIQJ4anhwtwS64rO9A,3583 +httptools-0.6.4.dist-info/RECORD,, +httptools-0.6.4.dist-info/WHEEL,sha256=1xuwdrbeIuHvQX3uMggPJqjJpR5WLor7jKc2zZeYNs0,224 +httptools-0.6.4.dist-info/top_level.txt,sha256=APjJKTbZcj0OQ4fdgf2eTCk82nK1n2BFXOD7ky41MPY,10 +httptools/__init__.py,sha256=plt3MIbueJdco9Dy7zoH3ksLNeyirqWagat5rwRmAjo,147 +httptools/__pycache__/__init__.cpython-311.pyc,, +httptools/__pycache__/_version.cpython-311.pyc,, +httptools/_version.py,sha256=ASqOB8fLS7jwZsM551Lc49WxYPyjteqnz1iDWmka-KA,575 +httptools/parser/__init__.py,sha256=fWyconPEHZlJojzRwmBKSn4C85OGXmKEwiEcdjHqXO8,166 +httptools/parser/__pycache__/__init__.cpython-311.pyc,, +httptools/parser/__pycache__/errors.cpython-311.pyc,, +httptools/parser/cparser.pxd,sha256=4qBxnma83Vz86Z9sOZRxjqYj20A-aLSWVGXZgTVLJqE,4977 +httptools/parser/errors.py,sha256=ZVrtN1smPIb_opQ2Ud3uCbGlNLMlECYM2-6S7r5LnHs,566 +httptools/parser/parser.cpython-311-x86_64-linux-gnu.so,sha256=UOjazfkDQuZnn0kC8JGK8CiM9JeOD9oBrP5Qr6PbAaw,1068536 +httptools/parser/parser.pyx,sha256=x0BUY9EzHNKCDaw-U8bkZ1MaKGtrOQ8iVCm1IuOtEQI,15140 +httptools/parser/python.pxd,sha256=zWCdGZh34fyQNt3BUHIUjPqY8a5sodRUkfdABxqYHgQ,138 +httptools/parser/url_cparser.pxd,sha256=X5dDI8A7T0l5HL_Czt0mTs0l_d2lXnUDHx1TN8LeiCM,779 +httptools/parser/url_parser.cpython-311-x86_64-linux-gnu.so,sha256=lXhCXgLJvVc3i6lhRppiuOEB3p7mKWrBd_inE7JNZDg,455336 +httptools/parser/url_parser.pyx,sha256=ZJVUZqrIDdhzVodA7tTtoFb570av-SczIyh2oAZXKzM,3758 diff --git a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..0e803118a0cd35f572fab9a8884f4fa97d6be7dd --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/WHEEL @@ -0,0 +1,8 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.1.0) +Root-Is-Purelib: false +Tag: cp311-cp311-manylinux_2_5_x86_64 +Tag: cp311-cp311-manylinux1_x86_64 +Tag: cp311-cp311-manylinux_2_17_x86_64 +Tag: cp311-cp311-manylinux2014_x86_64 + diff --git a/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..bef3b40b2f33a46ae13d46456df480f77ab712e3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools-0.6.4.dist-info/top_level.txt @@ -0,0 +1 @@ +httptools diff --git a/.venv/lib/python3.11/site-packages/httptools/parser/parser.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/httptools/parser/parser.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..bd6f78a0c1e1b5610030260326619e843e5387cc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools/parser/parser.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:50e8dacdf90342e6679f4902f0918af0288cf4978e0fda01acfe50afa3db01ac +size 1068536 diff --git a/.venv/lib/python3.11/site-packages/httptools/parser/url_parser.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/httptools/parser/url_parser.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..52a5fef5214a9a6d7f8331cd02162ae34474d048 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/httptools/parser/url_parser.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9578425e02c9bd57378ba961469a62b8e101de9ee6296ac177f8a713b24d6438 +size 455336 diff --git a/.venv/lib/python3.11/site-packages/networkx/utils/__pycache__/backends.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/utils/__pycache__/backends.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ce92b44872bf0c2317e9d745f11092c55c27106 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/utils/__pycache__/backends.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2373c4c1cea9ffed4078a7dd1eb72fa70a887af62d64fcf618f3c3a838a27b9 +size 102722 diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/License.txt b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/License.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0d485c1c82d2c86b62ac0deeb8568fcdb58e0bb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/License.txt @@ -0,0 +1,154 @@ +LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS + +This license agreement, including exhibits attached ("Agreement”) is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of a NVIDIA software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here is a description of the types of items that may be included in a SDK: source code, header files, APIs, data sets and assets (examples include images, textures, models, scenes, videos, native API input/output files), binary software, sample code, libraries, utility programs, programming code and documentation. + +This Agreement can be accepted only by an adult of legal age of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company or other legal entity, you represent that you have the legal authority to bind the entity to this Agreement, in which case “you” will mean the entity you represent. + +If you don’t have the required age or authority to accept this Agreement, or if you don’t accept all the terms and conditions of this Agreement, do not download, install or use the SDK. + +You agree to use the SDK only for purposes that are permitted by (a) this Agreement, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + +1. License. 
+ +1.1 Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants you a non-exclusive, non-transferable license, without the right to sublicense (except as expressly provided in this Agreement) to: + +(i) Install and use the SDK, + +(ii) Modify and create derivative works of sample source code delivered in the SDK, and + +(iii) Distribute those portions of the SDK that are identified in this Agreement as distributable, as incorporated in object code format into a software application that meets the distribution requirements indicated in this Agreement. + +1.2 Distribution Requirements + +These are the distribution requirements for you to exercise the distribution grant: + +(i) Your application must have material additional functionality, beyond the included portions of the SDK. + +(ii) The distributable portions of the SDK shall only be accessed by your application. + +(iii) The following notice shall be included in modifications and derivative works of sample source code distributed: “This software contains source code provided by NVIDIA Corporation.” + +(iv) Unless a developer tool is identified in this Agreement as distributable, it is delivered for your internal use only. + +(v) The terms under which you distribute your application must be consistent with the terms of this Agreement, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. Additionally, you agree that you will protect the privacy, security and legal rights of your application users. + +(vi) You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SDK not in compliance with the requirements of this Agreement, and to enforce the terms of your agreements with respect to distributed SDK. + +1.3 Authorized Users + +You may allow employees and contractors of your entity or of your subsidiary(ies) to access and use the SDK from your secure network to perform work on your behalf. + +If you are an academic institution you may allow users enrolled or employed by the academic institution to access and use the SDK from your secure network. + +You are responsible for the compliance with the terms of this Agreement by your authorized users. If you become aware that your authorized users didn’t follow the terms of this Agreement, you agree to take reasonable steps to resolve the non-compliance and prevent new occurrences. + +1.4 Pre-Release SDK +The SDK versions identified as alpha, beta, preview or otherwise as pre-release, may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, accessibility, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. Use of a pre-release SDK may result in unexpected results, loss of data, project delays or other unpredictable damage or loss. +You may use a pre-release SDK at your own risk, understanding that pre-release SDKs are not intended for use in production or business-critical systems. +NVIDIA may choose not to make available a commercial version of any pre-release SDK. NVIDIA may also choose to abandon development and terminate the availability of a pre-release SDK at any time without liability. +1.5 Updates + +NVIDIA may, at its option, make available patches, workarounds or other updates to this SDK. Unless the updates are provided with their separate governing terms, they are deemed part of the SDK licensed to you as provided in this Agreement. 
+ +You agree that the form and content of the SDK that NVIDIA provides may change without prior notice to you. While NVIDIA generally maintains compatibility between versions, NVIDIA may in some cases make changes that introduce incompatibilities in future versions of the SDK. + +1.6 Third Party Licenses + +The SDK may come bundled with, or otherwise include or be distributed with, third party software licensed by a NVIDIA supplier and/or open source software provided under an open source license. Use of third party software is subject to the third-party license terms, or in the absence of third party terms, the terms of this Agreement. Copyright to third party software is held by the copyright holders indicated in the third-party software or license. + +1.7 Reservation of Rights + +NVIDIA reserves all rights, title and interest in and to the SDK not expressly granted to you under this Agreement. + +2. Limitations. + +The following license limitations apply to your use of the SDK: + +2.1 You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SDK or copies of the SDK. + +2.2 Except as expressly provided in this Agreement, you may not copy, sell, rent, sublicense, transfer, distribute, modify, or create derivative works of any portion of the SDK. + +2.3 Unless you have an agreement with NVIDIA for this purpose, you may not indicate that an application created with the SDK is sponsored or endorsed by NVIDIA. + +2.4 You may not bypass, disable, or circumvent any encryption, security, digital rights management or authentication mechanism in the SDK. + +2.5 You may not use the SDK in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SDK be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. + +2.6 Unless you have an agreement with NVIDIA for this purpose, you may not use the SDK with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SDK for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. + +2.7 You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to your use of the SDK outside of the scope of this Agreement, or not in compliance with its terms. + +3. Ownership. + +3.1 NVIDIA or its licensors hold all rights, title and interest in and to the SDK and its modifications and derivative works, including their respective intellectual property rights, subject to your rights under Section 3.2. 
This SDK may include software and materials from NVIDIA’s licensors, and these licensors are intended third party beneficiaries that may enforce this Agreement with respect to their intellectual property rights. + +3.2 You hold all rights, title and interest in and to your applications and your derivative works of the sample source code delivered in the SDK, including their respective intellectual property rights, subject to NVIDIA’s rights under section 3.1. + +3.3 You may, but don’t have to, provide to NVIDIA suggestions, feature requests or other feedback regarding the SDK, including possible enhancements or modifications to the SDK. For any feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) it without the payment of any royalties or fees to you. NVIDIA will use feedback at its choice. NVIDIA is constantly looking for ways to improve its products, so you may send feedback to NVIDIA through the developer portal at https://developer.nvidia.com. + +4. No Warranties. + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF DEALING OR COURSE OF TRADE. + +5. Limitations of Liability. + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + +These exclusions and limitations of liability shall apply regardless if NVIDIA or its affiliates have been advised of the possibility of such damages, and regardless of whether a remedy fails its essential purpose. These exclusions and limitations of liability form an essential basis of the bargain between the parties, and, absent any of these exclusions or limitations of liability, the provisions of this Agreement, including, without limitation, the economic terms, would be substantially different. + +6. Termination. + +6.1 This Agreement will continue to apply until terminated by either you or NVIDIA as described below. + +6.2 If you want to terminate this Agreement, you may do so by stopping to use the SDK. 
+ +6.3 NVIDIA may, at any time, terminate this Agreement if: (i) you fail to comply with any term of this Agreement and the non-compliance is not fixed within thirty (30) days following notice from NVIDIA (or immediately if you violate NVIDIA’s intellectual property rights); (ii) you commence or participate in any legal proceeding against NVIDIA with respect to the SDK; or (iii) NVIDIA decides to no longer provide the SDK in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. + +6.4 Upon any termination of this Agreement, you agree to promptly discontinue use of the SDK and destroy all copies in your possession or control. Your prior distributions in accordance with this Agreement are not affected by the termination of this Agreement. Upon written request, you will certify in writing that you have complied with your commitments under this section. Upon any termination of this Agreement all provisions survive except for the licenses granted to you. + +7. General. + +If you wish to assign this Agreement or your rights and obligations, including by merger, consolidation, dissolution or operation of law, contact NVIDIA to ask for permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. NVIDIA may assign, delegate or transfer this Agreement and its rights and obligations, and if to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably requested information to verify your compliance with this Agreement. + +This Agreement will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. + +The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this Agreement. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + +If any court of competent jurisdiction determines that any provision of this Agreement is illegal, invalid or unenforceable, such provision will be construed as limited to the extent necessary to be consistent with and fully enforceable under the law and the remaining provisions will remain in full force and effect. Unless otherwise specified, remedies are cumulative. + +Each party acknowledges and agrees that the other is an independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this Agreement pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and regulations. 
You agree that you will not ship, transfer or export the SDK into any country, or use the SDK in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this Agreement, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement will be delivered via mail, email or fax. You agree that any notices that NVIDIA sends you electronically will satisfy any legal communication requirements. Please direct your legal notices or other correspondence to NVIDIA Corporation, 2788 San Tomas Expressway, Santa Clara, California 95051, United States of America, Attention: Legal Department. + +This Agreement and any exhibits incorporated into this Agreement constitute the entire agreement of the parties with respect to the subject matter of this Agreement and supersede all prior negotiations or documentation exchanged between the parties relating to this SDK license. Any additional and/or conflicting terms on documents issued by you are null, void, and invalid. Any amendment or waiver under this Agreement shall be in writing and signed by representatives of both parties. + +(v. January 28, 2020) + + +cuDNN SUPPLEMENT TO SOFTWARE LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS + +The terms in this supplement govern your use of the NVIDIA cuDNN SDK under the terms of your license agreement (“Agreement”) as modified by this supplement. Capitalized terms used but not defined below have the meaning assigned to them in the Agreement. + +This supplement is an exhibit to the Agreement and is incorporated as an integral part of the Agreement. In the event of conflict between the terms in this supplement and the terms in the Agreement, the terms in this supplement govern. + +4.1 License Scope. The SDK is licensed for you to develop applications only for use in systems with NVIDIA GPUs. + +2. Distribution. The following portions of the SDK are distributable under the Agreement: the runtime files .so and .h, cudnn64_7.dll, and cudnn.lib. + +In addition to the rights above, for parties that are developing software intended solely for use on Jetson development kits or Jetson modules and running Linux for Tegra software the following shall apply: the SDK may be distributed in its entirety, as provided by NVIDIA and without separation of its components, for you and/or your licensees to create software development kits for use only on the Jetson platform and running Linux for Tegra software. + +3. Licensing. If the distribution terms in this Agreement are not suitable for your organization, or for any questions regarding this Agreement, please contact NVIDIA at nvidia-compute-license-questions@nvidia.com. + (v. 
January 28, 2020) + diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/METADATA b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..dd5524ea6b075288c44a1abc9522181541d96ab9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/METADATA @@ -0,0 +1,36 @@ +Metadata-Version: 2.1 +Name: nvidia-cudnn-cu12 +Version: 9.1.0.70 +Summary: cuDNN runtime libraries +Home-page: https://developer.nvidia.com/cuda-zone +Author: Nvidia CUDA Installer Team +Author-email: cuda_installer@nvidia.com +License: NVIDIA Proprietary Software +Keywords: cuda,nvidia,runtime,machine learning,deep learning +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: Other/Proprietary License +Classifier: Natural Language :: English +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Scientific/Engineering +Classifier: Topic :: Scientific/Engineering :: Mathematics +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Software Development +Classifier: Topic :: Software Development :: Libraries +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX :: Linux +Requires-Python: >=3 +License-File: License.txt +Requires-Dist: nvidia-cublas-cu12 + +cuDNN runtime libraries containing primitives for deep neural networks. 
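The METADATA above only declares the cuDNN runtime libraries; the RECORD that follows shows where they land, under the `nvidia.cudnn` package. As a rough, non-authoritative sketch of how one might locate those bundled shared objects from a shell, assuming the layout listed in the RECORD and a Python environment where the wheel is installed:

```bash
# Hedged sketch (not part of the wheel): find the cuDNN shared objects that
# nvidia-cudnn-cu12 installs under the nvidia.cudnn package. The nvidia.cudnn
# import path and lib/ layout are inferred from the RECORD below, not from a
# documented API, so treat them as assumptions.
CUDNN_DIR=$(python -c "import os, nvidia.cudnn; print(os.path.dirname(nvidia.cudnn.__file__))")
ls "$CUDNN_DIR"/lib/libcudnn*.so.9
```

One common reason to do this is to append `$CUDNN_DIR/lib` to `LD_LIBRARY_PATH` for tools that load cuDNN dynamically, though whether that is needed depends on the consuming framework.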
diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/RECORD b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..36f08ebd878574972eed2950257da1b6d9baab44 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/RECORD @@ -0,0 +1,36 @@ +nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/__pycache__/__init__.cpython-311.pyc,, +nvidia/cudnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/cudnn/__pycache__/__init__.cpython-311.pyc,, +nvidia/cudnn/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/cudnn/include/__pycache__/__init__.cpython-311.pyc,, +nvidia/cudnn/include/cudnn.h,sha256=EILVHTtWS6zo72_G7jNbZosr-lF_V8Bu_UKCY-XCGFU,2841 +nvidia/cudnn/include/cudnn_adv.h,sha256=S-ddXP6m4morsP31Uxk3Thx4VO-FF-t333vq2mY4Zlg,30820 +nvidia/cudnn/include/cudnn_adv_v9.h,sha256=S-ddXP6m4morsP31Uxk3Thx4VO-FF-t333vq2mY4Zlg,30820 +nvidia/cudnn/include/cudnn_backend.h,sha256=rdTyTGHQftsrhWeJS6bQwZDQCzrdyAFhoj0gbRkn1F8,2751 +nvidia/cudnn/include/cudnn_backend_v9.h,sha256=rdTyTGHQftsrhWeJS6bQwZDQCzrdyAFhoj0gbRkn1F8,2751 +nvidia/cudnn/include/cudnn_cnn.h,sha256=BDVryoG36A-oIfj-pPWldDsBoUds58TOCT6eNZVBr-s,36699 +nvidia/cudnn/include/cudnn_cnn_v9.h,sha256=BDVryoG36A-oIfj-pPWldDsBoUds58TOCT6eNZVBr-s,36699 +nvidia/cudnn/include/cudnn_graph.h,sha256=kKH3ylAvBBXYEj5p6rWX63imMBfx-O-dOrwOozFYE-k,37961 +nvidia/cudnn/include/cudnn_graph_v9.h,sha256=kKH3ylAvBBXYEj5p6rWX63imMBfx-O-dOrwOozFYE-k,37961 +nvidia/cudnn/include/cudnn_ops.h,sha256=B56tkXg6lAwKfSPH5-hid3UQi4uhRnHQbbROmBppmZ0,63633 +nvidia/cudnn/include/cudnn_ops_v9.h,sha256=B56tkXg6lAwKfSPH5-hid3UQi4uhRnHQbbROmBppmZ0,63633 +nvidia/cudnn/include/cudnn_v9.h,sha256=EILVHTtWS6zo72_G7jNbZosr-lF_V8Bu_UKCY-XCGFU,2841 +nvidia/cudnn/include/cudnn_version.h,sha256=PGhK3WeGHAhJAJgHBF3RuX5vZSB-ClQHiPYBPPR1EGI,3112 +nvidia/cudnn/include/cudnn_version_v9.h,sha256=PGhK3WeGHAhJAJgHBF3RuX5vZSB-ClQHiPYBPPR1EGI,3112 +nvidia/cudnn/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/cudnn/lib/__pycache__/__init__.cpython-311.pyc,, +nvidia/cudnn/lib/libcudnn.so.9,sha256=dKTElaJtUQQVAkHoShCEJELAJMEpzcJ0rYtqeoeUmA4,104664 +nvidia/cudnn/lib/libcudnn_adv.so.9,sha256=voF0cmqNFSvPbhNrLR-Bdjh23rHStzKbvH0qUmNbAgI,240706416 +nvidia/cudnn/lib/libcudnn_cnn.so.9,sha256=P4w7j3yZS-f7HOYL1WeitJ4WmJHTUVzB5ZUz6XA7byw,4724176 +nvidia/cudnn/lib/libcudnn_engines_precompiled.so.9,sha256=7lSDboo9BIDyq1QRh3CpfLfwLx9CjVpdBHHGuJuQz8w,569645536 +nvidia/cudnn/lib/libcudnn_engines_runtime_compiled.so.9,sha256=UZ_0FROjqBVfQw8IWsxYpfhdwWyGW4KH1UgkCVB-4sA,9585864 +nvidia/cudnn/lib/libcudnn_graph.so.9,sha256=j9hFIHx0d3AOVqA5Wkp0ag3GGBNYU1CAJAgZu2fxoTg,3442456 +nvidia/cudnn/lib/libcudnn_heuristic.so.9,sha256=lPq5jBUEBVjDyA8sGi9f2puqcq_Dmoi9zIIYX0nSQcM,86326864 +nvidia/cudnn/lib/libcudnn_ops.so.9,sha256=JCudupU64uSHjWYDJiQTWpEYoWFsokWI7VhtS8xHXGk,108421928 +nvidia_cudnn_cu12-9.1.0.70.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +nvidia_cudnn_cu12-9.1.0.70.dist-info/License.txt,sha256=Sc95vbNXNLUv5iAwE7O9dZ-B6ZjNMqosZcUduaiMYdI,18174 +nvidia_cudnn_cu12-9.1.0.70.dist-info/METADATA,sha256=V1vE7NqVAZDUF6pjtO5uhb9auIBe9qfRi_tR3rYIK3E,1570 +nvidia_cudnn_cu12-9.1.0.70.dist-info/RECORD,, +nvidia_cudnn_cu12-9.1.0.70.dist-info/WHEEL,sha256=XDTs3wIbcE-BcRO08VJlZpA6z9OaC1mOKPCGGGwuM2g,109 
+nvidia_cudnn_cu12-9.1.0.70.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7 diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..e6c30e957cfb045017a9fef3430bb8ee87c4a074 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.42.0) +Root-Is-Purelib: true +Tag: py3-none-manylinux2014_x86_64 + diff --git a/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..862f7abf232cdfbb928609856247292e81c9decb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cudnn_cu12-9.1.0.70.dist-info/top_level.txt @@ -0,0 +1 @@ +nvidia diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47daf1a98cc90598b9e9356002b274afd80b11a3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_helpers.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_helpers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70d09f4a48006f080698f66ce21575c1099c1d6f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_helpers.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_openssl_crypt.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_openssl_crypt.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0055d3ff21faaa86e44a05bd5f28f5a927cbf6e3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_openssl_crypt.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pkce.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pkce.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d9581b853b2edb97d943db8704358d85a5ee0fe Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pkce.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pure_python_crypt.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pure_python_crypt.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaa6d2e2e4b58d146e8f5c360ae663f116bf6beb Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pure_python_crypt.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pycrypto_crypt.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pycrypto_crypt.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5f3f8e3896dafcd6baee1e589786437b1e5c10a Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/_pycrypto_crypt.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/client.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/client.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e948137a4dc6e79bdf75533febbac0fe6c62c7b9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/client.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/clientsecrets.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/clientsecrets.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6ad9488ca31addab1e4157363b6eb76b84aeb9c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/clientsecrets.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/crypt.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/crypt.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fb655493bf8649ac24995bfb1e1b59ee71f7004 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/crypt.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/file.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/file.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..132b96e3d3a61e03f0922def02b1dccd4586c421 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/file.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/service_account.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/service_account.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf43a454e2438a67b35a464fc4b9b196465df385 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/service_account.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/tools.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/tools.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b75247f2685695f7de3b82726ef15f8f7ff3a08a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/tools.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/transport.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/transport.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..295862af6dd4e3e6f3733dab359f88a040e0e2b5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/__pycache__/transport.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16132789712195c54499ea6da138c2d86ad5d6e9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/__init__.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/apps.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/apps.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..157469a5ec823b22703dfb86ba704d66dc5b1800 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/apps.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/decorators.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/decorators.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ee46de9e7d9d733c91723cb7fa366e0c74ad6d8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/decorators.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/models.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/models.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc1e0922deeb57e7e37dfae407f161d16949bd6a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/models.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/signals.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/signals.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83121e4dc714da94c3c6dd8583b5c95d79e66a8c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/signals.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/storage.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/storage.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b87b56e67a10cfabbe083d1301ff1201c1e8fc80 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/storage.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/views.cpython-311.pyc b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/views.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d8f37cae28298e456c48f0db1d683750d7f1151 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/oauth2client/contrib/django_util/__pycache__/views.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/METADATA b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..11b461495bd4ce1403db05517f3ed541aa3a2d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/METADATA @@ -0,0 +1,306 @@ 
+Metadata-Version: 2.3 +Name: py-spy +Version: 0.4.0 +Classifier: Development Status :: 5 - Production/Stable +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Utilities +License-File: LICENSE +Summary: Sampling profiler for Python programs +Home-Page: https://github.com/benfred/py-spy +Author: Ben Frederickson +Author-email: Ben Frederickson +License: MIT +Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM +Project-URL: Source Code, https://github.com/benfred/py-spy + +py-spy: Sampling profiler for Python programs +===== +[![Build Status](https://github.com/benfred/py-spy/workflows/Build/badge.svg?branch=master)](https://github.com/benfred/py-spy/actions?query=branch%3Amaster) +[![FreeBSD Build Status](https://api.cirrus-ci.com/github/benfred/py-spy.svg)](https://cirrus-ci.com/github/benfred/py-spy) + +py-spy is a sampling profiler for Python programs. It lets you visualize what your Python +program is spending time on without restarting the program or modifying the code in any way. +py-spy is extremely low overhead: it is written in Rust for speed and doesn't run +in the same process as the profiled Python program. This means py-spy is safe to use against production Python code. + +py-spy works on Linux, OSX, Windows and FreeBSD, and supports profiling all recent versions of the CPython +interpreter (versions 2.3-2.7 and 3.3-3.13). + +## Installation + +Prebuilt binary wheels can be installed from PyPI with: + +``` +pip install py-spy +``` + +You can also download prebuilt binaries from the [GitHub Releases +Page](https://github.com/benfred/py-spy/releases). + +If you're a Rust user, py-spy can also be installed with: ```cargo install py-spy```. Note this +builds py-spy from source and requires `libunwind` on Linux and Window, e.g., +`apt install libunwind-dev`. + +On macOS, [py-spy is in Homebrew](https://formulae.brew.sh/formula/py-spy#default) and +can be installed with ```brew install py-spy```. + +On Arch Linux, [py-spy is in AUR](https://aur.archlinux.org/packages/py-spy/) and can be +installed with ```yay -S py-spy```. + +On Alpine Linux, [py-spy is in testing repository](https://pkgs.alpinelinux.org/packages?name=py-spy&branch=edge&repo=testing) and +can be installed with ```apk add py-spy --update-cache --repository http://dl-3.alpinelinux.org/alpine/edge/testing/ --allow-untrusted```. + +## Usage + +py-spy works from the command line and takes either the PID of the program you want to sample from +or the command line of the python program you want to run. py-spy has three subcommands +```record```, ```top``` and ```dump```: + +### record + +py-spy supports recording profiles to a file using the ```record``` command. For example, you can +generate a [flame graph](http://www.brendangregg.com/flamegraphs.html) of your python process by +going: + +``` bash +py-spy record -o profile.svg --pid 12345 +# OR +py-spy record -o profile.svg -- python myprogram.py +``` + +Which will generate an interactive SVG file looking like: + +![flame graph](./images/flamegraph.svg) + +You can change the file format to generate +[speedscope](https://github.com/jlfwong/speedscope) profiles or raw data with the ```--format``` parameter. 
+See ```py-spy record --help``` for information on other options including changing +the sampling rate, filtering to only include threads that hold the GIL, profiling native C extensions, +showing thread-ids, profiling subprocesses and more. + +### top + +Top shows a live view of what functions are taking the most time in your python program, similar +to the Unix [top](https://linux.die.net/man/1/top) command. Running py-spy with: + +``` bash +py-spy top --pid 12345 +# OR +py-spy top -- python myprogram.py +``` + +will bring up a live updating high level view of your python program: + +![console viewer demo](./images/console_viewer.gif) + +### dump + +py-spy can also display the current call stack for each python thread with the ```dump``` command: + +```bash +py-spy dump --pid 12345 +``` + +This will dump out the call stacks for each thread, and some other basic process info to the +console: + +![dump output](./images/dump.png) + +This is useful for the case where you just need a single call stack to figure out where your +python program is hung on. This command also has the ability to print out the local variables +associated with each stack frame by setting the ```--locals``` flag. + +## Frequently Asked Questions + +### Why do we need another Python profiler? + +This project aims to let you profile and debug any running Python program, even if the program is +serving production traffic. + +While there are many other python profiling projects, almost all of them require modifying +the profiled program in some way. Usually, the profiling code runs inside of the target python process, +which will slow down and change how the program operates. This means it's not generally safe +to use these profilers for debugging issues in production services since they will usually have +a noticeable impact on performance. + +### How does py-spy work? + +py-spy works by directly reading the memory of the python program using the +[process_vm_readv](http://man7.org/linux/man-pages/man2/process_vm_readv.2.html) system call on Linux, +the [vm_read](https://developer.apple.com/documentation/kernel/1585350-vm_read?language=objc) call on OSX +or the [ReadProcessMemory](https://msdn.microsoft.com/en-us/library/windows/desktop/ms680553(v=vs.85).aspx) call +on Windows. + +Figuring out the call stack of the Python program is done by looking at the global PyInterpreterState variable +to get all the Python threads running in the interpreter, and then iterating over each PyFrameObject in each thread +to get the call stack. Since the Python ABI changes between versions, we use rust's [bindgen](https://github.com/rust-lang-nursery/rust-bindgen) to generate different rust structures for each Python interpreter +class we care about and use these generated structs to figure out the memory layout in the Python program. + +Getting the memory address of the Python Interpreter can be a little tricky due to [Address Space Layout Randomization](https://en.wikipedia.org/wiki/Address_space_layout_randomization). If the target python interpreter ships +with symbols it is pretty easy to figure out the memory address of the interpreter by dereferencing the +```interp_head``` or ```_PyRuntime``` variables depending on the Python version. However, many Python +versions are shipped with either stripped binaries or shipped without the corresponding PDB symbol files on Windows. 
In +these cases we scan through the BSS section for addresses that look like they may point to a valid PyInterpreterState +and check if the layout of that address is what we expect. + + +### Can py-spy profile native extensions? + +Yes! py-spy supports profiling native python extensions written in languages like C/C++ or Cython, +on x86_64 Linux and Windows. You can enable this mode by passing ```--native``` on the +command line. For best results, you should compile your Python extension with symbols. Also worth +noting for Cython programs is that py-spy needs the generated C or C++ file in order to return line +numbers of the original .pyx file. Read the [blog post](https://www.benfrederickson.com/profiling-native-python-extensions-with-py-spy/) +for more information. + +### How can I profile subprocesses? + +By passing in the ```--subprocesses``` flag to either the record or top view, py-spy will also include +the output from any python process that is a child process of the target program. This is useful +for profiling applications that use multiprocessing or gunicorn worker pools. py-spy will monitor +for new processes being created, and automatically attach to them and include samples from them in +the output. The record view will include the PID and cmdline of each program in the callstack, +with subprocesses appearing as children of their parent processes. + +### When do you need to run as sudo? + +py-spy works by reading memory from a different python process, and this might not be allowed for security reasons depending on +your OS and system settings. In many cases, running as a root user (with sudo or similar) gets around these security restrictions. +OSX always requires running as root, but on Linux it depends on how you are launching py-spy and the system +security settings. + +On Linux the default configuration is to require root permissions when attaching to a process that isn't a child. +For py-spy this means you can profile without root access by getting py-spy to create the process +(```py-spy record -- python myprogram.py```) but attaching to an existing process by specifying a +PID will usually require root (```sudo py-spy record --pid 123456```). +You can remove this restriction on Linux by setting the [ptrace_scope sysctl variable](https://wiki.ubuntu.com/SecurityTeam/Roadmap/KernelHardening#ptrace_Protection). + +### How do you detect if a thread is idle or not? + +py-spy attempts to only include stack traces from threads that are actively running code, and exclude threads that +are sleeping or otherwise idle. When possible, py-spy attempts to get this thread activity information +from the OS: by reading in ```/proc/PID/stat``` on Linux, by using the mach +[thread_basic_info](https://opensource.apple.com/source/xnu/xnu-792/osfmk/mach/thread_info.h.auto.html) +call on OSX, and by looking if the current SysCall is [known to be +idle](https://github.com/benfred/py-spy/blob/8326c6dbc6241d60125dfd4c01b70fed8b8b8138/remoteprocess/src/windows/mod.rs#L212-L229) +on Windows. + +There are some limitations with this approach though that may cause idle threads to still be +marked as active. First off, we have to get this thread activity information before pausing the +program, because getting this from a paused program will cause it to always return that this is +idle. This means there is a potential race condition, where we get the thread activity and +then the thread is in a different state when we get the stack trace. 
Querying the OS for thread +activity also isn't implemented yet for FreeBSD and i686/ARM processors on Linux. On Windows, +calls that are blocked on IO also won't be marked as idle yet, for instance when reading input +from stdin. Finally, on some Linux calls the ptrace attach that we are using may cause idle threads +to wake up momentarily, causing false positives when reading from procfs. For these reasons, +we also have a heuristic fallback that marks known certain known calls in +python as being idle. + +You can disable this functionality by setting the ```--idle``` flag, which +will include frames that py-spy considers idle. + +### How does GIL detection work? + +We get GIL activity by looking at the threadid value pointed to by the ```_PyThreadState_Current``` symbol +for Python 3.6 and earlier and by figuring out the equivalent from the ```_PyRuntime``` struct in +Python 3.7 and later. These symbols might not be included in your python distribution, which will +cause resolving which thread holds on to the GIL to fail. Current GIL usage is also shown in the +```top``` view as %GIL. + +Passing the ```--gil``` flag will only include traces for threads that are holding on to the +[Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock). In some cases this +might be a more accurate view of how your python program is spending its time, though you should +be aware that this will miss activity in extensions that release the GIL while still active. + +### Why am I having issues profiling /usr/bin/python on OSX? + +OSX has a feature called [System Integrity Protection](https://en.wikipedia.org/wiki/System_Integrity_Protection) that prevents even the root user from reading memory from any binary located in /usr/bin. Unfortunately, this includes the python interpreter that ships with OSX. + +There are a couple of different ways to deal with this: + * You can install a different Python distribution. The built-in Python [will be removed](https://developer.apple.com/documentation/macos_release_notes/macos_catalina_10_15_release_notes) in a future OSX, and you probably want to migrate away from Python 2 anyways =). + * You can use [virtualenv](https://virtualenv.pypa.io/en/stable/) to run the system python in an environment where SIP doesn't apply. + * You can [disable System Integrity Protection](https://www.macworld.co.uk/how-to/mac/how-turn-off-mac-os-x-system-integrity-protection-rootless-3638975/). + +### How do I run py-spy in Docker? + +Running py-spy inside of a docker container will also usually bring up a permissions denied error even when running as root. + +This error is caused by docker restricting the process_vm_readv system call we are using. This can +be overridden by setting +[```--cap-add SYS_PTRACE```](https://docs.docker.com/engine/security/seccomp/) when starting the docker container. + +Alternatively you can edit the docker-compose yaml file + +``` +your_service: + cap_add: + - SYS_PTRACE +``` + +Note that you'll need to restart the docker container in order for this setting to take effect. + +You can also use py-spy from the Host OS to profile a running process running inside the docker +container. + +### How do I run py-spy in Kubernetes? + +py-spy needs `SYS_PTRACE` to be able to read process memory. Kubernetes drops that capability by default, resulting in the error +``` +Permission Denied: Try running again with elevated permissions by going 'sudo env "PATH=$PATH" !!' 
+``` +The recommended way to deal with this is to edit the spec and add that capability. For a deployment, this is done by adding this to `Deployment.spec.template.spec.containers` +``` +securityContext: + capabilities: + add: + - SYS_PTRACE +``` +More details on this here: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-capabilities-for-a-container +Note that this will remove the existing pods and create those again. + +### How do I install py-spy on Alpine Linux? + +Alpine python opts out of the `manylinux` wheels: [pypa/pip#3969 (comment)](https://github.com/pypa/pip/issues/3969#issuecomment-247381915). +You can override this behaviour to use pip to install py-spy on Alpine by going: + + echo 'manylinux1_compatible = True' > /usr/local/lib/python3.7/site-packages/_manylinux.py + +Alternatively you can download a musl binary from the [GitHub releases page](https://github.com/benfred/py-spy/releases). + +### How can I avoid pausing the Python program? + +By setting the ```--nonblocking``` option, py-spy won't pause the target python you are profiling from. While +the performance impact of sampling from a process with py-spy is usually extremely low, setting this option +will totally avoid interrupting your running python program. + +With this option set, py-spy will instead read the interpreter state from the python process as it is running. +Since the calls we use to read memory from are not atomic, and we have to issue multiple calls to get a stack trace this +means that occasionally we get errors when sampling. This can show up as an increased error rate when sampling, or as +partial stack frames being included in the output. + +### Does py-spy support 32-bit Windows? Integrate with PyPy? Work with USC2 versions of Python2? + +Not yet =). + +If there are features you'd like to see in py-spy either thumb up the [appropriate +issue](https://github.com/benfred/py-spy/issues?q=is%3Aissue+is%3Aopen+sort%3Areactions-%2B1-desc) or create a new one that describes what functionality is missing. + +### How to force colored output when piping to a pager? + +py-spy follows the [CLICOLOR](https://bixense.com/clicolors/) specification, thus setting `CLICOLOR_FORCE=1` in your environment will have py-spy print colored output even when piped to a pager. + +## Credits + +py-spy is heavily inspired by [Julia Evans](https://github.com/jvns/) excellent work on [rbspy](http://github.com/rbspy/rbspy). +In particular, the code to generate flamegraph and speedscope files is taken directly from rbspy, and this project uses the +[read-process-memory](https://github.com/luser/read-process-memory) and [proc-maps](https://github.com/benfred/proc-maps) crates that were spun off from rbspy. + +## License + +py-spy is released under the MIT License, see the [LICENSE](https://github.com/benfred/py-spy/blob/master/LICENSE) file for the full text. 
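Following on from the Docker and Kubernetes answers above: rather than granting `SYS_PTRACE` to the container, the FAQ also mentions profiling from the host OS. A minimal sketch of that approach, assuming a container named `myapp`, py-spy installed on the host, and the ptrace permissions described in the sudo section (names and output paths are placeholders, not part of the py-spy docs):

```bash
# Hedged sketch based on the FAQ above: profile a Python process inside a
# Docker container from the host, avoiding SYS_PTRACE inside the container.
# Assumes the container's main process is the Python program; otherwise pass
# --subprocesses or locate the child PID yourself.
PID=$(docker inspect --format '{{.State.Pid}}' myapp)   # host-namespace PID of the container's main process
sudo env "PATH=$PATH" py-spy record -o profile.svg --pid "$PID"

# Per the CLICOLOR note above, force colored output when piping to a pager.
sudo env "PATH=$PATH" CLICOLOR_FORCE=1 py-spy dump --pid "$PID" | less -R
```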
+ diff --git a/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/RECORD b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..fa914fb1412f3634bddc36ba2a05337ffa97f303 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/RECORD @@ -0,0 +1,6 @@ +../../../bin/py-spy,sha256=rPM5-WiLmVQZ1Ox5lM_MgA8ZF8pkjon68C-HWKel7zo,7658552 +py_spy-0.4.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +py_spy-0.4.0.dist-info/METADATA,sha256=T74VqNoB9RgvkC9_eTA9w54U2ulg0I9vqiupazuWEus,16461 +py_spy-0.4.0.dist-info/RECORD,, +py_spy-0.4.0.dist-info/WHEEL,sha256=McrmRllnOpiJfNX5v2otJqnu23qUciIDu9Ur1276LCQ,122 +py_spy-0.4.0.dist-info/licenses/LICENSE,sha256=gLu4cx21nNg19Z_NBvEnlTgEu1EchIf9Jl2Wx8cCzQA,1088 diff --git a/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..312939b51a3a99ef3c114347684cc54a2276ef22 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: maturin (1.7.4) +Root-Is-Purelib: false +Tag: py3-none-manylinux_2_5_x86_64.manylinux1_x86_64 diff --git a/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/licenses/LICENSE b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/licenses/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..dc6b266240189e247b1797a1d2d834774aacb51d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/py_spy-0.4.0.dist-info/licenses/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018-2019 Ben Frederickson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/__init__.py b/.venv/lib/python3.11/site-packages/sentencepiece/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e028957ddc4affef83a39588158359020fd31f83 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/sentencepiece/__init__.py @@ -0,0 +1,1224 @@ +# This file was automatically generated by SWIG (https://www.swig.org). +# Version 4.1.0 +# +# Do not make changes to this file unless you know what you are doing - modify +# the SWIG interface file instead. + +from sys import version_info as _swig_python_version_info +# Import the low-level C/C++ module +if __package__ or "." in __name__: + from . 
import _sentencepiece +else: + import _sentencepiece + +try: + import builtins as __builtin__ +except ImportError: + import __builtin__ + +def _swig_repr(self): + try: + strthis = "proxy of " + self.this.__repr__() + except __builtin__.Exception: + strthis = "" + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) + + +def _swig_setattr_nondynamic_instance_variable(set): + def set_instance_attr(self, name, value): + if name == "this": + set(self, name, value) + elif name == "thisown": + self.this.own(value) + elif hasattr(self, name) and isinstance(getattr(type(self), name), property): + set(self, name, value) + else: + raise AttributeError("You cannot add instance attributes to %s" % self) + return set_instance_attr + + +def _swig_setattr_nondynamic_class_variable(set): + def set_class_attr(cls, name, value): + if hasattr(cls, name) and not isinstance(getattr(cls, name), property): + set(cls, name, value) + else: + raise AttributeError("You cannot add class attributes to %s" % cls) + return set_class_attr + + +def _swig_add_metaclass(metaclass): + """Class decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclass""" + def wrapper(cls): + return metaclass(cls.__name__, cls.__bases__, cls.__dict__.copy()) + return wrapper + + +class _SwigNonDynamicMeta(type): + """Meta class to enforce nondynamic attributes (no new attributes) for a class""" + __setattr__ = _swig_setattr_nondynamic_class_variable(type.__setattr__) + + +class ImmutableSentencePieceText_ImmutableSentencePiece(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText_ImmutableSentencePiece()) + __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText_ImmutableSentencePiece + + def _piece(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__piece(self) + + def _surface(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__surface(self) + + def _id(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__id(self) + + def _begin(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__begin(self) + + def _end(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__end(self) + + def _surface_as_bytes(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__surface_as_bytes(self) + + def _piece_as_bytes(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__piece_as_bytes(self) + + piece = property(_piece) + piece_as_bytes = property(_piece_as_bytes) + surface = property(_surface) + surface_as_bytes = property(_surface_as_bytes) + id = property(_id) + begin = property(_begin) + end = property(_end) + + def __str__(self): + return ('piece: \"{}\"\n' + 'id: {}\n' + 'surface: \"{}\"\n' + 'begin: {}\n' + 'end: {}\n').format(self.piece, self.id, self.surface, + self.begin, self.end) + + def __eq__(self, other): + return self.piece == other.piece and self.id == other.id and self.surface == other.surface and self.begin == other.begin and self.end == other.end + + def __hash__(self): + return hash(str(self)) + + __repr__ = __str__ + + +# Register ImmutableSentencePieceText_ImmutableSentencePiece in _sentencepiece: 
+_sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swigregister(ImmutableSentencePieceText_ImmutableSentencePiece) +class ImmutableSentencePieceText(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText()) + __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText + + def _pieces_size(self): + return _sentencepiece.ImmutableSentencePieceText__pieces_size(self) + + def _pieces(self, index): + return _sentencepiece.ImmutableSentencePieceText__pieces(self, index) + + def _text(self): + return _sentencepiece.ImmutableSentencePieceText__text(self) + + def _score(self): + return _sentencepiece.ImmutableSentencePieceText__score(self) + + def SerializeAsString(self): + return _sentencepiece.ImmutableSentencePieceText_SerializeAsString(self) + + def _text_as_bytes(self): + return _sentencepiece.ImmutableSentencePieceText__text_as_bytes(self) + + text = property(_text) + text_as_bytes = property(_text_as_bytes) + score = property(_score) + + class ImmutableSentencePieceIterator: + def __init__(self, proto): + self.proto = proto + self.len = self.proto._pieces_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._pieces(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('piece index is out of range') + return self.proto._pieces(index) + + def __str__(self): + return '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property + def pieces(self): + return ImmutableSentencePieceText.ImmutableSentencePieceIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return ('text: \"{}\"\n' + 'score: {}\n' + '{}').format(self.text, self.score, + '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self.pieces])) + + __repr__ = __str__ + + +# Register ImmutableSentencePieceText in _sentencepiece: +_sentencepiece.ImmutableSentencePieceText_swigregister(ImmutableSentencePieceText) +class ImmutableNBestSentencePieceText(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableNBestSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableNBestSentencePieceText()) + __swig_destroy__ = _sentencepiece.delete_ImmutableNBestSentencePieceText + + def _nbests_size(self): + return _sentencepiece.ImmutableNBestSentencePieceText__nbests_size(self) + + def _nbests(self, index): + return _sentencepiece.ImmutableNBestSentencePieceText__nbests(self, index) + + def SerializeAsString(self): + return _sentencepiece.ImmutableNBestSentencePieceText_SerializeAsString(self) + + class ImmutableSentencePieceTextIterator: + def __init__(self, proto): + self.proto = proto + self.len = self.proto._nbests_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._nbests(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('nbests index is 
out of range') + return self.proto._nbests(index) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property + def nbests(self): + return ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self.nbests]) + + __repr__ = __str__ + + +# Register ImmutableNBestSentencePieceText in _sentencepiece: +_sentencepiece.ImmutableNBestSentencePieceText_swigregister(ImmutableNBestSentencePieceText) +class SentencePieceProcessor(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.SentencePieceProcessor_swiginit(self, _sentencepiece.new_SentencePieceProcessor()) + __swig_destroy__ = _sentencepiece.delete_SentencePieceProcessor + + def LoadFromSerializedProto(self, serialized): + return _sentencepiece.SentencePieceProcessor_LoadFromSerializedProto(self, serialized) + + def SetEncodeExtraOptions(self, extra_option): + return _sentencepiece.SentencePieceProcessor_SetEncodeExtraOptions(self, extra_option) + + def SetDecodeExtraOptions(self, extra_option): + return _sentencepiece.SentencePieceProcessor_SetDecodeExtraOptions(self, extra_option) + + def SetVocabulary(self, valid_vocab): + return _sentencepiece.SentencePieceProcessor_SetVocabulary(self, valid_vocab) + + def ResetVocabulary(self): + return _sentencepiece.SentencePieceProcessor_ResetVocabulary(self) + + def LoadVocabulary(self, filename, threshold): + return _sentencepiece.SentencePieceProcessor_LoadVocabulary(self, filename, threshold) + + def CalculateEntropy(self, *args): + return _sentencepiece.SentencePieceProcessor_CalculateEntropy(self, *args) + + def GetPieceSize(self): + return _sentencepiece.SentencePieceProcessor_GetPieceSize(self) + + def PieceToId(self, piece): + return _sentencepiece.SentencePieceProcessor_PieceToId(self, piece) + + def IdToPiece(self, id): + return _sentencepiece.SentencePieceProcessor_IdToPiece(self, id) + + def GetScore(self, id): + return _sentencepiece.SentencePieceProcessor_GetScore(self, id) + + def IsUnknown(self, id): + return _sentencepiece.SentencePieceProcessor_IsUnknown(self, id) + + def IsControl(self, id): + return _sentencepiece.SentencePieceProcessor_IsControl(self, id) + + def IsUnused(self, id): + return _sentencepiece.SentencePieceProcessor_IsUnused(self, id) + + def IsByte(self, id): + return _sentencepiece.SentencePieceProcessor_IsByte(self, id) + + def unk_id(self): + return _sentencepiece.SentencePieceProcessor_unk_id(self) + + def bos_id(self): + return _sentencepiece.SentencePieceProcessor_bos_id(self) + + def eos_id(self): + return _sentencepiece.SentencePieceProcessor_eos_id(self) + + def pad_id(self): + return _sentencepiece.SentencePieceProcessor_pad_id(self) + + def serialized_model_proto(self): + return _sentencepiece.SentencePieceProcessor_serialized_model_proto(self) + + def LoadFromFile(self, arg): + return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg) + + def _EncodeAsIds(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsIds(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def 
_EncodeAsPieces(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsPieces(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsPiecesBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsPiecesBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _DecodeIds(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIds(self, ids) + + def _DecodeIdsAsBytes(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsBytes(self, ids) + + def _DecodePieces(self, pieces): + return _sentencepiece.SentencePieceProcessor__DecodePieces(self, pieces) + + def _DecodeIdsAsSerializedProto(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsSerializedProto(self, ids) + + def _DecodePiecesAsSerializedProto(self, pieces): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProto(self, pieces) + + def _DecodeIdsAsImmutableProto(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsImmutableProto(self, ids) + + def _DecodePiecesAsImmutableProto(self, pieces): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProto(self, pieces) + + def _DecodeIdsBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsBatch(self, ins, num_threads) + + def _DecodeIdsAsBytesBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsBytesBatch(self, ins, num_threads) + + def _DecodeIdsAsSerializedProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch(self, ins, num_threads) + + def _DecodeIdsAsImmutableProtoBatch(self, ins, num_threads): + return 
_sentencepiece.SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch(self, ins, num_threads) + + def _DecodePiecesBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesBatch(self, ins, num_threads) + + def _DecodePiecesAsSerializedProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch(self, ins, num_threads) + + def _DecodePiecesAsImmutableProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(self, ins, num_threads) + + def _NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsPieces(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsPieces(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _Normalize(self, text): + return _sentencepiece.SentencePieceProcessor__Normalize(self, text) + + def _NormalizeWithOffsets(self, text): + return _sentencepiece.SentencePieceProcessor__NormalizeWithOffsets(self, text) + + def _CalculateEntropy(self, text, alpha): + return _sentencepiece.SentencePieceProcessor__CalculateEntropy(self, text, alpha) + + def _CalculateEntropyBatch(self, ins, alpha, num_threads): + return _sentencepiece.SentencePieceProcessor__CalculateEntropyBatch(self, ins, alpha, num_threads) + + def _OverrideNormalizerSpec(self, args): + return _sentencepiece.SentencePieceProcessor__OverrideNormalizerSpec(self, args) + + def Init(self, + model_file=None, + model_proto=None, + out_type=int, + add_bos=False, + add_eos=False, + 
reverse=False, + emit_unk_piece=False, + enable_sampling=False, + nbest_size=-1, + alpha=0.1, + num_threads=-1): + """Initialzie sentencepieceProcessor. + + Args: + model_file: The sentencepiece model file path. + model_proto: The sentencepiece model serialized proto. + out_type: output type. int or str. + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after + reversing (if enabled). + reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. + nbest_size = {0,1}: No sampling is performed. + nbest_size > 1: samples from the nbest_size results. + nbest_size < 0: assuming that nbest_size is infinite and samples + from the all hypothesis (lattice) using + forward-filtering-and-backward-sampling algorithm. + alpha: Soothing parameter for unigram sampling, and dropout probability of + merge operations for BPE-dropout. + num_threads: number of threads in batch processing (Default = -1, auto-detected) + """ + + _sentencepiece_processor_init_native(self) + self._out_type = out_type + self._add_bos = add_bos + self._add_eos = add_eos + self._reverse = reverse + self._emit_unk_piece = emit_unk_piece + self._enable_sampling = enable_sampling + self._nbest_size = nbest_size + self._alpha = alpha + self._num_threads = num_threads + if model_file or model_proto: + self.Load(model_file=model_file, model_proto=model_proto) + + + def Encode(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + enable_sampling=None, + nbest_size=None, + alpha=None, + num_threads=None): + """Encode text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str. + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after + reversing (if enabled). + reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. + nbest_size = {0,1}: No sampling is performed. + nbest_size > 1: samples from the nbest_size results. + nbest_size < 0: assuming that nbest_size is infinite and samples + from the all hypothesis (lattice) using + forward-filtering-and-backward-sampling algorithm. + alpha: Soothing parameter for unigram sampling, and merge probability for + BPE-dropout (probablity 'p' in BPE-dropout paper). + num_threads: the number of threads used in the batch processing (Default = -1). + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if enable_sampling is None: + enable_sampling = self._enable_sampling + if nbest_size is None: + nbest_size = self._nbest_size + if alpha is None: + alpha = self._alpha + if num_threads is None: + num_threads = self._num_threads + + if enable_sampling == True and (nbest_size is None or nbest_size == 0 or + nbest_size == 1 or alpha is None): + raise RuntimeError( + 'When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", ' + 'and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. 
' + 'when "nbest_size = -1" , this method samples from all candidates on the lattice ' + 'instead of nbest segmentations.' + ) + + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + + if type(input) is list: + if out_type is int: + return self._EncodeAsIdsBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPiecesBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + if out_type is int: + return self._EncodeAsIds(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPieces(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type={}'.format(out_type)) + return None + + + def EncodeAsPieces(self, input, **kwargs): + return self.Encode(input=input, out_type=str, **kwargs) + + + def EncodeAsIds(self, input, **kwargs): + return self.Encode(input=input, out_type=int, **kwargs) + + + def EncodeAsSerializedProto(self, input, **kwargs): + return self.Encode(input=input, out_type='serialized_proto', **kwargs) + + + def EncodeAsImmutableProto(self, input, **kwargs): + return self.Encode(input=input, out_type='immutable_proto', **kwargs) + + + def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=str, enable_sampling=True, **kwargs) + + + def SampleEncodeAsIds(self, input, nbest_size=None, alpha=None,**kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=int, enable_sampling=True, **kwargs) + + + def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='serialized_proto', enable_sampling=True, **kwargs) + + + def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='immutable_proto', enable_sampling=True, **kwargs) + + + def NBestEncode(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + nbest_size=None): + """NBestEncode text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str. + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). 
+ reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: nbest size + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if nbest_size is None: + nbest_size = self._nbest_size + + if nbest_size <= 0: + nbest_size=1 + + def _encode(text): + if out_type is int: + return self._NBestEncodeAsIds(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._NBestEncodeAsPieces(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._NBestEncodeAsSerializedProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._NBestEncodeAsImmutableProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type') + + if type(input) is list: + return [_encode(n) for n in input] + + return _encode(input) + + + def NBestEncodeAsPieces(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=str, **kwargs) + + + def NBestEncodeAsIds(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=int, **kwargs) + + + def NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='serialized_proto', **kwargs) + + + def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='immutable_proto', **kwargs) + + + def SampleEncodeAndScore(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + num_samples=None, + alpha=None, + wor=None, + include_best=None): + """SampleEncodeAndScore text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str or 'serialized_proto' or 'immutable_proto' + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). + reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + num_samples: How many samples to return (Default = 1) + alpha: inverse temperature for sampling + wor: whether to sample without replacement (Default = false) + include_best: whether to include the best tokenization, requires wor=True (Default = false) + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if num_samples is None: + num_samples = 1 + if alpha is None: + alpha = 1. 
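The `Encode` wrapper shown above dispatches on `out_type` and the sampling flags, and the `NBestEncode`/`SampleEncode*` helpers are thin wrappers over it. As a rough usage sketch (not part of the vendored file; the path `m.model` is a placeholder for any trained SentencePiece model), the same call covers deterministic, sampled, and n-best segmentation:

```python
# Illustrative sketch only; assumes a trained model exists at the
# hypothetical path "m.model".
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file='m.model')

# Deterministic segmentation (best path), as pieces or as ids.
pieces = sp.Encode('New York', out_type=str)
ids = sp.Encode('New York', out_type=int)

# Subword regularization: sample one segmentation from the whole lattice.
sampled = sp.Encode('New York', out_type=str,
                    enable_sampling=True, nbest_size=-1, alpha=0.1)

# Top-5 segmentations via the NBestEncode wrapper.
nbest = sp.NBestEncode('New York', out_type=str, nbest_size=5)
```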
+ if wor is None: + wor = False + if include_best is None: + include_best = False + + if num_samples <= 0: + raise RuntimeError('num_examples must be positive') + + if include_best and not wor: + raise RuntimeError('When include_best is True, We must specify "wor = True".') + + + def _encode(text): + if out_type is int: + return self._SampleEncodeAndScoreAsIds(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._SampleEncodeAndScoreAsPieces(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'serialized_proto' or out_type == 'proto': + return self._SampleEncodeAndScoreAsSerializedProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'immutable_proto': + return self._SampleEncodeAndScoreAsImmutableProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown output type') + + + if type(input) is list: + return [_encode(n) for n in input] + + return _encode(input) + + + def SampleEncodeAndScoreAsPieces(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=str, **kwargs) + + + def SampleEncodeAndScoreAsIds(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=int, **kwargs) + + + def SampleEncodeAndScoreAsSerializedProto(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type='serialized_proto', **kwargs) + + + def SampleEncodeAndScoreAsImmutableProto(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type='immutable_proto', **kwargs) + + + def Decode(self, input, out_type=str, num_threads=None): + """Decode processed id or token sequences. + + Args: + out_type: output type. str, bytes or 'serialized_proto' or 'immutable_proto' (Default = str) + num_threads: the number of threads used in the batch processing (Default = -1). 
+ """ + + if num_threads is None: + num_threads = self._num_threads + + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + + if not input: + return '' + + if out_type is str: + if type(input) is int: + return self._DecodeIds([input]) + if type(input) is str: + return self._DecodePieces([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIds(input) + if type(input[0]) is str: + return self._DecodePieces(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesBatch(input, num_threads) + + if out_type is bytes: + if type(input) is int: + return self._DecodeIdsAsBytes([input]) + if type(input) is str: + return self._DecodePieces([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsBytes(input) + if type(input[0]) is str: + return self._DecodePieces(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsBytesBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesBatch(input, num_threads) + + if out_type == 'serialized_proto': + if type(input) is int: + return self._DecodeIdsAsSerializedProto([input]) + if type(input) is str: + return self._DecodePiecesAsSerializedProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsSerializedProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsSerializedProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsSerializedProtoBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesAsSerializedProtoBatch(input, num_threads) + + + if out_type == 'immutable_proto': + if type(input) is int: + return self._DecodeIdsAsImmutableProto([input]) + if type(input) is str: + return self._DecodePiecesAsImmutableProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsImmutableProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsImmutableProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsImmutableProtoBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesAsImmutableProtoBatch(input, num_threads) + + + raise RuntimeError('unknown output or input type') + return None + + + def DecodePieces(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIds(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodePiecesAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodePiecesAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def CalculateEntropy(self, 
input, alpha, num_threads=None): + """Calculate sentence entropy""" + if type(input) is list: + if num_threads is None: + num_threads = self._num_threads + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + return self._CalculateEntropyBatch(input, alpha, num_threads) + + return self._CalculateEntropy(input, alpha) + + + def Normalize(self, input, with_offsets=None): + def _normalize(text): + if with_offsets: + return self._NormalizeWithOffsets(text) + return self._Normalize(text) + + if type(input) is list: + return [_normalize(x) for x in input] + return _normalize(input) + + def OverrideNormalizerSpec(self, **kwargs): + new_kwargs = {} + for key, value in kwargs.items(): + new_kwargs[key] = str(value) + return self._OverrideNormalizerSpec(new_kwargs) + + + def piece_size(self): + return self.GetPieceSize() + + + def vocab_size(self): + return self.GetPieceSize() + + + def __getstate__(self): + return self.serialized_model_proto() + + + def __setstate__(self, serialized_model_proto): + self.__init__() + self.LoadFromSerializedProto(serialized_model_proto) + + + def __len__(self): + return self.GetPieceSize() + + + def __getitem__(self, piece): + return self.PieceToId(piece) + + + def Load(self, model_file=None, model_proto=None): + """Overwride SentencePieceProcessor.Load to support both model_file and model_proto. + + Args: + model_file: The sentencepiece model file path. + model_proto: The sentencepiece model serialized proto. Either `model_file` + or `model_proto` must be set. + """ + if model_file and model_proto: + raise RuntimeError('model_file and model_proto must be exclusive.') + if model_proto: + return self.LoadFromSerializedProto(model_proto) + return self.LoadFromFile(model_file) + + +# Register SentencePieceProcessor in _sentencepiece: +_sentencepiece.SentencePieceProcessor_swigregister(SentencePieceProcessor) + +def SetRandomGeneratorSeed(seed): + return _sentencepiece.SetRandomGeneratorSeed(seed) + +def SetMinLogLevel(v): + return _sentencepiece.SetMinLogLevel(v) +class SentencePieceTrainer(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + + def __init__(self, *args, **kwargs): + raise AttributeError("No constructor defined") + __repr__ = _swig_repr + + @staticmethod + def _TrainFromString(arg): + return _sentencepiece.SentencePieceTrainer__TrainFromString(arg) + + @staticmethod + def _TrainFromMap(args): + return _sentencepiece.SentencePieceTrainer__TrainFromMap(args) + + @staticmethod + def _TrainFromMap2(args, iter): + return _sentencepiece.SentencePieceTrainer__TrainFromMap2(args, iter) + + @staticmethod + def _TrainFromMap3(args): + return _sentencepiece.SentencePieceTrainer__TrainFromMap3(args) + + @staticmethod + def _TrainFromMap4(args, iter): + return _sentencepiece.SentencePieceTrainer__TrainFromMap4(args, iter) + + @staticmethod + def _Train(arg=None, **kwargs): + """Train Sentencepiece model. 
Accept both kwargs and legacy string arg.""" + if arg is not None and type(arg) is str: + return SentencePieceTrainer._TrainFromString(arg) + + def _encode(value): + """Encode value to CSV..""" + if type(value) is list: + if sys.version_info[0] == 3: + f = StringIO() + else: + f = BytesIO() + writer = csv.writer(f, lineterminator='') + writer.writerow([str(v) for v in value]) + return f.getvalue() + else: + return str(value) + + sentence_iterator = None + model_writer = None + new_kwargs = {} + for key, value in kwargs.items(): + if key in ['sentence_iterator', 'sentence_reader']: + sentence_iterator = value + elif key in ['model_writer']: + model_writer = value + else: + new_kwargs[key] = _encode(value) + + if model_writer: + if sentence_iterator: + model_proto = SentencePieceTrainer._TrainFromMap4(new_kwargs, + sentence_iterator) + else: + model_proto = SentencePieceTrainer._TrainFromMap3(new_kwargs) + model_writer.write(model_proto) + else: + if sentence_iterator: + return SentencePieceTrainer._TrainFromMap2(new_kwargs, sentence_iterator) + else: + return SentencePieceTrainer._TrainFromMap(new_kwargs) + + return None + + @staticmethod + def Train(arg=None, logstream=None, **kwargs): + with _LogStream(ostream=logstream): + SentencePieceTrainer._Train(arg=arg, **kwargs) + + +# Register SentencePieceTrainer in _sentencepiece: +_sentencepiece.SentencePieceTrainer_swigregister(SentencePieceTrainer) +class SentencePieceNormalizer(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.SentencePieceNormalizer_swiginit(self, _sentencepiece.new_SentencePieceNormalizer()) + __swig_destroy__ = _sentencepiece.delete_SentencePieceNormalizer + + def LoadFromSerializedProto(self, serialized): + return _sentencepiece.SentencePieceNormalizer_LoadFromSerializedProto(self, serialized) + + def LoadFromRuleTSV(self, filename): + return _sentencepiece.SentencePieceNormalizer_LoadFromRuleTSV(self, filename) + + def LoadFromRuleName(self, name): + return _sentencepiece.SentencePieceNormalizer_LoadFromRuleName(self, name) + + def serialized_model_proto(self): + return _sentencepiece.SentencePieceNormalizer_serialized_model_proto(self) + + def LoadFromFile(self, arg): + return _sentencepiece.SentencePieceNormalizer_LoadFromFile(self, arg) + + def _Normalize(self, text): + return _sentencepiece.SentencePieceNormalizer__Normalize(self, text) + + def _NormalizeWithOffsets(self, text): + return _sentencepiece.SentencePieceNormalizer__NormalizeWithOffsets(self, text) + + def _SetProtoField(self, name, value): + return _sentencepiece.SentencePieceNormalizer__SetProtoField(self, name, value) + + def Init(self, + model_file=None, + model_proto=None, + rule_tsv=None, + rule_name=None, + add_dummy_prefix=False, + escape_whitespaces=False, + remove_extra_whitespaces=False): + """Initialzie sentencePieceNormalizer. + + Args: + model_file: The sentencepiece model file path. + model_proto: The sentencepiece model serialized proto. + rule_tsv: The normalization rule file in TSV format. + rule_name: Pre-defined normalization name. + add_dummy_prefix: add dummy prefix. + escape_whitespaces: escape whitespaces. + remove_extra_whitespaces: remove extra whitespaces. 
+ """ + + _sentencepiece_normalizer_init_native(self) + + if model_file: + status = self.LoadFromFile(model_file) + elif model_proto: + status = self.LoadFromSerializedProto(model_proto) + elif rule_tsv: + status = self.LoadFromRuleTSV(rule_tsv) + elif rule_name: + status = self.LoadFromRuleName(rule_name) + else: + raise RuntimeError('no model is specified') + + if status: + self._SetProtoField('add_dummy_prefix', add_dummy_prefix) + self._SetProtoField('escape_whitespaces', escape_whitespaces) + self._SetProtoField('remove_extra_whitespaces', remove_extra_whitespaces) + + def Normalize(self, input, with_offsets=None): + def _normalize(text): + if with_offsets: + return self._NormalizeWithOffsets(text) + return self._Normalize(text) + + if type(input) is list: + return [_normalize(x) for x in input] + return _normalize(input) + + + def __getstate__(self): + return self.serialized_model_proto() + + + def __setstate__(self, serialized_model_proto): + self.__init__() + self.LoadFromSerializedProto(serialized_model_proto) + + +# Register SentencePieceNormalizer in _sentencepiece: +_sentencepiece.SentencePieceNormalizer_swigregister(SentencePieceNormalizer) + + +import re +import csv +import sys +import os +from io import StringIO +from io import BytesIO + + +def _add_snake_case(classname): + """Added snake_cased method from CammelCased method.""" + + snake_map = {} + for k, v in classname.__dict__.items(): + if re.match(r'^[A-Z]+', k): + snake = re.sub(r'(?= v.piece_size()): + raise IndexError('piece id is out of range.') + return func(v, n) + + def _batched_func(self, arg): + if type(arg) is list: + return [_func(self, n) for n in arg] + else: + return _func(self, arg) + + setattr(classname, name, _batched_func) + + +_sentencepiece_processor_init_native = SentencePieceProcessor.__init__ +_sentencepiece_normalizer_init_native = SentencePieceNormalizer.__init__ +setattr(SentencePieceProcessor, '__init__', SentencePieceProcessor.Init) +setattr(SentencePieceNormalizer, '__init__', SentencePieceNormalizer.Init) + +SentencePieceProcessor.Tokenize = SentencePieceProcessor.Encode +SentencePieceProcessor.Detokenize = SentencePieceProcessor.Decode + +for m in [ + 'PieceToId', 'IdToPiece', 'GetScore', 'IsUnknown', 'IsControl', 'IsUnused', + 'IsByte' +]: + _batchnize(SentencePieceProcessor, m) + +_add_snake_case(SentencePieceProcessor) +_add_snake_case(SentencePieceTrainer) +_add_snake_case(SentencePieceNormalizer) +set_random_generator_seed = SetRandomGeneratorSeed +set_min_log_level = SetMinLogLevel + +from ._version import __version__ + +class _LogStream(object): + def __init__(self, ostream=None): + self.ostream = ostream + if self.ostream is not None: + self.orig_stream_fileno = sys.stderr.fileno() + + def __enter__(self): + if self.ostream is not None: + self.orig_stream_dup = os.dup(self.orig_stream_fileno) + os.dup2(self.ostream.fileno(), self.orig_stream_fileno) + + def __exit__(self, type, value, traceback): + if self.ostream is not None: + os.close(self.orig_stream_fileno) + os.dup2(self.orig_stream_dup, self.orig_stream_fileno) + os.close(self.orig_stream_dup) + self.ostream.close() + + diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a1c92b6fd1908272279f7bac7488b1569620501 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/__init__.cpython-311.pyc 
differ diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/_version.cpython-311.pyc b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/_version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fe4c5abb99ba68c0a16a8136ad5fb49ada812b4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/_version.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/sentencepiece_pb2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/sentencepiece_pb2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4502abbaa72ac4f23a04bb91a97bf858199c4bee Binary files /dev/null and b/.venv/lib/python3.11/site-packages/sentencepiece/__pycache__/sentencepiece_pb2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/_version.py b/.venv/lib/python3.11/site-packages/sentencepiece/_version.py new file mode 100644 index 0000000000000000000000000000000000000000..7fd229a32b5eefeffa54b9c187302d2c03a0a680 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/sentencepiece/_version.py @@ -0,0 +1 @@ +__version__ = '0.2.0' diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_model_pb2.py b/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_model_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..b07107d69de178e107544a29bb4d0280b5482241 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_model_pb2.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: sentencepiece_model.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19sentencepiece_model.proto\x12\rsentencepiece\"\x80\x0c\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01 \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02 \x01(\t\x12\x41\n\nmodel_type\x18\x03 \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04 \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12 \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12*\n\x1b\x65nable_differential_privacy\x18\x32 \x01(\x08:\x05\x66\x61lse\x12+\n differential_privacy_noise_level\x18\x33 \x01(\x02:\x01\x30\x12\x32\n\'differential_privacy_clipping_threshold\x18\x34 \x01(\x04:\x01\x30\x12\"\n\x12\x63haracter_coverage\x18\n \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12 \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12\"\n\x16training_sentence_size\x18\r \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12 \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10 \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11 \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14 
\x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15 \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17 \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16 \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18 \x01(\x08:\x05\x66\x61lse\x12+\n\x1c\x61llow_whitespace_only_pieces\x18\x1a \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19 \x01(\x08:\x05\x66\x61lse\x12#\n\x19pretokenization_delimiter\x18\x35 \x01(\t:\x00\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$ \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18 \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18\" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18) \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+ \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05\x12\x16\n\tbos_piece\x18. \x01(\t:\x03\x12\x17\n\teos_piece\x18/ \x01(\t:\x04\x12\x18\n\tpad_piece\x18\x30 \x01(\t:\x05\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87 \x12+\n\x1ctrain_extremely_large_corpus\x18\x31 \x01(\x08:\x05\x66\x61lse\"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03 \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12 \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01 \x03(\x0b\x32\".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01 \x01(\t\x12\x10\n\x08\x65xpected\x18\x02 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01 \x03(\x0b\x32\'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02 \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04 \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03 \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL\"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sentencepiece_model_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'H\003' + _TRAINERSPEC.fields_by_name['mining_sentence_size']._options = None + _TRAINERSPEC.fields_by_name['mining_sentence_size']._serialized_options = b'\030\001' + _TRAINERSPEC.fields_by_name['training_sentence_size']._options = None + 
_TRAINERSPEC.fields_by_name['training_sentence_size']._serialized_options = b'\030\001' + _TRAINERSPEC._serialized_start=45 + _TRAINERSPEC._serialized_end=1581 + _TRAINERSPEC_MODELTYPE._serialized_start=1517 + _TRAINERSPEC_MODELTYPE._serialized_end=1570 + _NORMALIZERSPEC._serialized_start=1584 + _NORMALIZERSPEC._serialized_end=1793 + _SELFTESTDATA._serialized_start=1795 + _SELFTESTDATA._serialized_end=1916 + _SELFTESTDATA_SAMPLE._serialized_start=1864 + _SELFTESTDATA_SAMPLE._serialized_end=1905 + _MODELPROTO._serialized_start=1919 + _MODELPROTO._serialized_end=2429 + _MODELPROTO_SENTENCEPIECE._serialized_start=2208 + _MODELPROTO_SENTENCEPIECE._serialized_end=2418 + _MODELPROTO_SENTENCEPIECE_TYPE._serialized_start=2323 + _MODELPROTO_SENTENCEPIECE_TYPE._serialized_end=2407 +# @@protoc_insertion_point(module_scope) diff --git a/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_pb2.py b/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..840cfd2143a47d8a972008bd946a8dd271aa0a0f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/sentencepiece/sentencepiece_pb2.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: sentencepiece.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13sentencepiece.proto\x12\rsentencepiece\"\xdf\x01\n\x11SentencePieceText\x12\x0c\n\x04text\x18\x01 \x01(\t\x12>\n\x06pieces\x18\x02 \x03(\x0b\x32..sentencepiece.SentencePieceText.SentencePiece\x12\r\n\x05score\x18\x03 \x01(\x02\x1a\x62\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\r\x12\x0f\n\x07surface\x18\x03 \x01(\t\x12\r\n\x05\x62\x65gin\x18\x04 \x01(\r\x12\x0b\n\x03\x65nd\x18\x05 \x01(\r*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"J\n\x16NBestSentencePieceText\x12\x30\n\x06nbests\x18\x01 \x03(\x0b\x32 .sentencepiece.SentencePieceTextB\x02H\x03') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sentencepiece_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'H\003' + _SENTENCEPIECETEXT._serialized_start=39 + _SENTENCEPIECETEXT._serialized_end=262 + _SENTENCEPIECETEXT_SENTENCEPIECE._serialized_start=153 + _SENTENCEPIECETEXT_SENTENCEPIECE._serialized_end=251 + _NBESTSENTENCEPIECETEXT._serialized_start=264 + _NBESTSENTENCEPIECETEXT._serialized_end=338 +# @@protoc_insertion_point(module_scope) diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/LICENSE new file mode 100644 index 
0000000000000000000000000000000000000000..1cc22a5aa7679ebaa10934212f356823931bdc3e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2010-2024 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/METADATA b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..cfde03c2631c5c1d5cdc0949d0ee3379e7110f0e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/METADATA @@ -0,0 +1,43 @@ +Metadata-Version: 2.1 +Name: six +Version: 1.17.0 +Summary: Python 2 and 3 compatibility utilities +Home-page: https://github.com/benjaminp/six +Author: Benjamin Peterson +Author-email: benjamin@python.org +License: MIT +Classifier: Development Status :: 5 - Production/Stable +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Utilities +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.* +License-File: LICENSE + +.. image:: https://img.shields.io/pypi/v/six.svg + :target: https://pypi.org/project/six/ + :alt: six on PyPI + +.. image:: https://readthedocs.org/projects/six/badge/?version=latest + :target: https://six.readthedocs.io/ + :alt: six's documentation on Read the Docs + +.. image:: https://img.shields.io/badge/license-MIT-green.svg + :target: https://github.com/benjaminp/six/blob/master/LICENSE + :alt: MIT License badge + +Six is a Python 2 and 3 compatibility library. It provides utility functions +for smoothing over the differences between the Python versions with the goal of +writing Python code that is compatible on both Python versions. See the +documentation for more information on what is provided. + +Six supports Python 2.7 and 3.3+. It is contained in only one Python +file, so it can be easily copied into your project. (The copyright and license +notice must be retained.) + +Online documentation is at https://six.readthedocs.io/. + +Bugs can be reported to https://github.com/benjaminp/six. The code can also +be found there. 
diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/RECORD b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..bcd83a187f36ed78dee3d4a7238b5767978aa473 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/RECORD @@ -0,0 +1,8 @@ +__pycache__/six.cpython-311.pyc,, +six-1.17.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +six-1.17.0.dist-info/LICENSE,sha256=Q3W6IOK5xsTnytKUCmKP2Q6VzD1Q7pKq51VxXYuh-9A,1066 +six-1.17.0.dist-info/METADATA,sha256=ViBCB4wnUlSfbYp8htvF3XCAiKe-bYBnLsewcQC3JGg,1658 +six-1.17.0.dist-info/RECORD,, +six-1.17.0.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109 +six-1.17.0.dist-info/top_level.txt,sha256=_iVH_iYEtEXnD8nYGQYpYFUvkUW9sEO1GYbkeKSAais,4 +six.py,sha256=xRyR9wPT1LNpbJI8tf7CE-BeddkhU5O--sfy-mo5BN8,34703 diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..104f3874635f24f0d2918dfeaf6a59652274460c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..ffe2fce498955b628014618b28c6bcf152466a4a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/six-1.17.0.dist-info/top_level.txt @@ -0,0 +1 @@ +six diff --git a/.venv/lib/python3.11/site-packages/yarl/__init__.py b/.venv/lib/python3.11/site-packages/yarl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3640407105dddc9128b617d4222cb642e25c7e23 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/__init__.py @@ -0,0 +1,14 @@ +from ._query import Query, QueryVariable, SimpleQuery +from ._url import URL, cache_clear, cache_configure, cache_info + +__version__ = "1.18.3" + +__all__ = ( + "URL", + "SimpleQuery", + "QueryVariable", + "Query", + "cache_clear", + "cache_configure", + "cache_info", +) diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07c544ca2bbf8ceec9813db389c6b64a9c13d5d8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_parse.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_parse.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2f70f2158a49eb4812f8413e6ba5a36d3becec9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_parse.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_path.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_path.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d10edec2c15fcbee67ded395e6bb316a5b4e22ee Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_path.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_query.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_query.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5aabba6b58c5ccbe750d99e49d29ca245f79af54 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_query.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoters.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoters.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ccdba125db029ba50d1b45395581d6dfb5d0f37 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoters.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9932b753328ab375c307595085e56b7326bb3c73 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting_py.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting_py.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09eb2a4c51fd90e072392d52de791ca20b22c1da Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_quoting_py.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/__pycache__/_url.cpython-311.pyc b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_url.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03a700960d1cdc09bff27848980e0c50428fbf07 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/yarl/__pycache__/_url.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/yarl/_parse.py b/.venv/lib/python3.11/site-packages/yarl/_parse.py new file mode 100644 index 0000000000000000000000000000000000000000..cc259ea86dc89aa0766d0cd30945fd605743b7b0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_parse.py @@ -0,0 +1,189 @@ +"""URL parsing utilities.""" + +import re +import unicodedata +from functools import lru_cache +from typing import Union +from urllib.parse import scheme_chars, uses_netloc + +from ._quoters import QUOTER + +# Leading and trailing C0 control and space to be stripped per WHATWG spec. +# == "".join([chr(i) for i in range(0, 0x20 + 1)]) +WHATWG_C0_CONTROL_OR_SPACE = ( + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f " +) + +# Unsafe bytes to be removed per WHATWG spec +UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"] +USES_AUTHORITY = frozenset(uses_netloc) + +SplitURLType = tuple[str, str, str, str, str] + + +def split_url(url: str) -> SplitURLType: + """Split URL into parts.""" + # Adapted from urllib.parse.urlsplit + # Only lstrip url as some applications rely on preserving trailing space. 
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) + url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE) + for b in UNSAFE_URL_BYTES_TO_REMOVE: + if b in url: + url = url.replace(b, "") + + scheme = netloc = query = fragment = "" + i = url.find(":") + if i > 0 and url[0] in scheme_chars: + for c in url[1:i]: + if c not in scheme_chars: + break + else: + scheme, url = url[:i].lower(), url[i + 1 :] + has_hash = "#" in url + has_question_mark = "?" in url + if url[:2] == "//": + delim = len(url) # position of end of domain part of url, default is end + if has_hash and has_question_mark: + delim_chars = "/?#" + elif has_question_mark: + delim_chars = "/?" + elif has_hash: + delim_chars = "/#" + else: + delim_chars = "/" + for c in delim_chars: # look for delimiters; the order is NOT important + wdelim = url.find(c, 2) # find first of this delim + if wdelim >= 0 and wdelim < delim: # if found + delim = wdelim # use earliest delim position + netloc = url[2:delim] + url = url[delim:] + has_left_bracket = "[" in netloc + has_right_bracket = "]" in netloc + if (has_left_bracket and not has_right_bracket) or ( + has_right_bracket and not has_left_bracket + ): + raise ValueError("Invalid IPv6 URL") + if has_left_bracket: + bracketed_host = netloc.partition("[")[2].partition("]")[0] + # Valid bracketed hosts are defined in + # https://www.rfc-editor.org/rfc/rfc3986#page-49 + # https://url.spec.whatwg.org/ + if bracketed_host[0] == "v": + if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host): + raise ValueError("IPvFuture address is invalid") + elif ":" not in bracketed_host: + raise ValueError("An IPv4 address cannot be in brackets") + if has_hash: + url, _, fragment = url.partition("#") + if has_question_mark: + url, _, query = url.partition("?") + if netloc and not netloc.isascii(): + _check_netloc(netloc) + return scheme, netloc, url, query, fragment + + +def _check_netloc(netloc: str) -> None: + # Adapted from urllib.parse._checknetloc + # looking for characters like \u2100 that expand to 'a/c' + # IDNA uses NFKC equivalence, so normalize for this check + + # ignore characters already included + # but not the surrounding text + n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "") + normalized_netloc = unicodedata.normalize("NFKC", n) + if n == normalized_netloc: + return + # Note that there are no unicode decompositions for the character '@' so + # its currently impossible to have test coverage for this branch, however if the + # one should be added in the future we want to make sure its still checked. 
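The two helpers defined above, `split_url` and `split_netloc`, do the heavy lifting of yarl's URL parsing. A small behavioral sketch (not part of the vendored file; `yarl._parse` is an internal module and the URL string is made up for illustration):

```python
# Illustrative sketch only: exercises the helpers defined in yarl/_parse.py.
from yarl._parse import split_netloc, split_url

parts = split_url('https://user:secret@example.com:8443/a/b?x=1#top')
# -> ('https', 'user:secret@example.com:8443', '/a/b', 'x=1', 'top')
assert parts[0] == 'https' and parts[3] == 'x=1' and parts[4] == 'top'

user, password, host, port = split_netloc(parts[1])
assert (user, password, host, port) == ('user', 'secret', 'example.com', 8443)
```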
+ for c in "/?#@:": # pragma: no branch + if c in normalized_netloc: + raise ValueError( + f"netloc '{netloc}' contains invalid " + "characters under NFKC normalization" + ) + + +@lru_cache # match the same size as urlsplit +def split_netloc( + netloc: str, +) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]: + """Split netloc into username, password, host and port.""" + if "@" not in netloc: + username: Union[str, None] = None + password: Union[str, None] = None + hostinfo = netloc + else: + userinfo, _, hostinfo = netloc.rpartition("@") + username, have_password, password = userinfo.partition(":") + if not have_password: + password = None + + if "[" in hostinfo: + _, _, bracketed = hostinfo.partition("[") + hostname, _, port_str = bracketed.partition("]") + _, _, port_str = port_str.partition(":") + else: + hostname, _, port_str = hostinfo.partition(":") + + if not port_str: + return username or None, password, hostname or None, None + + try: + port = int(port_str) + except ValueError: + raise ValueError("Invalid URL: port can't be converted to integer") + if not (0 <= port <= 65535): + raise ValueError("Port out of range 0-65535") + return username or None, password, hostname or None, port + + +def unsplit_result( + scheme: str, netloc: str, url: str, query: str, fragment: str +) -> str: + """Unsplit a URL without any normalization.""" + if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//": + if url and url[:1] != "/": + url = f"{scheme}://{netloc}/{url}" if scheme else f"{scheme}:{url}" + else: + url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}" + elif scheme: + url = f"{scheme}:{url}" + if query: + url = f"{url}?{query}" + return f"{url}#{fragment}" if fragment else url + + +@lru_cache # match the same size as urlsplit +def make_netloc( + user: Union[str, None], + password: Union[str, None], + host: Union[str, None], + port: Union[int, None], + encode: bool = False, +) -> str: + """Make netloc from parts. + + The user and password are encoded if encode is True. + + The host must already be encoded with _encode_host. + """ + if host is None: + return "" + ret = host + if port is not None: + ret = f"{ret}:{port}" + if user is None and password is None: + return ret + if password is not None: + if not user: + user = "" + elif encode: + user = QUOTER(user) + if encode: + password = QUOTER(password) + user = f"{user}:{password}" + elif user and encode: + user = QUOTER(user) + return f"{user}@{ret}" if user else ret diff --git a/.venv/lib/python3.11/site-packages/yarl/_path.py b/.venv/lib/python3.11/site-packages/yarl/_path.py new file mode 100644 index 0000000000000000000000000000000000000000..c22f0b4b8cdd9280fd36789e2bc052b1c4938167 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_path.py @@ -0,0 +1,41 @@ +"""Utilities for working with paths.""" + +from collections.abc import Sequence +from contextlib import suppress + + +def normalize_path_segments(segments: Sequence[str]) -> list[str]: + """Drop '.' and '..' from a sequence of str segments""" + + resolved_path: list[str] = [] + + for seg in segments: + if seg == "..": + # ignore any .. segments that would otherwise cause an + # IndexError when popped from resolved_path if + # resolving for rfc3986 + with suppress(IndexError): + resolved_path.pop() + elif seg != ".": + resolved_path.append(seg) + + if segments and segments[-1] in (".", ".."): + # do some post-processing here. 
+ # if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append("") + + return resolved_path + + +def normalize_path(path: str) -> str: + # Drop '.' and '..' from str path + prefix = "" + if path and path[0] == "/": + # preserve the "/" root element of absolute paths, copying it to the + # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. + prefix = "/" + path = path[1:] + + segments = path.split("/") + return prefix + "/".join(normalize_path_segments(segments)) diff --git a/.venv/lib/python3.11/site-packages/yarl/_query.py b/.venv/lib/python3.11/site-packages/yarl/_query.py new file mode 100644 index 0000000000000000000000000000000000000000..6a663fc999c3985e5a5e0229c97db3f85c6e7252 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_query.py @@ -0,0 +1,118 @@ +"""Query string handling.""" + +import math +from collections.abc import Iterable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, SupportsInt, Union + +from multidict import istr + +from ._quoters import QUERY_PART_QUOTER, QUERY_QUOTER + +SimpleQuery = Union[str, int, float] +QueryVariable = Union[SimpleQuery, Sequence[SimpleQuery]] +Query = Union[ + None, str, Mapping[str, QueryVariable], Sequence[tuple[str, QueryVariable]] +] + + +def query_var(v: QueryVariable) -> str: + """Convert a query variable to a string.""" + cls = type(v) + if cls is int: # Fast path for non-subclassed int + return str(v) + if issubclass(cls, str): + if TYPE_CHECKING: + assert isinstance(v, str) + return v + if cls is float or issubclass(cls, float): + if TYPE_CHECKING: + assert isinstance(v, float) + if math.isinf(v): + raise ValueError("float('inf') is not supported") + if math.isnan(v): + raise ValueError("float('nan') is not supported") + return str(float(v)) + if cls is not bool and isinstance(cls, SupportsInt): + return str(int(v)) + raise TypeError( + "Invalid variable type: value " + "should be str, int or float, got {!r} " + "of type {}".format(v, cls) + ) + + +def get_str_query_from_sequence_iterable( + items: Iterable[tuple[Union[str, istr], QueryVariable]], +) -> str: + """Return a query string from a sequence of (key, value) pairs. + + value is a single value or a sequence of values for the key + + The sequence of values must be a list or tuple. + """ + quoter = QUERY_PART_QUOTER + pairs = [ + f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}" + for k, val in items + for v in ( + val if type(val) is not str and isinstance(val, (list, tuple)) else (val,) + ) + ] + return "&".join(pairs) + + +def get_str_query_from_iterable( + items: Iterable[tuple[Union[str, istr], SimpleQuery]] +) -> str: + """Return a query string from an iterable. + + The iterable must contain (key, value) pairs. + + The values are not allowed to be sequences, only single values are + allowed. For sequences, use `_get_str_query_from_sequence_iterable`. + """ + quoter = QUERY_PART_QUOTER + # A listcomp is used since listcomps are inlined on CPython 3.12+ and + # they are a bit faster than a generator expression. 
+ pairs = [ + f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}" for k, v in items + ] + return "&".join(pairs) + + +def get_str_query(*args: Any, **kwargs: Any) -> Union[str, None]: + """Return a query string from supported args.""" + query: Union[str, Mapping[str, QueryVariable], None] + if kwargs: + if args: + msg = "Either kwargs or single query parameter must be present" + raise ValueError(msg) + query = kwargs + elif len(args) == 1: + query = args[0] + else: + raise ValueError("Either kwargs or single query parameter must be present") + + if query is None: + return None + if not query: + return "" + if type(query) is dict: + return get_str_query_from_sequence_iterable(query.items()) + if type(query) is str or isinstance(query, str): + return QUERY_QUOTER(query) + if isinstance(query, Mapping): + return get_str_query_from_sequence_iterable(query.items()) + if isinstance(query, (bytes, bytearray, memoryview)): + msg = "Invalid query type: bytes, bytearray and memoryview are forbidden" + raise TypeError(msg) + if isinstance(query, Sequence): + # We don't expect sequence values if we're given a list of pairs + # already; only mappings like builtin `dict` which can't have the + # same key pointing to multiple values are allowed to use + # `_query_seq_pairs`. + return get_str_query_from_iterable(query) + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) diff --git a/.venv/lib/python3.11/site-packages/yarl/_quoters.py b/.venv/lib/python3.11/site-packages/yarl/_quoters.py new file mode 100644 index 0000000000000000000000000000000000000000..c1d2d7f81ef1e3d61baaa831f95f1bf582b8b806 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_quoters.py @@ -0,0 +1,32 @@ +"""Quoting and unquoting utilities for URL parts.""" + +from typing import Union +from urllib.parse import quote + +from ._quoting import _Quoter, _Unquoter + +QUOTER = _Quoter(requote=False) +REQUOTER = _Quoter() +PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False) +PATH_REQUOTER = _Quoter(safe="@:", protected="/+") +QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False) +QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True) +QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False) +FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False) +FRAGMENT_REQUOTER = _Quoter(safe="?/:@") + +UNQUOTER = _Unquoter() +PATH_UNQUOTER = _Unquoter(unsafe="+") +PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") +QS_UNQUOTER = _Unquoter(qs=True) + + +def human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]: + if not s: + return s + for c in "%" + unsafe: + if c in s: + s = s.replace(c, f"%{ord(c):02X}") + if s.isprintable(): + return s + return "".join(c if c.isprintable() else quote(c) for c in s) diff --git a/.venv/lib/python3.11/site-packages/yarl/_quoting.py b/.venv/lib/python3.11/site-packages/yarl/_quoting.py new file mode 100644 index 0000000000000000000000000000000000000000..95e86095d1dec243a96559a39460ba99bdfa6826 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_quoting.py @@ -0,0 +1,18 @@ +import os +import sys + +__all__ = ("_Quoter", "_Unquoter") + + +NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS")) # type: bool +if sys.implementation.name != "cpython": + NO_EXTENSIONS = True + + +if not NO_EXTENSIONS: # pragma: no branch + try: + from ._quoting_c import _Quoter, _Unquoter + except ImportError: # pragma: no cover + from ._quoting_py import _Quoter, _Unquoter # type: 
ignore[assignment] +else: + from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] diff --git a/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyi b/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyi new file mode 100644 index 0000000000000000000000000000000000000000..9a6b79adf9106814e32a57cfb51447baba555e13 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyi @@ -0,0 +1,16 @@ +class _Quoter: + def __init__( + self, + *, + safe: str = ..., + protected: str = ..., + qs: bool = ..., + requote: bool = ... + ) -> None: ... + def __call__(self, val: str = ...) -> str: ... + +class _Unquoter: + def __init__( + self, *, ignore: str = ..., unsafe: str = ..., qs: bool = ... + ) -> None: ... + def __call__(self, val: str = ...) -> str: ... diff --git a/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyx b/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyx new file mode 100644 index 0000000000000000000000000000000000000000..067ba96e4efa297669b22fa407af500c7ea1e636 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_quoting_c.pyx @@ -0,0 +1,423 @@ +# cython: language_level=3 + +from cpython.exc cimport PyErr_NoMemory +from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc +from cpython.unicode cimport ( + PyUnicode_DATA, + PyUnicode_DecodeASCII, + PyUnicode_DecodeUTF8Stateful, + PyUnicode_GET_LENGTH, + PyUnicode_KIND, + PyUnicode_READ, +) +from libc.stdint cimport uint8_t, uint64_t +from libc.string cimport memcpy, memset + +from string import ascii_letters, digits + + +cdef str GEN_DELIMS = ":/?#[]@" +cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*," +cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+?=;' +cdef str RESERVED = GEN_DELIMS + SUB_DELIMS +cdef str UNRESERVED = ascii_letters + digits + '-._~' +cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS +cdef str QS = '+&=;' + +DEF BUF_SIZE = 8 * 1024 # 8KiB +cdef char BUFFER[BUF_SIZE] + +cdef inline Py_UCS4 _to_hex(uint8_t v) noexcept: + if v < 10: + return (v+0x30) # ord('0') == 0x30 + else: + return (v+0x41-10) # ord('A') == 0x41 + + +cdef inline int _from_hex(Py_UCS4 v) noexcept: + if '0' <= v <= '9': + return (v) - 0x30 # ord('0') == 0x30 + elif 'A' <= v <= 'F': + return (v) - 0x41 + 10 # ord('A') == 0x41 + elif 'a' <= v <= 'f': + return (v) - 0x61 + 10 # ord('a') == 0x61 + else: + return -1 + + +cdef inline int _is_lower_hex(Py_UCS4 v) noexcept: + return 'a' <= v <= 'f' + + +cdef inline Py_UCS4 _restore_ch(Py_UCS4 d1, Py_UCS4 d2): + cdef int digit1 = _from_hex(d1) + if digit1 < 0: + return -1 + cdef int digit2 = _from_hex(d2) + if digit2 < 0: + return -1 + return (digit1 << 4 | digit2) + + +cdef uint8_t ALLOWED_TABLE[16] +cdef uint8_t ALLOWED_NOTQS_TABLE[16] + + +cdef inline bint bit_at(uint8_t array[], uint64_t ch) noexcept: + return array[ch >> 3] & (1 << (ch & 7)) + + +cdef inline void set_bit(uint8_t array[], uint64_t ch) noexcept: + array[ch >> 3] |= (1 << (ch & 7)) + + +memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE)) +memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE)) + +for i in range(128): + if chr(i) in ALLOWED: + set_bit(ALLOWED_TABLE, i) + set_bit(ALLOWED_NOTQS_TABLE, i) + if chr(i) in QS: + set_bit(ALLOWED_NOTQS_TABLE, i) + +# ----------------- writer --------------------------- + +cdef struct Writer: + char *buf + Py_ssize_t size + Py_ssize_t pos + bint changed + + +cdef inline void _init_writer(Writer* writer): + writer.buf = &BUFFER[0] + writer.size = BUF_SIZE + writer.pos = 0 + writer.changed = 0 + + +cdef inline void _release_writer(Writer* writer): + 
if writer.buf != BUFFER: + PyMem_Free(writer.buf) + + +cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed): + cdef char * buf + cdef Py_ssize_t size + + if writer.pos == writer.size: + # reallocate + size = writer.size + BUF_SIZE + if writer.buf == BUFFER: + buf = PyMem_Malloc(size) + if buf == NULL: + PyErr_NoMemory() + return -1 + memcpy(buf, writer.buf, writer.size) + else: + buf = PyMem_Realloc(writer.buf, size) + if buf == NULL: + PyErr_NoMemory() + return -1 + writer.buf = buf + writer.size = size + writer.buf[writer.pos] = ch + writer.pos += 1 + writer.changed |= changed + return 0 + + +cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed): + if _write_char(writer, '%', changed) < 0: + return -1 + if _write_char(writer, _to_hex(ch >> 4), changed) < 0: + return -1 + return _write_char(writer, _to_hex(ch & 0x0f), changed) + + +cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol): + cdef uint64_t utf = symbol + + if utf < 0x80: + return _write_pct(writer, utf, True) + elif utf < 0x800: + if _write_pct(writer, (0xc0 | (utf >> 6)), True) < 0: + return -1 + return _write_pct(writer, (0x80 | (utf & 0x3f)), True) + elif 0xD800 <= utf <= 0xDFFF: + # surogate pair, ignored + return 0 + elif utf < 0x10000: + if _write_pct(writer, (0xe0 | (utf >> 12)), True) < 0: + return -1 + if _write_pct(writer, (0x80 | ((utf >> 6) & 0x3f)), + True) < 0: + return -1 + return _write_pct(writer, (0x80 | (utf & 0x3f)), True) + elif utf > 0x10FFFF: + # symbol is too large + return 0 + else: + if _write_pct(writer, (0xf0 | (utf >> 18)), True) < 0: + return -1 + if _write_pct(writer, (0x80 | ((utf >> 12) & 0x3f)), + True) < 0: + return -1 + if _write_pct(writer, (0x80 | ((utf >> 6) & 0x3f)), + True) < 0: + return -1 + return _write_pct(writer, (0x80 | (utf & 0x3f)), True) + + +# --------------------- end writer -------------------------- + + +cdef class _Quoter: + cdef bint _qs + cdef bint _requote + + cdef uint8_t _safe_table[16] + cdef uint8_t _protected_table[16] + + def __init__( + self, *, str safe='', str protected='', bint qs=False, bint requote=True, + ): + cdef Py_UCS4 ch + + self._qs = qs + self._requote = requote + + if not self._qs: + memcpy(self._safe_table, + ALLOWED_NOTQS_TABLE, + sizeof(self._safe_table)) + else: + memcpy(self._safe_table, + ALLOWED_TABLE, + sizeof(self._safe_table)) + for ch in safe: + if ord(ch) > 127: + raise ValueError("Only safe symbols with ORD < 128 are allowed") + set_bit(self._safe_table, ch) + + memset(self._protected_table, 0, sizeof(self._protected_table)) + for ch in protected: + if ord(ch) > 127: + raise ValueError("Only safe symbols with ORD < 128 are allowed") + set_bit(self._safe_table, ch) + set_bit(self._protected_table, ch) + + def __call__(self, val): + if val is None: + return None + if type(val) is not str: + if isinstance(val, str): + # derived from str + val = str(val) + else: + raise TypeError("Argument should be str") + return self._do_quote_or_skip(val) + + cdef str _do_quote_or_skip(self, str val): + cdef Py_UCS4 ch + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val) + cdef Py_ssize_t idx = length + cdef bint must_quote = 0 + cdef Writer writer + cdef int kind = PyUnicode_KIND(val) + cdef const void *data = PyUnicode_DATA(val) + + # If everything in the string is in the safe + # table and all ASCII, we can skip quoting + while idx: + idx -= 1 + ch = PyUnicode_READ(kind, data, idx) + if ch >= 128 or not bit_at(self._safe_table, ch): + must_quote = 1 + break + + if not must_quote: + return val + + 
_init_writer(&writer) + try: + return self._do_quote(val, length, kind, data, &writer) + finally: + _release_writer(&writer) + + cdef str _do_quote( + self, + str val, + Py_ssize_t length, + int kind, + const void *data, + Writer *writer + ): + cdef Py_UCS4 ch + cdef int changed + cdef Py_ssize_t idx = 0 + + while idx < length: + ch = PyUnicode_READ(kind, data, idx) + idx += 1 + if ch == '%' and self._requote and idx <= length - 2: + ch = _restore_ch( + PyUnicode_READ(kind, data, idx), + PyUnicode_READ(kind, data, idx + 1) + ) + if ch != -1: + idx += 2 + if ch < 128: + if bit_at(self._protected_table, ch): + if _write_pct(writer, ch, True) < 0: + raise + continue + + if bit_at(self._safe_table, ch): + if _write_char(writer, ch, True) < 0: + raise + continue + + changed = (_is_lower_hex(PyUnicode_READ(kind, data, idx - 2)) or + _is_lower_hex(PyUnicode_READ(kind, data, idx - 1))) + if _write_pct(writer, ch, changed) < 0: + raise + continue + else: + ch = '%' + + if self._write(writer, ch) < 0: + raise + + if not writer.changed: + return val + else: + return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict") + + cdef inline int _write(self, Writer *writer, Py_UCS4 ch): + if self._qs: + if ch == ' ': + return _write_char(writer, '+', True) + + if ch < 128 and bit_at(self._safe_table, ch): + return _write_char(writer, ch, False) + + return _write_utf8(writer, ch) + + +cdef class _Unquoter: + cdef str _ignore + cdef str _unsafe + cdef bint _qs + cdef _Quoter _quoter + cdef _Quoter _qs_quoter + + def __init__(self, *, ignore="", unsafe="", qs=False): + self._ignore = ignore + self._unsafe = unsafe + self._qs = qs + self._quoter = _Quoter() + self._qs_quoter = _Quoter(qs=True) + + def __call__(self, val): + if val is None: + return None + if type(val) is not str: + if isinstance(val, str): + # derived from str + val = str(val) + else: + raise TypeError("Argument should be str") + return self._do_unquote(val) + + cdef str _do_unquote(self, str val): + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val) + if length == 0: + return val + + cdef list ret = [] + cdef char buffer[4] + cdef Py_ssize_t buflen = 0 + cdef Py_ssize_t consumed + cdef str unquoted + cdef Py_UCS4 ch = 0 + cdef Py_ssize_t idx = 0 + cdef Py_ssize_t start_pct + cdef int kind = PyUnicode_KIND(val) + cdef const void *data = PyUnicode_DATA(val) + cdef bint changed = 0 + while idx < length: + ch = PyUnicode_READ(kind, data, idx) + idx += 1 + if ch == '%' and idx <= length - 2: + changed = 1 + ch = _restore_ch( + PyUnicode_READ(kind, data, idx), + PyUnicode_READ(kind, data, idx + 1) + ) + if ch != -1: + idx += 2 + assert buflen < 4 + buffer[buflen] = ch + buflen += 1 + try: + unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen, + NULL, &consumed) + except UnicodeDecodeError: + start_pct = idx - buflen * 3 + buffer[0] = ch + buflen = 1 + ret.append(val[start_pct : idx - 3]) + try: + unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen, + NULL, &consumed) + except UnicodeDecodeError: + buflen = 0 + ret.append(val[idx - 3 : idx]) + continue + if not unquoted: + assert consumed == 0 + continue + assert consumed == buflen + buflen = 0 + if self._qs and unquoted in '+=&;': + ret.append(self._qs_quoter(unquoted)) + elif unquoted in self._unsafe or unquoted in self._ignore: + ret.append(self._quoter(unquoted)) + else: + ret.append(unquoted) + continue + else: + ch = '%' + + if buflen: + start_pct = idx - 1 - buflen * 3 + ret.append(val[start_pct : idx - 1]) + buflen = 0 + + if ch == '+': + if not self._qs or ch in self._unsafe: + 
ret.append('+') + else: + changed = 1 + ret.append(' ') + continue + + if ch in self._unsafe: + changed = 1 + ret.append('%') + h = hex(ord(ch)).upper()[2:] + for ch in h: + ret.append(ch) + continue + + ret.append(ch) + + if not changed: + return val + + if buflen: + ret.append(val[length - buflen * 3 : length]) + + return ''.join(ret) diff --git a/.venv/lib/python3.11/site-packages/yarl/_quoting_py.py b/.venv/lib/python3.11/site-packages/yarl/_quoting_py.py new file mode 100644 index 0000000000000000000000000000000000000000..7256acd8c82dcdf9dc0674dc5ea04e07250baa4d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_quoting_py.py @@ -0,0 +1,197 @@ +import codecs +import re +from string import ascii_letters, ascii_lowercase, digits +from typing import cast + +BASCII_LOWERCASE = ascii_lowercase.encode("ascii") +BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)} +GEN_DELIMS = ":/?#[]@" +SUB_DELIMS_WITHOUT_QS = "!$'()*," +SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;" +RESERVED = GEN_DELIMS + SUB_DELIMS +UNRESERVED = ascii_letters + digits + "-._~" +ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS + + +_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]") +_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]") + +utf8_decoder = codecs.getincrementaldecoder("utf-8") + + +class _Quoter: + def __init__( + self, + *, + safe: str = "", + protected: str = "", + qs: bool = False, + requote: bool = True, + ) -> None: + self._safe = safe + self._protected = protected + self._qs = qs + self._requote = requote + + def __call__(self, val: str) -> str: + if val is None: + return None + if not isinstance(val, str): + raise TypeError("Argument should be str") + if not val: + return "" + bval = val.encode("utf8", errors="ignore") + ret = bytearray() + pct = bytearray() + safe = self._safe + safe += ALLOWED + if not self._qs: + safe += "+&=;" + safe += self._protected + bsafe = safe.encode("ascii") + idx = 0 + while idx < len(bval): + ch = bval[idx] + idx += 1 + + if pct: + if ch in BASCII_LOWERCASE: + ch = ch - 32 # convert to uppercase + pct.append(ch) + if len(pct) == 3: # pragma: no branch # peephole optimizer + buf = pct[1:] + if not _IS_HEX.match(buf): + ret.extend(b"%25") + pct.clear() + idx -= 2 + continue + try: + unquoted = chr(int(pct[1:].decode("ascii"), base=16)) + except ValueError: + ret.extend(b"%25") + pct.clear() + idx -= 2 + continue + + if unquoted in self._protected: + ret.extend(pct) + elif unquoted in safe: + ret.append(ord(unquoted)) + else: + ret.extend(pct) + pct.clear() + + # special case, if we have only one char after "%" + elif len(pct) == 2 and idx == len(bval): + ret.extend(b"%25") + pct.clear() + idx -= 1 + + continue + + elif ch == ord("%") and self._requote: + pct.clear() + pct.append(ch) + + # special case if "%" is last char + if idx == len(bval): + ret.extend(b"%25") + + continue + + if self._qs and ch == ord(" "): + ret.append(ord("+")) + continue + if ch in bsafe: + ret.append(ch) + continue + + ret.extend((f"%{ch:02X}").encode("ascii")) + + ret2 = ret.decode("ascii") + if ret2 == val: + return val + return ret2 + + +class _Unquoter: + def __init__(self, *, ignore: str = "", unsafe: str = "", qs: bool = False) -> None: + self._ignore = ignore + self._unsafe = unsafe + self._qs = qs + self._quoter = _Quoter() + self._qs_quoter = _Quoter(qs=True) + + def __call__(self, val: str) -> str: + if val is None: + return None + if not isinstance(val, str): + raise TypeError("Argument should be str") + if not val: + return "" + decoder = cast(codecs.BufferedIncrementalDecoder, 
utf8_decoder()) + ret = [] + idx = 0 + while idx < len(val): + ch = val[idx] + idx += 1 + if ch == "%" and idx <= len(val) - 2: + pct = val[idx : idx + 2] + if _IS_HEX_STR.fullmatch(pct): + b = bytes([int(pct, base=16)]) + idx += 2 + try: + unquoted = decoder.decode(b) + except UnicodeDecodeError: + start_pct = idx - 3 - len(decoder.buffer) * 3 + ret.append(val[start_pct : idx - 3]) + decoder.reset() + try: + unquoted = decoder.decode(b) + except UnicodeDecodeError: + ret.append(val[idx - 3 : idx]) + continue + if not unquoted: + continue + if self._qs and unquoted in "+=&;": + to_add = self._qs_quoter(unquoted) + if to_add is None: # pragma: no cover + raise RuntimeError("Cannot quote None") + ret.append(to_add) + elif unquoted in self._unsafe or unquoted in self._ignore: + to_add = self._quoter(unquoted) + if to_add is None: # pragma: no cover + raise RuntimeError("Cannot quote None") + ret.append(to_add) + else: + ret.append(unquoted) + continue + + if decoder.buffer: + start_pct = idx - 1 - len(decoder.buffer) * 3 + ret.append(val[start_pct : idx - 1]) + decoder.reset() + + if ch == "+": + if not self._qs or ch in self._unsafe: + ret.append("+") + else: + ret.append(" ") + continue + + if ch in self._unsafe: + ret.append("%") + h = hex(ord(ch)).upper()[2:] + for ch in h: + ret.append(ch) + continue + + ret.append(ch) + + if decoder.buffer: + ret.append(val[-len(decoder.buffer) * 3 :]) + + ret2 = "".join(ret) + if ret2 == val: + return val + return ret2 diff --git a/.venv/lib/python3.11/site-packages/yarl/_url.py b/.venv/lib/python3.11/site-packages/yarl/_url.py new file mode 100644 index 0000000000000000000000000000000000000000..4e4b8a3763c127b41ece5973b25fbd9a4bfb6122 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/_url.py @@ -0,0 +1,1584 @@ +import re +import sys +import warnings +from collections.abc import Mapping, Sequence +from enum import Enum +from functools import _CacheInfo, lru_cache +from ipaddress import ip_address +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload +from urllib.parse import SplitResult, parse_qsl, uses_relative + +import idna +from multidict import MultiDict, MultiDictProxy +from propcache.api import under_cached_property as cached_property + +from ._parse import ( + USES_AUTHORITY, + SplitURLType, + make_netloc, + split_netloc, + split_url, + unsplit_result, +) +from ._path import normalize_path, normalize_path_segments +from ._query import ( + Query, + QueryVariable, + SimpleQuery, + get_str_query, + get_str_query_from_iterable, + get_str_query_from_sequence_iterable, +) +from ._quoters import ( + FRAGMENT_QUOTER, + FRAGMENT_REQUOTER, + PATH_QUOTER, + PATH_REQUOTER, + PATH_SAFE_UNQUOTER, + PATH_UNQUOTER, + QS_UNQUOTER, + QUERY_QUOTER, + QUERY_REQUOTER, + QUOTER, + REQUOTER, + UNQUOTER, + human_quote, +) + +DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443, "ftp": 21} +USES_RELATIVE = frozenset(uses_relative) + +# Special schemes https://url.spec.whatwg.org/#special-scheme +# are not allowed to have an empty host https://url.spec.whatwg.org/#url-representation +SCHEME_REQUIRES_HOST = frozenset(("http", "https", "ws", "wss", "ftp")) + + +# reg-name: unreserved / pct-encoded / sub-delims +# this pattern matches anything that is *not* in those classes. and is only used +# on lower-cased ASCII values. 
+NOT_REG_NAME = re.compile( + r""" + # any character not in the unreserved or sub-delims sets, plus % + # (validated with the additional check for pct-encoded sequences below) + [^a-z0-9\-._~!$&'()*+,;=%] + | + # % only allowed if it is part of a pct-encoded + # sequence of 2 hex digits. + %(?![0-9a-f]{2}) + """, + re.VERBOSE, +) + +_T = TypeVar("_T") + +if sys.version_info >= (3, 11): + from typing import Self +else: + Self = Any + + +class UndefinedType(Enum): + """Singleton type for use with not set sentinel values.""" + + _singleton = 0 + + +UNDEFINED = UndefinedType._singleton + + +class CacheInfo(TypedDict): + """Host encoding cache.""" + + idna_encode: _CacheInfo + idna_decode: _CacheInfo + ip_address: _CacheInfo + host_validate: _CacheInfo + encode_host: _CacheInfo + + +class _InternalURLCache(TypedDict, total=False): + _val: SplitURLType + _origin: "URL" + absolute: bool + scheme: str + raw_authority: str + authority: str + raw_user: Union[str, None] + user: Union[str, None] + raw_password: Union[str, None] + password: Union[str, None] + raw_host: Union[str, None] + host: Union[str, None] + host_subcomponent: Union[str, None] + host_port_subcomponent: Union[str, None] + port: Union[int, None] + explicit_port: Union[int, None] + raw_path: str + path: str + _parsed_query: list[tuple[str, str]] + query: "MultiDictProxy[str]" + raw_query_string: str + query_string: str + path_qs: str + raw_path_qs: str + raw_fragment: str + fragment: str + raw_parts: tuple[str, ...] + parts: tuple[str, ...] + parent: "URL" + raw_name: str + name: str + raw_suffix: str + suffix: str + raw_suffixes: tuple[str, ...] + suffixes: tuple[str, ...] + + +def rewrite_module(obj: _T) -> _T: + obj.__module__ = "yarl" + return obj + + +@lru_cache +def encode_url(url_str: str) -> "URL": + """Parse unencoded URL.""" + cache: _InternalURLCache = {} + host: Union[str, None] + scheme, netloc, path, query, fragment = split_url(url_str) + if not netloc: # netloc + host = "" + else: + if ":" in netloc or "@" in netloc or "[" in netloc: + # Complex netloc + username, password, host, port = split_netloc(netloc) + else: + username = password = port = None + host = netloc + if host is None: + if scheme in SCHEME_REQUIRES_HOST: + msg = ( + "Invalid URL: host is required for " + f"absolute urls with the {scheme} scheme" + ) + raise ValueError(msg) + else: + host = "" + host = _encode_host(host, validate_host=False) + # Remove brackets as host encoder adds back brackets for IPv6 addresses + cache["raw_host"] = host[1:-1] if "[" in host else host + cache["explicit_port"] = port + if password is None and username is None: + # Fast path for URLs without user, password + netloc = host if port is None else f"{host}:{port}" + cache["raw_user"] = None + cache["raw_password"] = None + else: + raw_user = REQUOTER(username) if username else username + raw_password = REQUOTER(password) if password else password + netloc = make_netloc(raw_user, raw_password, host, port) + cache["raw_user"] = raw_user + cache["raw_password"] = raw_password + + if path: + path = PATH_REQUOTER(path) + if netloc and "." 
in path: + path = normalize_path(path) + if query: + query = QUERY_REQUOTER(query) + if fragment: + fragment = FRAGMENT_REQUOTER(fragment) + + cache["scheme"] = scheme + cache["raw_path"] = "/" if not path and netloc else path + cache["raw_query_string"] = query + cache["raw_fragment"] = fragment + + self = object.__new__(URL) + self._scheme = scheme + self._netloc = netloc + self._path = path + self._query = query + self._fragment = fragment + self._cache = cache + return self + + +@lru_cache +def pre_encoded_url(url_str: str) -> "URL": + """Parse pre-encoded URL.""" + self = object.__new__(URL) + val = split_url(url_str) + self._scheme, self._netloc, self._path, self._query, self._fragment = val + self._cache = {} + return self + + +@lru_cache +def build_pre_encoded_url( + scheme: str, + authority: str, + user: Union[str, None], + password: Union[str, None], + host: str, + port: Union[int, None], + path: str, + query_string: str, + fragment: str, +) -> "URL": + """Build a pre-encoded URL from parts.""" + self = object.__new__(URL) + self._scheme = scheme + if authority: + self._netloc = authority + elif host: + if port is not None: + port = None if port == DEFAULT_PORTS.get(scheme) else port + if user is None and password is None: + self._netloc = host if port is None else f"{host}:{port}" + else: + self._netloc = make_netloc(user, password, host, port) + else: + self._netloc = "" + self._path = path + self._query = query_string + self._fragment = fragment + self._cache = {} + return self + + +def from_parts_uncached( + scheme: str, netloc: str, path: str, query: str, fragment: str +) -> "URL": + """Create a new URL from parts.""" + self = object.__new__(URL) + self._scheme = scheme + self._netloc = netloc + self._path = path + self._query = query + self._fragment = fragment + self._cache = {} + return self + + +from_parts = lru_cache(from_parts_uncached) + + +@rewrite_module +class URL: + # Don't derive from str + # follow pathlib.Path design + # probably URL will not suffer from pathlib problems: + # it's intended for libraries like aiohttp, + # not to be passed into standard library functions like os.open etc. + + # URL grammar (RFC 3986) + # pct-encoded = "%" HEXDIG HEXDIG + # reserved = gen-delims / sub-delims + # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + # / "*" / "+" / "," / ";" / "=" + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + # hier-part = "//" authority path-abempty + # / path-absolute + # / path-rootless + # / path-empty + # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + # authority = [ userinfo "@" ] host [ ":" port ] + # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + # host = IP-literal / IPv4address / reg-name + # IP-literal = "[" ( IPv6address / IPvFuture ) "]" + # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + # IPv6address = 6( h16 ":" ) ls32 + # / "::" 5( h16 ":" ) ls32 + # / [ h16 ] "::" 4( h16 ":" ) ls32 + # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + # / [ *4( h16 ":" ) h16 ] "::" ls32 + # / [ *5( h16 ":" ) h16 ] "::" h16 + # / [ *6( h16 ":" ) h16 ] "::" + # ls32 = ( h16 ":" h16 ) / IPv4address + # ; least-significant 32 bits of address + # h16 = 1*4HEXDIG + # ; 16 bits of address represented in hexadecimal + # IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet + # dec-octet = DIGIT ; 0-9 + # / %x31-39 DIGIT ; 10-99 + # / "1" 2DIGIT ; 100-199 + # / "2" %x30-34 DIGIT ; 200-249 + # / "25" %x30-35 ; 250-255 + # reg-name = *( unreserved / pct-encoded / sub-delims ) + # port = *DIGIT + # path = path-abempty ; begins with "/" or is empty + # / path-absolute ; begins with "/" but not "//" + # / path-noscheme ; begins with a non-colon segment + # / path-rootless ; begins with a segment + # / path-empty ; zero characters + # path-abempty = *( "/" segment ) + # path-absolute = "/" [ segment-nz *( "/" segment ) ] + # path-noscheme = segment-nz-nc *( "/" segment ) + # path-rootless = segment-nz *( "/" segment ) + # path-empty = 0 + # segment = *pchar + # segment-nz = 1*pchar + # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + # ; non-zero-length segment without any colon ":" + # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + # query = *( pchar / "/" / "?" ) + # fragment = *( pchar / "/" / "?" ) + # URI-reference = URI / relative-ref + # relative-ref = relative-part [ "?" query ] [ "#" fragment ] + # relative-part = "//" authority path-abempty + # / path-absolute + # / path-noscheme + # / path-empty + # absolute-URI = scheme ":" hier-part [ "?" query ] + __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment") + + _scheme: str + _netloc: str + _path: str + _query: str + _fragment: str + + def __new__( + cls, + val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED, + *, + encoded: bool = False, + strict: Union[bool, None] = None, + ) -> "URL": + if strict is not None: # pragma: no cover + warnings.warn("strict parameter is ignored") + if type(val) is str: + return pre_encoded_url(val) if encoded else encode_url(val) + if type(val) is cls: + return val + if type(val) is SplitResult: + if not encoded: + raise ValueError("Cannot apply decoding to SplitResult") + return from_parts(*val) + if isinstance(val, str): + return pre_encoded_url(str(val)) if encoded else encode_url(str(val)) + if val is UNDEFINED: + # Special case for UNDEFINED since it might be unpickling and we do + # not want to cache as the `__set_state__` call would mutate the URL + # object in the `pre_encoded_url` or `encoded_url` caches. + self = object.__new__(URL) + self._scheme = self._netloc = self._path = self._query = self._fragment = "" + self._cache = {} + return self + raise TypeError("Constructor parameter should be str") + + @classmethod + def build( + cls, + *, + scheme: str = "", + authority: str = "", + user: Union[str, None] = None, + password: Union[str, None] = None, + host: str = "", + port: Union[int, None] = None, + path: str = "", + query: Union[Query, None] = None, + query_string: str = "", + fragment: str = "", + encoded: bool = False, + ) -> "URL": + """Creates and returns a new URL""" + + if authority and (user or password or host or port): + raise ValueError( + 'Can\'t mix "authority" with "user", "password", "host" or "port".' 
+ ) + if port is not None and not isinstance(port, int): + raise TypeError(f"The port is required to be int, got {type(port)!r}.") + if port and not host: + raise ValueError('Can\'t build URL with "port" but without "host".') + if query and query_string: + raise ValueError('Only one of "query" or "query_string" should be passed') + if ( + scheme is None + or authority is None + or host is None + or path is None + or query_string is None + or fragment is None + ): + raise TypeError( + 'NoneType is illegal for "scheme", "authority", "host", "path", ' + '"query_string", and "fragment" args, use empty string instead.' + ) + + if query: + query_string = get_str_query(query) or "" + + if encoded: + return build_pre_encoded_url( + scheme, + authority, + user, + password, + host, + port, + path, + query_string, + fragment, + ) + + self = object.__new__(URL) + self._scheme = scheme + _host: Union[str, None] = None + if authority: + user, password, _host, port = split_netloc(authority) + _host = _encode_host(_host, validate_host=False) if _host else "" + elif host: + _host = _encode_host(host, validate_host=True) + else: + self._netloc = "" + + if _host is not None: + if port is not None: + port = None if port == DEFAULT_PORTS.get(scheme) else port + if user is None and password is None: + self._netloc = _host if port is None else f"{_host}:{port}" + else: + self._netloc = make_netloc(user, password, _host, port, True) + + path = PATH_QUOTER(path) if path else path + if path and self._netloc: + if "." in path: + path = normalize_path(path) + if path[0] != "/": + msg = ( + "Path in a URL with authority should " + "start with a slash ('/') if set" + ) + raise ValueError(msg) + + self._path = path + if not query and query_string: + query_string = QUERY_QUOTER(query_string) + self._query = query_string + self._fragment = FRAGMENT_QUOTER(fragment) if fragment else fragment + self._cache = {} + return self + + def __init_subclass__(cls): + raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") + + def __str__(self) -> str: + if not self._path and self._netloc and (self._query or self._fragment): + path = "/" + else: + path = self._path + if (port := self.explicit_port) is not None and port == DEFAULT_PORTS.get( + self._scheme + ): + # port normalization - using None for default ports to remove from rendering + # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3 + host = self.host_subcomponent + netloc = make_netloc(self.raw_user, self.raw_password, host, None) + else: + netloc = self._netloc + return unsplit_result(self._scheme, netloc, path, self._query, self._fragment) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}('{str(self)}')" + + def __bytes__(self) -> bytes: + return str(self).encode("ascii") + + def __eq__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented + + path1 = "/" if not self._path and self._netloc else self._path + path2 = "/" if not other._path and other._netloc else other._path + return ( + self._scheme == other._scheme + and self._netloc == other._netloc + and path1 == path2 + and self._query == other._query + and self._fragment == other._fragment + ) + + def __hash__(self) -> int: + if (ret := self._cache.get("hash")) is None: + path = "/" if not self._path and self._netloc else self._path + ret = self._cache["hash"] = hash( + (self._scheme, self._netloc, path, self._query, self._fragment) + ) + return ret + + def __le__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented 
+ return self._val <= other._val + + def __lt__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented + return self._val < other._val + + def __ge__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented + return self._val >= other._val + + def __gt__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented + return self._val > other._val + + def __truediv__(self, name: str) -> "URL": + if not isinstance(name, str): + return NotImplemented + return self._make_child((str(name),)) + + def __mod__(self, query: Query) -> "URL": + return self.update_query(query) + + def __bool__(self) -> bool: + return bool(self._netloc or self._path or self._query or self._fragment) + + def __getstate__(self) -> tuple[SplitResult]: + return (tuple.__new__(SplitResult, self._val),) + + def __setstate__(self, state): + if state[0] is None and isinstance(state[1], dict): + # default style pickle + val = state[1]["_val"] + else: + val, *unused = state + self._scheme, self._netloc, self._path, self._query, self._fragment = val + self._cache = {} + + def _cache_netloc(self) -> None: + """Cache the netloc parts of the URL.""" + c = self._cache + split_loc = split_netloc(self._netloc) + c["raw_user"], c["raw_password"], c["raw_host"], c["explicit_port"] = split_loc + + def is_absolute(self) -> bool: + """A check for absolute URLs. + + Return True for absolute ones (having scheme or starting + with //), False otherwise. + + Is is preferred to call the .absolute property instead + as it is cached. + """ + return self.absolute + + def is_default_port(self) -> bool: + """A check for default port. + + Return True if port is default for specified scheme, + e.g. 'http://python.org' or 'http://python.org:80', False + otherwise. + + Return False for relative URLs. + + """ + if (explicit := self.explicit_port) is None: + # If the explicit port is None, then the URL must be + # using the default port unless its a relative URL + # which does not have an implicit port / default port + return self._netloc != "" + return explicit == DEFAULT_PORTS.get(self._scheme) + + def origin(self) -> "URL": + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + + """ + # TODO: add a keyword-only option for keeping user/pass maybe? + return self._origin + + @cached_property + def _val(self) -> SplitURLType: + return (self._scheme, self._netloc, self._path, self._query, self._fragment) + + @cached_property + def _origin(self) -> "URL": + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + """ + if not (netloc := self._netloc): + raise ValueError("URL should be absolute") + if not (scheme := self._scheme): + raise ValueError("URL should have scheme") + if "@" in netloc: + encoded_host = self.host_subcomponent + netloc = make_netloc(None, None, encoded_host, self.explicit_port) + elif not self._path and not self._query and not self._fragment: + return self + return from_parts(scheme, netloc, "", "", "") + + def relative(self) -> "URL": + """Return a relative part of the URL. + + scheme, user, password, host and port are removed. + + """ + if not self._netloc: + raise ValueError("URL should be absolute") + return from_parts("", "", self._path, self._query, self._fragment) + + @cached_property + def absolute(self) -> bool: + """A check for absolute URLs. + + Return True for absolute ones (having scheme or starting + with //), False otherwise. 
+ + """ + # `netloc`` is an empty string for relative URLs + # Checking `netloc` is faster than checking `hostname` + # because `hostname` is a property that does some extra work + # to parse the host from the `netloc` + return self._netloc != "" + + @cached_property + def scheme(self) -> str: + """Scheme for absolute URLs. + + Empty string for relative URLs or URLs starting with // + + """ + return self._scheme + + @cached_property + def raw_authority(self) -> str: + """Encoded authority part of URL. + + Empty string for relative URLs. + + """ + return self._netloc + + @cached_property + def authority(self) -> str: + """Decoded authority part of URL. + + Empty string for relative URLs. + + """ + return make_netloc(self.user, self.password, self.host, self.port) + + @cached_property + def raw_user(self) -> Union[str, None]: + """Encoded user part of URL. + + None if user is missing. + + """ + # not .username + self._cache_netloc() + return self._cache["raw_user"] + + @cached_property + def user(self) -> Union[str, None]: + """Decoded user part of URL. + + None if user is missing. + + """ + if (raw_user := self.raw_user) is None: + return None + return UNQUOTER(raw_user) + + @cached_property + def raw_password(self) -> Union[str, None]: + """Encoded password part of URL. + + None if password is missing. + + """ + self._cache_netloc() + return self._cache["raw_password"] + + @cached_property + def password(self) -> Union[str, None]: + """Decoded password part of URL. + + None if password is missing. + + """ + if (raw_password := self.raw_password) is None: + return None + return UNQUOTER(raw_password) + + @cached_property + def raw_host(self) -> Union[str, None]: + """Encoded host part of URL. + + None for relative URLs. + + When working with IPv6 addresses, use the `host_subcomponent` property instead + as it will return the host subcomponent with brackets. + """ + # Use host instead of hostname for sake of shortness + # May add .hostname prop later + self._cache_netloc() + return self._cache["raw_host"] + + @cached_property + def host(self) -> Union[str, None]: + """Decoded host part of URL. + + None for relative URLs. + + """ + if (raw := self.raw_host) is None: + return None + if raw and raw[-1].isdigit() or ":" in raw: + # IP addresses are never IDNA encoded + return raw + return _idna_decode(raw) + + @cached_property + def host_subcomponent(self) -> Union[str, None]: + """Return the host subcomponent part of URL. + + None for relative URLs. + + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + + `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` + + Examples: + - `http://example.com:8080` -> `example.com` + - `http://example.com:80` -> `example.com` + - `https://127.0.0.1:8443` -> `127.0.0.1` + - `https://[::1]:8443` -> `[::1]` + - `http://[::1]` -> `[::1]` + + """ + if (raw := self.raw_host) is None: + return None + return f"[{raw}]" if ":" in raw else raw + + @cached_property + def host_port_subcomponent(self) -> Union[str, None]: + """Return the host and port subcomponent part of URL. + + Trailing dots are removed from the host part. + + This value is suitable for use in the Host header of an HTTP request. + + None for relative URLs. 
+ + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3 + port = *DIGIT + + Examples: + - `http://example.com:8080` -> `example.com:8080` + - `http://example.com:80` -> `example.com` + - `http://example.com.:80` -> `example.com` + - `https://127.0.0.1:8443` -> `127.0.0.1:8443` + - `https://[::1]:8443` -> `[::1]:8443` + - `http://[::1]` -> `[::1]` + + """ + if (raw := self.raw_host) is None: + return None + if raw[-1] == ".": + # Remove all trailing dots from the netloc as while + # they are valid FQDNs in DNS, TLS validation fails. + # See https://github.com/aio-libs/aiohttp/issues/3636. + # To avoid string manipulation we only call rstrip if + # the last character is a dot. + raw = raw.rstrip(".") + port = self.explicit_port + if port is None or port == DEFAULT_PORTS.get(self._scheme): + return f"[{raw}]" if ":" in raw else raw + return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}" + + @cached_property + def port(self) -> Union[int, None]: + """Port part of URL, with scheme-based fallback. + + None for relative URLs or URLs without explicit port and + scheme without default port substitution. + + """ + if (explicit_port := self.explicit_port) is not None: + return explicit_port + return DEFAULT_PORTS.get(self._scheme) + + @cached_property + def explicit_port(self) -> Union[int, None]: + """Port part of URL, without scheme-based fallback. + + None for relative URLs or URLs without explicit port. + + """ + self._cache_netloc() + return self._cache["explicit_port"] + + @cached_property + def raw_path(self) -> str: + """Encoded path of URL. + + / for absolute URLs without path part. + + """ + return self._path if self._path or not self._netloc else "/" + + @cached_property + def path(self) -> str: + """Decoded path of URL. + + / for absolute URLs without path part. + + """ + return PATH_UNQUOTER(self._path) if self._path else "/" if self._netloc else "" + + @cached_property + def path_safe(self) -> str: + """Decoded path of URL. + + / for absolute URLs without path part. + + / (%2F) and % (%25) are not decoded + + """ + if self._path: + return PATH_SAFE_UNQUOTER(self._path) + return "/" if self._netloc else "" + + @cached_property + def _parsed_query(self) -> list[tuple[str, str]]: + """Parse query part of URL.""" + return parse_qsl(self._query, keep_blank_values=True) + + @cached_property + def query(self) -> "MultiDictProxy[str]": + """A MultiDictProxy representing parsed query parameters in decoded + representation. + + Empty value if URL has no query part. + + """ + return MultiDictProxy(MultiDict(self._parsed_query)) + + @cached_property + def raw_query_string(self) -> str: + """Encoded query part of URL. + + Empty string if query is missing. + + """ + return self._query + + @cached_property + def query_string(self) -> str: + """Decoded query part of URL. + + Empty string if query is missing. + + """ + return QS_UNQUOTER(self._query) if self._query else "" + + @cached_property + def path_qs(self) -> str: + """Decoded path of URL with query.""" + return self.path if not (q := self.query_string) else f"{self.path}?{q}" + + @cached_property + def raw_path_qs(self) -> str: + """Encoded path of URL with query.""" + if q := self._query: + return f"{self._path}?{q}" if self._path or not self._netloc else f"/?{q}" + return self._path if self._path or not self._netloc else "/" + + @cached_property + def raw_fragment(self) -> str: + """Encoded fragment part of URL. 
+ + Empty string if fragment is missing. + + """ + return self._fragment + + @cached_property + def fragment(self) -> str: + """Decoded fragment part of URL. + + Empty string if fragment is missing. + + """ + return UNQUOTER(self._fragment) if self._fragment else "" + + @cached_property + def raw_parts(self) -> tuple[str, ...]: + """A tuple containing encoded *path* parts. + + ('/',) for absolute URLs if *path* is missing. + + """ + path = self._path + if self._netloc: + return ("/", *path[1:].split("/")) if path else ("/",) + if path and path[0] == "/": + return ("/", *path[1:].split("/")) + return tuple(path.split("/")) + + @cached_property + def parts(self) -> tuple[str, ...]: + """A tuple containing decoded *path* parts. + + ('/',) for absolute URLs if *path* is missing. + + """ + return tuple(UNQUOTER(part) for part in self.raw_parts) + + @cached_property + def parent(self) -> "URL": + """A new URL with last part of path removed and cleaned up query and + fragment. + + """ + path = self._path + if not path or path == "/": + if self._fragment or self._query: + return from_parts(self._scheme, self._netloc, path, "", "") + return self + parts = path.split("/") + return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "") + + @cached_property + def raw_name(self) -> str: + """The last part of raw_parts.""" + parts = self.raw_parts + if not self._netloc: + return parts[-1] + parts = parts[1:] + return parts[-1] if parts else "" + + @cached_property + def name(self) -> str: + """The last part of parts.""" + return UNQUOTER(self.raw_name) + + @cached_property + def raw_suffix(self) -> str: + name = self.raw_name + i = name.rfind(".") + return name[i:] if 0 < i < len(name) - 1 else "" + + @cached_property + def suffix(self) -> str: + return UNQUOTER(self.raw_suffix) + + @cached_property + def raw_suffixes(self) -> tuple[str, ...]: + name = self.raw_name + if name.endswith("."): + return () + name = name.lstrip(".") + return tuple("." + suffix for suffix in name.split(".")[1:]) + + @cached_property + def suffixes(self) -> tuple[str, ...]: + return tuple(UNQUOTER(suffix) for suffix in self.raw_suffixes) + + def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": + """ + add paths to self._path, accounting for absolute vs relative paths, + keep existing, but do not create new, empty segments + """ + parsed: list[str] = [] + needs_normalize: bool = False + for idx, path in enumerate(reversed(paths)): + # empty segment of last is not removed + last = idx == 0 + if path and path[0] == "/": + raise ValueError( + f"Appending path {path!r} starting from slash is forbidden" + ) + # We need to quote the path if it is not already encoded + # This cannot be done at the end because the existing + # path is already quoted and we do not want to double quote + # the existing path. + path = path if encoded else PATH_QUOTER(path) + needs_normalize |= "." in path + segments = path.split("/") + segments.reverse() + # remove trailing empty segment for all but the last path + parsed += segments[1:] if not last and segments[0] == "" else segments + + if (path := self._path) and (old_segments := path.split("/")): + # If the old path ends with a slash, the last segment is an empty string + # and should be removed before adding the new path segments. + old = old_segments[:-1] if old_segments[-1] == "" else old_segments + old.reverse() + parsed += old + + # If the netloc is present, inject a leading slash when adding a + # path to an absolute URL where there was none before. 
+ if (netloc := self._netloc) and parsed and parsed[-1] != "": + parsed.append("") + + parsed.reverse() + if not netloc or not needs_normalize: + return from_parts(self._scheme, netloc, "/".join(parsed), "", "") + + path = "/".join(normalize_path_segments(parsed)) + # If normalizing the path segments removed the leading slash, add it back. + if path and path[0] != "/": + path = f"/{path}" + return from_parts(self._scheme, netloc, path, "", "") + + def with_scheme(self, scheme: str) -> "URL": + """Return a new URL with scheme replaced.""" + # N.B. doesn't cleanup query/fragment + if not isinstance(scheme, str): + raise TypeError("Invalid scheme type") + lower_scheme = scheme.lower() + netloc = self._netloc + if not netloc and lower_scheme in SCHEME_REQUIRES_HOST: + msg = ( + "scheme replacement is not allowed for " + f"relative URLs for the {lower_scheme} scheme" + ) + raise ValueError(msg) + return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment) + + def with_user(self, user: Union[str, None]) -> "URL": + """Return a new URL with user replaced. + + Autoencode user if needed. + + Clear user/password if user is None. + + """ + # N.B. doesn't cleanup query/fragment + if user is None: + password = None + elif isinstance(user, str): + user = QUOTER(user) + password = self.raw_password + else: + raise TypeError("Invalid user type") + if not (netloc := self._netloc): + raise ValueError("user replacement is not allowed for relative URLs") + encoded_host = self.host_subcomponent or "" + netloc = make_netloc(user, password, encoded_host, self.explicit_port) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) + + def with_password(self, password: Union[str, None]) -> "URL": + """Return a new URL with password replaced. + + Autoencode password if needed. + + Clear password if argument is None. + + """ + # N.B. doesn't cleanup query/fragment + if password is None: + pass + elif isinstance(password, str): + password = QUOTER(password) + else: + raise TypeError("Invalid password type") + if not (netloc := self._netloc): + raise ValueError("password replacement is not allowed for relative URLs") + encoded_host = self.host_subcomponent or "" + port = self.explicit_port + netloc = make_netloc(self.raw_user, password, encoded_host, port) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) + + def with_host(self, host: str) -> "URL": + """Return a new URL with host replaced. + + Autoencode host if needed. + + Changing host for relative URLs is not allowed, use .join() + instead. + + """ + # N.B. doesn't cleanup query/fragment + if not isinstance(host, str): + raise TypeError("Invalid host type") + if not (netloc := self._netloc): + raise ValueError("host replacement is not allowed for relative URLs") + if not host: + raise ValueError("host removing is not allowed") + encoded_host = _encode_host(host, validate_host=True) if host else "" + port = self.explicit_port + netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) + + def with_port(self, port: Union[int, None]) -> "URL": + """Return a new URL with port replaced. + + Clear port to default if None is passed. + + """ + # N.B. 
doesn't cleanup query/fragment + if port is not None: + if isinstance(port, bool) or not isinstance(port, int): + raise TypeError(f"port should be int or None, got {type(port)}") + if not (0 <= port <= 65535): + raise ValueError(f"port must be between 0 and 65535, got {port}") + if not (netloc := self._netloc): + raise ValueError("port replacement is not allowed for relative URLs") + encoded_host = self.host_subcomponent or "" + netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) + + def with_path( + self, + path: str, + *, + encoded: bool = False, + keep_query: bool = False, + keep_fragment: bool = False, + ) -> "URL": + """Return a new URL with path replaced.""" + netloc = self._netloc + if not encoded: + path = PATH_QUOTER(path) + if netloc: + path = normalize_path(path) if "." in path else path + if path and path[0] != "/": + path = f"/{path}" + query = self._query if keep_query else "" + fragment = self._fragment if keep_fragment else "" + return from_parts(self._scheme, netloc, path, query, fragment) + + @overload + def with_query(self, query: Query) -> "URL": ... + + @overload + def with_query(self, **kwargs: QueryVariable) -> "URL": ... + + def with_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part replaced. + + Accepts any Mapping (e.g. dict, multidict.MultiDict instances) + or str, autoencode the argument if needed. + + A sequence of (key, value) pairs is supported as well. + + It also can take an arbitrary number of keyword arguments. + + Clear query if None is passed. + + """ + # N.B. doesn't cleanup query/fragment + query = get_str_query(*args, **kwargs) or "" + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + @overload + def extend_query(self, query: Query) -> "URL": ... + + @overload + def extend_query(self, **kwargs: QueryVariable) -> "URL": ... + + def extend_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part combined with the existing. + + This method will not remove existing query parameters. + + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.extend_query(a=3, c=4) + URL('http://example.com/?a=1&b=2&a=3&c=4') + """ + if not (new_query := get_str_query(*args, **kwargs)): + return self + if query := self._query: + # both strings are already encoded so we can use a simple + # string join + query += new_query if query[-1] == "&" else f"&{new_query}" + else: + query = new_query + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + @overload + def update_query(self, query: Query) -> "URL": ... + + @overload + def update_query(self, **kwargs: QueryVariable) -> "URL": ... + + def update_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part updated. + + This method will overwrite existing query parameters. 
+ + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.update_query(a=3, c=4) + URL('http://example.com/?a=3&b=2&c=4') + """ + in_query: Union[str, Mapping[str, QueryVariable], None] + if kwargs: + if args: + msg = "Either kwargs or single query parameter must be present" + raise ValueError(msg) + in_query = kwargs + elif len(args) == 1: + in_query = args[0] + else: + raise ValueError("Either kwargs or single query parameter must be present") + + if in_query is None: + query = "" + elif not in_query: + query = self._query + elif isinstance(in_query, Mapping): + qm: MultiDict[QueryVariable] = MultiDict(self._parsed_query) + qm.update(in_query) + query = get_str_query_from_sequence_iterable(qm.items()) + elif isinstance(in_query, str): + qstr: MultiDict[str] = MultiDict(self._parsed_query) + qstr.update(parse_qsl(in_query, keep_blank_values=True)) + query = get_str_query_from_iterable(qstr.items()) + elif isinstance(in_query, (bytes, bytearray, memoryview)): + msg = "Invalid query type: bytes, bytearray and memoryview are forbidden" + raise TypeError(msg) + elif isinstance(in_query, Sequence): + # We don't expect sequence values if we're given a list of pairs + # already; only mappings like builtin `dict` which can't have the + # same key pointing to multiple values are allowed to use + # `_query_seq_pairs`. + qs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query) + qs.update(in_query) + query = get_str_query_from_iterable(qs.items()) + else: + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + def without_query_params(self, *query_params: str) -> "URL": + """Remove some keys from query part and return new URL.""" + params_to_remove = set(query_params) & self.query.keys() + if not params_to_remove: + return self + return self.with_query( + tuple( + (name, value) + for name, value in self.query.items() + if name not in params_to_remove + ) + ) + + def with_fragment(self, fragment: Union[str, None]) -> "URL": + """Return a new URL with fragment replaced. + + Autoencode fragment if needed. + + Clear fragment to default if None is passed. + + """ + # N.B. doesn't cleanup query/fragment + if fragment is None: + raw_fragment = "" + elif not isinstance(fragment, str): + raise TypeError("Invalid fragment type") + else: + raw_fragment = FRAGMENT_QUOTER(fragment) + if self._fragment == raw_fragment: + return self + return from_parts( + self._scheme, self._netloc, self._path, self._query, raw_fragment + ) + + def with_name( + self, + name: str, + *, + keep_query: bool = False, + keep_fragment: bool = False, + ) -> "URL": + """Return a new URL with name (last part of path) replaced. + + Query and fragment parts are cleaned up. + + Name is encoded if needed. + + """ + # N.B. DOES cleanup query/fragment + if not isinstance(name, str): + raise TypeError("Invalid name type") + if "/" in name: + raise ValueError("Slash in name is not allowed") + name = PATH_QUOTER(name) + if name in (".", ".."): + raise ValueError(". and .. 
values are forbidden") + parts = list(self.raw_parts) + if netloc := self._netloc: + if len(parts) == 1: + parts.append(name) + else: + parts[-1] = name + parts[0] = "" # replace leading '/' + else: + parts[-1] = name + if parts[0] == "/": + parts[0] = "" # replace leading '/' + + query = self._query if keep_query else "" + fragment = self._fragment if keep_fragment else "" + return from_parts(self._scheme, netloc, "/".join(parts), query, fragment) + + def with_suffix( + self, + suffix: str, + *, + keep_query: bool = False, + keep_fragment: bool = False, + ) -> "URL": + """Return a new URL with suffix (file extension of name) replaced. + + Query and fragment parts are cleaned up. + + suffix is encoded if needed. + """ + if not isinstance(suffix, str): + raise TypeError("Invalid suffix type") + if suffix and not suffix[0] == "." or suffix == ".": + raise ValueError(f"Invalid suffix {suffix!r}") + name = self.raw_name + if not name: + raise ValueError(f"{self!r} has an empty name") + old_suffix = self.raw_suffix + name = name + suffix if not old_suffix else name[: -len(old_suffix)] + suffix + + return self.with_name(name, keep_query=keep_query, keep_fragment=keep_fragment) + + def join(self, url: "URL") -> "URL": + """Join URLs + + Construct a full (“absolute”) URL by combining a “base URL” + (self) with another URL (url). + + Informally, this uses components of the base URL, in + particular the addressing scheme, the network location and + (part of) the path, to provide missing components in the + relative URL. + + """ + if type(url) is not URL: + raise TypeError("url should be URL") + + scheme = url._scheme or self._scheme + if scheme != self._scheme or scheme not in USES_RELATIVE: + return url + + # scheme is in uses_authority as uses_authority is a superset of uses_relative + if (join_netloc := url._netloc) and scheme in USES_AUTHORITY: + return from_parts(scheme, join_netloc, url._path, url._query, url._fragment) + + orig_path = self._path + if join_path := url._path: + if join_path[0] == "/": + path = join_path + elif not orig_path: + path = f"/{join_path}" + elif orig_path[-1] == "/": + path = f"{orig_path}{join_path}" + else: + # … + # and relativizing ".." + # parts[0] is / for absolute urls, + # this join will add a double slash there + path = "/".join([*self.parts[:-1], ""]) + join_path + # which has to be removed + if orig_path[0] == "/": + path = path[1:] + path = normalize_path(path) if "." 
in path else path + else: + path = orig_path + + return from_parts( + scheme, + self._netloc, + path, + url._query if join_path or url._query else self._query, + url._fragment if join_path or url._fragment else self._fragment, + ) + + def joinpath(self, *other: str, encoded: bool = False) -> "URL": + """Return a new URL with the elements in other appended to the path.""" + return self._make_child(other, encoded=encoded) + + def human_repr(self) -> str: + """Return decoded human readable string for URL representation.""" + user = human_quote(self.user, "#/:?@[]") + password = human_quote(self.password, "#/:?@[]") + if (host := self.host) and ":" in host: + host = f"[{host}]" + path = human_quote(self.path, "#?") + if TYPE_CHECKING: + assert path is not None + query_string = "&".join( + "{}={}".format(human_quote(k, "#&+;="), human_quote(v, "#&+;=")) + for k, v in self.query.items() + ) + fragment = human_quote(self.fragment, "") + if TYPE_CHECKING: + assert fragment is not None + netloc = make_netloc(user, password, host, self.explicit_port) + return unsplit_result(self._scheme, netloc, path, query_string, fragment) + + +_DEFAULT_IDNA_SIZE = 256 +_DEFAULT_ENCODE_SIZE = 512 + + +@lru_cache(_DEFAULT_IDNA_SIZE) +def _idna_decode(raw: str) -> str: + try: + return idna.decode(raw.encode("ascii")) + except UnicodeError: # e.g. '::1' + return raw.encode("ascii").decode("idna") + + +@lru_cache(_DEFAULT_IDNA_SIZE) +def _idna_encode(host: str) -> str: + try: + return idna.encode(host, uts46=True).decode("ascii") + except UnicodeError: + return host.encode("idna").decode("ascii") + + +@lru_cache(_DEFAULT_ENCODE_SIZE) +def _encode_host(host: str, validate_host: bool) -> str: + """Encode host part of URL.""" + # If the host ends with a digit or contains a colon, its likely + # an IP address. + if host and (host[-1].isdigit() or ":" in host): + raw_ip, sep, zone = host.partition("%") + # If it looks like an IP, we check with _ip_compressed_version + # and fall-through if its not an IP address. This is a performance + # optimization to avoid parsing IP addresses as much as possible + # because it is orders of magnitude slower than almost any other + # operation this library does. + # Might be an IP address, check it + # + # IP Addresses can look like: + # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + # - 127.0.0.1 (last character is a digit) + # - 2001:db8::ff00:42:8329 (contains a colon) + # - 2001:db8::ff00:42:8329%eth0 (contains a colon) + # - [2001:db8::ff00:42:8329] (contains a colon -- brackets should + # have been removed before it gets here) + # Rare IP Address formats are not supported per: + # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4 + # + # IP parsing is slow, so its wrapped in an LRU + try: + ip = ip_address(raw_ip) + except ValueError: + pass + else: + # These checks should not happen in the + # LRU to keep the cache size small + host = ip.compressed + if ip.version == 6: + return f"[{host}%{zone}]" if sep else f"[{host}]" + return f"{host}%{zone}" if sep else host + + # IDNA encoding is slow, skip it for ASCII-only strings + if host.isascii(): + # Check for invalid characters explicitly; _idna_encode() does this + # for non-ascii host names. 
+ host = host.lower() + if validate_host and (invalid := NOT_REG_NAME.search(host)): + value, pos, extra = invalid.group(), invalid.start(), "" + if value == "@" or (value == ":" and "@" in host[pos:]): + # this looks like an authority string + extra = ( + ", if the value includes a username or password, " + "use 'authority' instead of 'host'" + ) + raise ValueError( + f"Host {host!r} cannot contain {value!r} (at position {pos}){extra}" + ) from None + return host + + return _idna_encode(host) + + +@rewrite_module +def cache_clear() -> None: + """Clear all LRU caches.""" + _idna_encode.cache_clear() + _idna_decode.cache_clear() + _encode_host.cache_clear() + + +@rewrite_module +def cache_info() -> CacheInfo: + """Report cache statistics.""" + return { + "idna_encode": _idna_encode.cache_info(), + "idna_decode": _idna_decode.cache_info(), + "ip_address": _encode_host.cache_info(), + "host_validate": _encode_host.cache_info(), + "encode_host": _encode_host.cache_info(), + } + + +@rewrite_module +def cache_configure( + *, + idna_encode_size: Union[int, None] = _DEFAULT_IDNA_SIZE, + idna_decode_size: Union[int, None] = _DEFAULT_IDNA_SIZE, + ip_address_size: Union[int, None, UndefinedType] = UNDEFINED, + host_validate_size: Union[int, None, UndefinedType] = UNDEFINED, + encode_host_size: Union[int, None, UndefinedType] = UNDEFINED, +) -> None: + """Configure LRU cache sizes.""" + global _idna_decode, _idna_encode, _encode_host + # ip_address_size, host_validate_size are no longer + # used, but are kept for backwards compatibility. + if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED: + warnings.warn( + "cache_configure() no longer accepts the " + "ip_address_size or host_validate_size arguments, " + "they are used to set the encode_host_size instead " + "and will be removed in the future", + DeprecationWarning, + stacklevel=2, + ) + + if encode_host_size is not None: + for size in (ip_address_size, host_validate_size): + if size is None: + encode_host_size = None + elif encode_host_size is UNDEFINED: + if size is not UNDEFINED: + encode_host_size = size + elif size is not UNDEFINED: + if TYPE_CHECKING: + assert isinstance(size, int) + assert isinstance(encode_host_size, int) + encode_host_size = max(size, encode_host_size) + if encode_host_size is UNDEFINED: + encode_host_size = _DEFAULT_ENCODE_SIZE + + if TYPE_CHECKING: + assert not isinstance(encode_host_size, object) + _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__) + _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__) + _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__) diff --git a/.venv/lib/python3.11/site-packages/yarl/py.typed b/.venv/lib/python3.11/site-packages/yarl/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..dcf2c804da5e19d617a03a6c68aa128d1d1f89a0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/yarl/py.typed @@ -0,0 +1 @@ +# Placeholder
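
Orientation note (not part of the vendored sources): the files added above bring in yarl's URL implementation and its quoting helpers. As a quick sketch of the public surface defined in the vendored _url.py, the snippet below runs a few representative round-trips. It assumes only that the yarl package installed in this virtualenv is importable; every method and property it touches appears in the diff above, and the update_query/extend_query expectations mirror the examples already given in those methods' docstrings.

# Usage sketch (assumption: `yarl` from this .venv is importable).
from yarl import URL

url = URL("http://example.com/?a=1&b=2")

# Cached properties parsed from the split URL.
print(url.scheme)          # 'http'
print(url.host)            # 'example.com'
print(url.port)            # 80 (scheme-based fallback; explicit_port is None)
print(url.path)            # '/'
print(url.query["a"])      # '1'

# Query helpers: with_query() replaces the whole query, update_query()
# overwrites matching keys, extend_query() appends without dropping pairs.
print(url.with_query({"b": "2"}))    # http://example.com/?b=2
print(url.update_query(a=3, c=4))    # http://example.com/?a=3&b=2&c=4
print(url.extend_query(a=3, c=4))    # http://example.com/?a=1&b=2&a=3&c=4

# `/` appends a path segment via _make_child(); query and fragment are cleared.
print(url / "sub")                   # http://example.com/sub

# Unencoded input is percent-encoded by the module-level quoters on parse,
# while the decoded properties unquote it again.
print(URL("http://example.com/a b"))        # http://example.com/a%20b
print(URL("http://example.com/a b").path)   # '/a b'

The two query expectations are taken directly from the extend_query and update_query docstrings in the diff; the rest follows from the property definitions shown above.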