diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/AUTHORS b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..662b9f2731c0ce4535526b06e0b4e39d97ed2c3a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/AUTHORS @@ -0,0 +1,19 @@ +Original author of astor/codegen.py: +* Armin Ronacher + +And with some modifications based on Armin's code: +* Paul Dubs + +* Berker Peksag +* Patrick Maupin +* Abhishek L +* Bob Tolbert +* Whyzgeek +* Zack M. Davis +* Ryan Gonzalez +* Lenny Truong +* Radomír Bosák +* Kodi Arfer +* Felix Yan +* Chris Rink +* Batuhan Taskaya diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e3c940f0f61f75255ddd31cc57907a1922aab97a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/LICENSE @@ -0,0 +1,29 @@ +Copyright (c) 2012, Patrick Maupin +Copyright (c) 2013, Berker Peksag +Copyright (c) 2008, Armin Ronacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/METADATA b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..c87bba2fe6f422bef59c8ef79dcd92c82e1ef952 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/METADATA @@ -0,0 +1,98 @@ +Metadata-Version: 2.1 +Name: astor +Version: 0.8.1 +Summary: Read/rewrite/write Python ASTs +Home-page: https://github.com/berkerpeksag/astor +Author: Patrick Maupin +Author-email: pmaupin@gmail.com +License: BSD-3-Clause +Keywords: ast,codegen,PEP 8 +Platform: Independent +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: Implementation +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Code Generators +Classifier: Topic :: Software Development :: Compilers +Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7 + +============================= +astor -- AST observe/rewrite +============================= + +:PyPI: https://pypi.org/project/astor/ +:Documentation: https://astor.readthedocs.io +:Source: https://github.com/berkerpeksag/astor +:License: 3-clause BSD +:Build status: + .. 
image:: https://secure.travis-ci.org/berkerpeksag/astor.svg + :alt: Travis CI + :target: https://travis-ci.org/berkerpeksag/astor/ + +astor is designed to allow easy manipulation of Python source via the AST. + +There are some other similar libraries, but astor focuses on the following areas: + +- Round-trip an AST back to Python [1]_: + + - Modified AST doesn't need linenumbers, ctx, etc. or otherwise + be directly compileable for the round-trip to work. + - Easy to read generated code as, well, code + - Can round-trip two different source trees to compare for functional + differences, using the astor.rtrip tool (for example, after PEP8 edits). + +- Dump pretty-printing of AST + + - Harder to read than round-tripped code, but more accurate to figure out what + is going on. + + - Easier to read than dump from built-in AST module + +- Non-recursive treewalk + + - Sometimes you want a recursive treewalk (and astor supports that, starting + at any node on the tree), but sometimes you don't need to do that. astor + doesn't require you to explicitly visit sub-nodes unless you want to: + + - You can add code that executes before a node's children are visited, and/or + - You can add code that executes after a node's children are visited, and/or + - You can add code that executes and keeps the node's children from being + visited (and optionally visit them yourself via a recursive call) + + - Write functions to access the tree based on object names and/or attribute names + - Enjoy easy access to parent node(s) for tree rewriting + +.. [1] + The decompilation back to Python is based on code originally written + by Armin Ronacher. 
Armin's code was well-structured, but failed on + some obscure corner cases of the Python language (and even more corner + cases when the AST changed on different versions of Python), and its + output arguably had cosmetic issues -- for example, it produced + parentheses even in some cases where they were not needed, to + avoid having to reason about precedence. + + Other derivatives of Armin's code are floating around, and typically + have fixes for a few corner cases that happened to be noticed by the + maintainers, but most of them have not been tested as thoroughly as + astor. One exception may be the version of codegen + `maintained at github by CensoredUsername`__. This has been tested + to work properly on Python 2.7 using astor's test suite, and, as it + is a single source file, it may be easier to drop into some applications + that do not require astor's other features or Python 3.x compatibility. + +__ https://github.com/CensoredUsername/codegen + + diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/RECORD b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..49b72144a836c83521803e17d79cdcbda1f8397b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/RECORD @@ -0,0 +1,29 @@ +astor-0.8.1.dist-info/AUTHORS,sha256=dy5MQIMINxY79YbaRR19C_CNAgHe3tcuvESs7ypxKQc,679 +astor-0.8.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +astor-0.8.1.dist-info/LICENSE,sha256=zkHq_C78AY2cfJahx3lmgkbHfbEaE544ifNH9GSmG50,1554 +astor-0.8.1.dist-info/METADATA,sha256=0nH_-dzD0tPZUB4Hs5o-OOEuId9lteVELQPI5hG0oKo,4235 +astor-0.8.1.dist-info/RECORD,, +astor-0.8.1.dist-info/WHEEL,sha256=8zNYZbwQSXoB9IfXOjPfeNwvAsALAjffgk27FqvCWbo,110 +astor-0.8.1.dist-info/top_level.txt,sha256=M5xfrbiL9-EIlOb1h2T8s6gFbV3b9AbwgI0ARzaRyaY,6 +astor-0.8.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1 
+astor/VERSION,sha256=qvZyHcN8QLQjOsz8CB8ld2_zvR0qS51c6nYNHCz4ZmU,6 +astor/__init__.py,sha256=C9rmH4v9K7pkIk3eDuVRhqO5wULt3B42copNJsEw8rw,2291 +astor/__pycache__/__init__.cpython-311.pyc,, +astor/__pycache__/code_gen.cpython-311.pyc,, +astor/__pycache__/codegen.cpython-311.pyc,, +astor/__pycache__/file_util.cpython-311.pyc,, +astor/__pycache__/node_util.cpython-311.pyc,, +astor/__pycache__/op_util.cpython-311.pyc,, +astor/__pycache__/rtrip.cpython-311.pyc,, +astor/__pycache__/source_repr.cpython-311.pyc,, +astor/__pycache__/string_repr.cpython-311.pyc,, +astor/__pycache__/tree_walk.cpython-311.pyc,, +astor/code_gen.py,sha256=0KAimfyV8pIPXQx6s_NyPSXRhAxMLWXbCPEQuCTpxac,32032 +astor/codegen.py,sha256=lTqdJWMK4EAJ1wxDw2XR-MLyHJmvbV1_Q5QLj9naE_g,204 +astor/file_util.py,sha256=BETsKYg8UiKoZNswRkirzPSZWgku41dRzZC7T5X3_F4,3268 +astor/node_util.py,sha256=WEWMUMSfHtLwgx54nMkc2APLV573iOPhqPag4gIbhVQ,6542 +astor/op_util.py,sha256=GGcgYqa3DFOAaoSt7TTu46VUhe1J13dO14-SQTRXRYI,3191 +astor/rtrip.py,sha256=AlvQvsUuUZ8zxvRFpWF_Fsv4-NksPB23rvVkTrkvef8,6741 +astor/source_repr.py,sha256=1lj4jakkrcGDRoo-BIRZDszQ8gukdeLR_fmvGqBrP-U,7373 +astor/string_repr.py,sha256=YeC_DVeIJdPElqjgzzhPFheQsz_QjMEW_SLODFvEsIA,2917 +astor/tree_walk.py,sha256=fJaw54GgTg4NTRJLVRl2XSnfFOG9GdjOUlI6ZChLOb8,6020 diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..8b701e93c23159bc1f4145f779049ce0a6a6cf77 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.33.6) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/top_level.txt new file mode 100644 index 
0000000000000000000000000000000000000000..7c415033570c42567224fed1cfa39546444a6f55 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/top_level.txt @@ -0,0 +1 @@ +astor diff --git a/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/zip-safe b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/zip-safe new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/astor-0.8.1.dist-info/zip-safe @@ -0,0 +1 @@ + diff --git a/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/METADATA b/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..fb9ee479894e832b632d64f6beb4678d6f31cc10 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/METADATA @@ -0,0 +1,308 @@ +Metadata-Version: 2.1 +Name: huggingface-hub +Version: 0.28.1 +Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub +Home-page: https://github.com/huggingface/huggingface_hub +Author: Hugging Face, Inc. 
+Author-email: julien@huggingface.co +License: Apache +Keywords: model-hub machine-learning models natural-language-processing deep-learning pytorch pretrained-models +Platform: UNKNOWN +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Requires-Python: >=3.8.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: filelock +Requires-Dist: fsspec>=2023.5.0 +Requires-Dist: packaging>=20.9 +Requires-Dist: pyyaml>=5.1 +Requires-Dist: requests +Requires-Dist: tqdm>=4.42.1 +Requires-Dist: typing-extensions>=3.7.4.3 +Provides-Extra: all +Requires-Dist: InquirerPy==0.3.4; extra == "all" +Requires-Dist: aiohttp; extra == "all" +Requires-Dist: jedi; extra == "all" +Requires-Dist: Jinja2; extra == "all" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "all" +Requires-Dist: pytest-cov; extra == "all" +Requires-Dist: pytest-env; extra == "all" +Requires-Dist: pytest-xdist; extra == "all" +Requires-Dist: pytest-vcr; extra == "all" +Requires-Dist: pytest-asyncio; extra == "all" +Requires-Dist: pytest-rerunfailures; extra == "all" +Requires-Dist: pytest-mock; extra == "all" +Requires-Dist: urllib3<2.0; extra == "all" +Requires-Dist: soundfile; extra == "all" +Requires-Dist: Pillow; extra == "all" +Requires-Dist: gradio>=4.0.0; extra == "all" +Requires-Dist: numpy; extra == "all" 
+Requires-Dist: fastapi; extra == "all" +Requires-Dist: ruff>=0.9.0; extra == "all" +Requires-Dist: mypy==1.5.1; extra == "all" +Requires-Dist: libcst==1.4.0; extra == "all" +Requires-Dist: typing-extensions>=4.8.0; extra == "all" +Requires-Dist: types-PyYAML; extra == "all" +Requires-Dist: types-requests; extra == "all" +Requires-Dist: types-simplejson; extra == "all" +Requires-Dist: types-toml; extra == "all" +Requires-Dist: types-tqdm; extra == "all" +Requires-Dist: types-urllib3; extra == "all" +Provides-Extra: cli +Requires-Dist: InquirerPy==0.3.4; extra == "cli" +Provides-Extra: dev +Requires-Dist: InquirerPy==0.3.4; extra == "dev" +Requires-Dist: aiohttp; extra == "dev" +Requires-Dist: jedi; extra == "dev" +Requires-Dist: Jinja2; extra == "dev" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: pytest-env; extra == "dev" +Requires-Dist: pytest-xdist; extra == "dev" +Requires-Dist: pytest-vcr; extra == "dev" +Requires-Dist: pytest-asyncio; extra == "dev" +Requires-Dist: pytest-rerunfailures; extra == "dev" +Requires-Dist: pytest-mock; extra == "dev" +Requires-Dist: urllib3<2.0; extra == "dev" +Requires-Dist: soundfile; extra == "dev" +Requires-Dist: Pillow; extra == "dev" +Requires-Dist: gradio>=4.0.0; extra == "dev" +Requires-Dist: numpy; extra == "dev" +Requires-Dist: fastapi; extra == "dev" +Requires-Dist: ruff>=0.9.0; extra == "dev" +Requires-Dist: mypy==1.5.1; extra == "dev" +Requires-Dist: libcst==1.4.0; extra == "dev" +Requires-Dist: typing-extensions>=4.8.0; extra == "dev" +Requires-Dist: types-PyYAML; extra == "dev" +Requires-Dist: types-requests; extra == "dev" +Requires-Dist: types-simplejson; extra == "dev" +Requires-Dist: types-toml; extra == "dev" +Requires-Dist: types-tqdm; extra == "dev" +Requires-Dist: types-urllib3; extra == "dev" +Provides-Extra: fastai +Requires-Dist: toml; extra == "fastai" +Requires-Dist: fastai>=2.4; extra == "fastai" +Requires-Dist: fastcore>=1.3.27; extra == 
"fastai" +Provides-Extra: hf_transfer +Requires-Dist: hf-transfer>=0.1.4; extra == "hf-transfer" +Provides-Extra: inference +Requires-Dist: aiohttp; extra == "inference" +Provides-Extra: quality +Requires-Dist: ruff>=0.9.0; extra == "quality" +Requires-Dist: mypy==1.5.1; extra == "quality" +Requires-Dist: libcst==1.4.0; extra == "quality" +Provides-Extra: tensorflow +Requires-Dist: tensorflow; extra == "tensorflow" +Requires-Dist: pydot; extra == "tensorflow" +Requires-Dist: graphviz; extra == "tensorflow" +Provides-Extra: tensorflow-testing +Requires-Dist: tensorflow; extra == "tensorflow-testing" +Requires-Dist: keras<3.0; extra == "tensorflow-testing" +Provides-Extra: testing +Requires-Dist: InquirerPy==0.3.4; extra == "testing" +Requires-Dist: aiohttp; extra == "testing" +Requires-Dist: jedi; extra == "testing" +Requires-Dist: Jinja2; extra == "testing" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "testing" +Requires-Dist: pytest-cov; extra == "testing" +Requires-Dist: pytest-env; extra == "testing" +Requires-Dist: pytest-xdist; extra == "testing" +Requires-Dist: pytest-vcr; extra == "testing" +Requires-Dist: pytest-asyncio; extra == "testing" +Requires-Dist: pytest-rerunfailures; extra == "testing" +Requires-Dist: pytest-mock; extra == "testing" +Requires-Dist: urllib3<2.0; extra == "testing" +Requires-Dist: soundfile; extra == "testing" +Requires-Dist: Pillow; extra == "testing" +Requires-Dist: gradio>=4.0.0; extra == "testing" +Requires-Dist: numpy; extra == "testing" +Requires-Dist: fastapi; extra == "testing" +Provides-Extra: torch +Requires-Dist: torch; extra == "torch" +Requires-Dist: safetensors[torch]; extra == "torch" +Provides-Extra: typing +Requires-Dist: typing-extensions>=4.8.0; extra == "typing" +Requires-Dist: types-PyYAML; extra == "typing" +Requires-Dist: types-requests; extra == "typing" +Requires-Dist: types-simplejson; extra == "typing" +Requires-Dist: types-toml; extra == "typing" +Requires-Dist: types-tqdm; extra == "typing" 
+Requires-Dist: types-urllib3; extra == "typing" + +

+ + + + huggingface_hub library logo + +
+
+

+ +

+ The official Python client for the Huggingface Hub. +

+ +

+ Documentation + GitHub release + PyPi version + PyPI - Downloads + Code coverage +

+ +

+

+ English | + Deutsch | + हिंदी | + 한국어 | + 中文(简体) +

+

+ +--- + +**Documentation**: https://hf.co/docs/huggingface_hub + +**Source Code**: https://github.com/huggingface/huggingface_hub + +--- + +## Welcome to the huggingface_hub library + +The `huggingface_hub` library allows you to interact with the [Hugging Face Hub](https://huggingface.co/), a platform democratizing open-source Machine Learning for creators and collaborators. Discover pre-trained models and datasets for your projects or play with the thousands of machine learning apps hosted on the Hub. You can also create and share your own models, datasets and demos with the community. The `huggingface_hub` library provides a simple way to do all these things with Python. + +## Key features + +- [Download files](https://huggingface.co/docs/huggingface_hub/en/guides/download) from the Hub. +- [Upload files](https://huggingface.co/docs/huggingface_hub/en/guides/upload) to the Hub. +- [Manage your repositories](https://huggingface.co/docs/huggingface_hub/en/guides/repository). +- [Run Inference](https://huggingface.co/docs/huggingface_hub/en/guides/inference) on deployed models. +- [Search](https://huggingface.co/docs/huggingface_hub/en/guides/search) for models, datasets and Spaces. +- [Share Model Cards](https://huggingface.co/docs/huggingface_hub/en/guides/model-cards) to document your models. +- [Engage with the community](https://huggingface.co/docs/huggingface_hub/en/guides/community) through PRs and comments. + +## Installation + +Install the `huggingface_hub` package with [pip](https://pypi.org/project/huggingface-hub/): + +```bash +pip install huggingface_hub +``` + +If you prefer, you can also install it with [conda](https://huggingface.co/docs/huggingface_hub/en/installation#install-with-conda). + +In order to keep the package minimal by default, `huggingface_hub` comes with optional dependencies useful for some use cases. 
For example, if you want have a complete experience for Inference, run: + +```bash +pip install huggingface_hub[inference] +``` + +To learn more installation and optional dependencies, check out the [installation guide](https://huggingface.co/docs/huggingface_hub/en/installation). + +## Quick start + +### Download files + +Download a single file + +```py +from huggingface_hub import hf_hub_download + +hf_hub_download(repo_id="tiiuae/falcon-7b-instruct", filename="config.json") +``` + +Or an entire repository + +```py +from huggingface_hub import snapshot_download + +snapshot_download("stabilityai/stable-diffusion-2-1") +``` + +Files will be downloaded in a local cache folder. More details in [this guide](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache). + +### Login + +The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in your machine, run the following CLI: + +```bash +huggingface-cli login +# or using an environment variable +huggingface-cli login --token $HUGGINGFACE_TOKEN +``` + +### Create a repository + +```py +from huggingface_hub import create_repo + +create_repo(repo_id="super-cool-model") +``` + +### Upload files + +Upload a single file + +```py +from huggingface_hub import upload_file + +upload_file( + path_or_fileobj="/home/lysandre/dummy-test/README.md", + path_in_repo="README.md", + repo_id="lysandre/test-model", +) +``` + +Or an entire folder + +```py +from huggingface_hub import upload_folder + +upload_folder( + folder_path="/path/to/local/space", + repo_id="username/my-cool-space", + repo_type="space", +) +``` + +For details in the [upload guide](https://huggingface.co/docs/huggingface_hub/en/guides/upload). + +## Integrating to the Hub. + +We're partnering with cool open source ML libraries to provide free model hosting and versioning. You can find the existing integrations [here](https://huggingface.co/docs/hub/libraries). 
+ +The advantages are: + +- Free model or dataset hosting for libraries and their users. +- Built-in file versioning, even with very large files, thanks to a git-based approach. +- Serverless inference API for all models publicly available. +- In-browser widgets to play with the uploaded models. +- Anyone can upload a new model for your library, they just need to add the corresponding tag for the model to be discoverable. +- Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe. +- Usage stats and more features to come. + +If you would like to integrate your library, feel free to open an issue to begin the discussion. We wrote a [step-by-step guide](https://huggingface.co/docs/hub/adding-a-library) with ❤️ showing how to do this integration. + +## Contributions (feature requests, bugs, etc.) are super welcome 💙💚💛💜🧡❤️ + +Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. +Answering questions, helping others, reaching out and improving the documentations are immensely valuable to the community. +We wrote a [contribution guide](https://github.com/huggingface/huggingface_hub/blob/main/CONTRIBUTING.md) to summarize +how to get started to contribute to this repository. 
+ + diff --git a/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b964ccca3c1b6766042b3fe3b2707ba25372924 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/huggingface_hub-0.28.1.dist-info/top_level.txt @@ -0,0 +1 @@ +huggingface_hub diff --git a/.venv/lib/python3.11/site-packages/nvidia_cuda_cupti_cu12-12.4.127.dist-info/METADATA b/.venv/lib/python3.11/site-packages/nvidia_cuda_cupti_cu12-12.4.127.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..c0d874a42fb5a6bdb8f0cb78bb40f3d9cb708137 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/nvidia_cuda_cupti_cu12-12.4.127.dist-info/METADATA @@ -0,0 +1,35 @@ +Metadata-Version: 2.1 +Name: nvidia-cuda-cupti-cu12 +Version: 12.4.127 +Summary: CUDA profiling tools runtime libs. +Home-page: https://developer.nvidia.com/cuda-zone +Author: Nvidia CUDA Installer Team +Author-email: cuda_installer@nvidia.com +License: NVIDIA Proprietary Software +Keywords: cuda,nvidia,runtime,machine learning,deep learning +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: Other/Proprietary License +Classifier: Natural Language :: English +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Scientific/Engineering +Classifier: Topic :: 
Scientific/Engineering :: Mathematics +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Software Development +Classifier: Topic :: Software Development :: Libraries +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX :: Linux +Requires-Python: >=3 +License-File: License.txt + +Provides libraries to enable third party tools using GPU profiling APIs. diff --git a/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..dd842d97e8f542fb4e68d9d81902901e2730762a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Radim Řehůřek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/METADATA b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..371c81cb095bc44dc6b0fb86c37a988fe5a1e0de --- /dev/null +++ b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/METADATA @@ -0,0 +1,586 @@ +Metadata-Version: 2.1 +Name: smart-open +Version: 7.1.0 +Summary: Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...) +Home-page: https://github.com/piskvorky/smart_open +Author: Radim Rehurek +Author-email: me@radimrehurek.com +Maintainer: Radim Rehurek +Maintainer-email: me@radimrehurek.com +License: MIT +Download-URL: http://pypi.python.org/pypi/smart_open +Keywords: file streaming,s3,hdfs,gcs,azure blob storage +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Topic :: System :: Distributed Computing +Classifier: Topic :: Database :: Front-Ends +Requires-Python: >=3.7,<4.0 +Requires-Dist: wrapt +Provides-Extra: all +Requires-Dist: boto3; extra == "all" +Requires-Dist: google-cloud-storage>=2.6.0; extra == "all" +Requires-Dist: azure-storage-blob; extra == "all" +Requires-Dist: azure-common; extra == "all" +Requires-Dist: azure-core; extra == "all" +Requires-Dist: requests; extra == "all" +Requires-Dist: paramiko; extra == "all" +Requires-Dist: zstandard; extra == "all" +Provides-Extra: azure 
+Requires-Dist: azure-storage-blob; extra == "azure" +Requires-Dist: azure-common; extra == "azure" +Requires-Dist: azure-core; extra == "azure" +Provides-Extra: gcs +Requires-Dist: google-cloud-storage>=2.6.0; extra == "gcs" +Provides-Extra: http +Requires-Dist: requests; extra == "http" +Provides-Extra: s3 +Requires-Dist: boto3; extra == "s3" +Provides-Extra: ssh +Requires-Dist: paramiko; extra == "ssh" +Provides-Extra: test +Requires-Dist: boto3; extra == "test" +Requires-Dist: google-cloud-storage>=2.6.0; extra == "test" +Requires-Dist: azure-storage-blob; extra == "test" +Requires-Dist: azure-common; extra == "test" +Requires-Dist: azure-core; extra == "test" +Requires-Dist: requests; extra == "test" +Requires-Dist: paramiko; extra == "test" +Requires-Dist: zstandard; extra == "test" +Requires-Dist: moto[server]; extra == "test" +Requires-Dist: responses; extra == "test" +Requires-Dist: pytest; extra == "test" +Requires-Dist: pytest-rerunfailures; extra == "test" +Requires-Dist: pytest-benchmark; extra == "test" +Requires-Dist: awscli; extra == "test" +Requires-Dist: pyopenssl; extra == "test" +Requires-Dist: numpy; extra == "test" +Provides-Extra: webhdfs +Requires-Dist: requests; extra == "webhdfs" +Provides-Extra: zst +Requires-Dist: zstandard; extra == "zst" + +====================================================== +smart_open — utils for streaming large files in Python +====================================================== + + +|License|_ |GHA|_ |Coveralls|_ |Downloads|_ + +.. |License| image:: https://img.shields.io/pypi/l/smart_open.svg +.. |GHA| image:: https://github.com/RaRe-Technologies/smart_open/workflows/Test/badge.svg +.. |Coveralls| image:: https://coveralls.io/repos/github/RaRe-Technologies/smart_open/badge.svg?branch=develop +.. |Downloads| image:: https://pepy.tech/badge/smart-open/month +.. _License: https://github.com/RaRe-Technologies/smart_open/blob/master/LICENSE +.. 
_GHA: https://github.com/RaRe-Technologies/smart_open/actions?query=workflow%3ATest +.. _Coveralls: https://coveralls.io/github/RaRe-Technologies/smart_open?branch=HEAD +.. _Downloads: https://pypi.org/project/smart-open/ + + +What? +===== + +``smart_open`` is a Python 3 library for **efficient streaming of very large files** from/to storages such as S3, GCS, Azure Blob Storage, HDFS, WebHDFS, HTTP, HTTPS, SFTP, or local filesystem. It supports transparent, on-the-fly (de-)compression for a variety of different formats. + +``smart_open`` is a drop-in replacement for Python's built-in ``open()``: it can do anything ``open`` can (100% compatible, falls back to native ``open`` wherever possible), plus lots of nifty extra stuff on top. + +**Python 2.7 is no longer supported. If you need Python 2.7, please use** `smart_open 1.10.1 `_, **the last version to support Python 2.** + +Why? +==== + +Working with large remote files, for example using Amazon's `boto3 `_ Python library, is a pain. +``boto3``'s ``Object.upload_fileobj()`` and ``Object.download_fileobj()`` methods require gotcha-prone boilerplate to use successfully, such as constructing file-like object wrappers. +``smart_open`` shields you from that. It builds on boto3 and other remote storage libraries, but offers a **clean unified Pythonic API**. The result is less code for you to write and fewer bugs to make. + + +How? +===== + +``smart_open`` is well-tested, well-documented, and has a simple Pythonic API: + + +.. _doctools_before_examples: + +.. code-block:: python + + >>> from smart_open import open + >>> + >>> # stream lines from an S3 object + >>> for line in open('s3://commoncrawl/robots.txt'): + ... print(repr(line)) + ... break + 'User-Agent: *\n' + + >>> # stream from/to compressed files, with transparent (de)compression: + >>> for line in open('smart_open/tests/test_data/1984.txt.gz', encoding='utf-8'): + ... 
print(repr(line)) + 'It was a bright cold day in April, and the clocks were striking thirteen.\n' + 'Winston Smith, his chin nuzzled into his breast in an effort to escape the vile\n' + 'wind, slipped quickly through the glass doors of Victory Mansions, though not\n' + 'quickly enough to prevent a swirl of gritty dust from entering along with him.\n' + + >>> # can use context managers too: + >>> with open('smart_open/tests/test_data/1984.txt.gz') as fin: + ... with open('smart_open/tests/test_data/1984.txt.bz2', 'w') as fout: + ... for line in fin: + ... fout.write(line) + 74 + 80 + 78 + 79 + + >>> # can use any IOBase operations, like seek + >>> with open('s3://commoncrawl/robots.txt', 'rb') as fin: + ... for line in fin: + ... print(repr(line.decode('utf-8'))) + ... break + ... offset = fin.seek(0) # seek to the beginning + ... print(fin.read(4)) + 'User-Agent: *\n' + b'User' + + >>> # stream from HTTP + >>> for line in open('http://example.com/index.html'): + ... print(repr(line)) + ... break + '\n' + +.. _doctools_after_examples: + +Other examples of URLs that ``smart_open`` accepts:: + + s3://my_bucket/my_key + s3://my_key:my_secret@my_bucket/my_key + s3://my_key:my_secret@my_server:my_port@my_bucket/my_key + gs://my_bucket/my_blob + azure://my_bucket/my_blob + hdfs:///path/file + hdfs://path/file + webhdfs://host:port/path/file + ./local/path/file + ~/local/path/file + local/path/file + ./local/path/file.gz + file:///home/user/file + file:///home/user/file.bz2 + [ssh|scp|sftp]://username@host//path/file + [ssh|scp|sftp]://username@host/path/file + [ssh|scp|sftp]://username:password@host/path/file + + +Documentation +============= + +Installation +------------ + +``smart_open`` supports a wide range of storage solutions, including AWS S3, Google Cloud and Azure. +Each individual solution has its own dependencies. +By default, ``smart_open`` does not install any dependencies, in order to keep the installation size small. 
+You can install these dependencies explicitly using:: + + pip install smart_open[azure] # Install Azure deps + pip install smart_open[gcs] # Install GCS deps + pip install smart_open[s3] # Install S3 deps + +Or, if you don't mind installing a large number of third party libraries, you can install all dependencies using:: + + pip install smart_open[all] + +Be warned that this option increases the installation size significantly, e.g. over 100MB. + +If you're upgrading from ``smart_open`` versions 2.x and below, please check out the `Migration Guide `_. + +Built-in help +------------- + +For detailed API info, see the online help: + +.. code-block:: python + + help('smart_open') + +or click `here `__ to view the help in your browser. + +More examples +------------- + +For the sake of simplicity, the examples below assume you have all the dependencies installed, i.e. you have done:: + + pip install smart_open[all] + +.. code-block:: python + + >>> import os, boto3 + >>> from smart_open import open + >>> + >>> # stream content *into* S3 (write mode) using a custom session + >>> session = boto3.Session( + ... aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'], + ... aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'], + ... ) + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> with open(url, 'wb', transport_params={'client': session.client('s3')}) as fout: + ... bytes_written = fout.write(b'hello world!') + ... print(bytes_written) + 12 + +.. 
code-block:: python + + # stream from HDFS + for line in open('hdfs://user/hadoop/my_file.txt', encoding='utf8'): + print(line) + + # stream from WebHDFS + for line in open('webhdfs://host:port/user/hadoop/my_file.txt'): + print(line) + + # stream content *into* HDFS (write mode): + with open('hdfs://host:port/user/hadoop/my_file.txt', 'wb') as fout: + fout.write(b'hello world') + + # stream content *into* WebHDFS (write mode): + with open('webhdfs://host:port/user/hadoop/my_file.txt', 'wb') as fout: + fout.write(b'hello world') + + # stream from a completely custom s3 server, like s3proxy: + for line in open('s3u://user:secret@host:port@mybucket/mykey.txt'): + print(line) + + # Stream to Digital Ocean Spaces bucket providing credentials from boto3 profile + session = boto3.Session(profile_name='digitalocean') + client = session.client('s3', endpoint_url='https://ams3.digitaloceanspaces.com') + transport_params = {'client': client} + with open('s3://bucket/key.txt', 'wb', transport_params=transport_params) as fout: + fout.write(b'here we stand') + + # stream from GCS + for line in open('gs://my_bucket/my_file.txt'): + print(line) + + # stream content *into* GCS (write mode): + with open('gs://my_bucket/my_file.txt', 'wb') as fout: + fout.write(b'hello world') + + # stream from Azure Blob Storage + connect_str = os.environ['AZURE_STORAGE_CONNECTION_STRING'] + transport_params = { + 'client': azure.storage.blob.BlobServiceClient.from_connection_string(connect_str), + } + for line in open('azure://mycontainer/myfile.txt', transport_params=transport_params): + print(line) + + # stream content *into* Azure Blob Storage (write mode): + connect_str = os.environ['AZURE_STORAGE_CONNECTION_STRING'] + transport_params = { + 'client': azure.storage.blob.BlobServiceClient.from_connection_string(connect_str), + } + with open('azure://mycontainer/my_file.txt', 'wb', transport_params=transport_params) as fout: + fout.write(b'hello world') + +Compression Handling 
+-------------------- + +The top-level `compression` parameter controls compression/decompression behavior when reading and writing. +The supported values for this parameter are: + +- ``infer_from_extension`` (default behavior) +- ``disable`` +- ``.gz`` +- ``.bz2`` +- ``.zst`` + +By default, ``smart_open`` determines the compression algorithm to use based on the file extension. + +.. code-block:: python + + >>> from smart_open import open, register_compressor + >>> with open('smart_open/tests/test_data/1984.txt.gz') as fin: + ... print(fin.read(32)) + It was a bright cold day in Apri + +You can override this behavior to either disable compression, or explicitly specify the algorithm to use. +To disable compression: + +.. code-block:: python + + >>> from smart_open import open, register_compressor + >>> with open('smart_open/tests/test_data/1984.txt.gz', 'rb', compression='disable') as fin: + ... print(fin.read(32)) + b'\x1f\x8b\x08\x08\x85F\x94\\\x00\x031984.txt\x005\x8f=r\xc3@\x08\x85{\x9d\xe2\x1d@' + + +To specify the algorithm explicitly (e.g. for non-standard file extensions): + +.. code-block:: python + + >>> from smart_open import open, register_compressor + >>> with open('smart_open/tests/test_data/1984.txt.gzip', compression='.gz') as fin: + ... print(fin.read(32)) + It was a bright cold day in Apri + +You can also easily add support for other file extensions and compression formats. +For example, to open xz-compressed files: + +.. code-block:: python + + >>> import lzma, os + >>> from smart_open import open, register_compressor + + >>> def _handle_xz(file_obj, mode): + ... return lzma.LZMAFile(filename=file_obj, mode=mode, format=lzma.FORMAT_XZ) + + >>> register_compressor('.xz', _handle_xz) + + >>> with open('smart_open/tests/test_data/1984.txt.xz') as fin: + ... print(fin.read(32)) + It was a bright cold day in Apri + +``lzma`` is in the standard library in Python 3.3 and greater. +For 2.7, use `backports.lzma`_. + +.. 
_backports.lzma: https://pypi.org/project/backports.lzma/ + +Transport-specific Options +-------------------------- + +``smart_open`` supports a wide range of transport options out of the box, including: + +- S3 +- HTTP, HTTPS (read-only) +- SSH, SCP and SFTP +- WebHDFS +- GCS +- Azure Blob Storage + +Each option involves setting up its own set of parameters. +For example, for accessing S3, you often need to set up authentication, like API keys or a profile name. +``smart_open``'s ``open`` function accepts a keyword argument ``transport_params`` which accepts additional parameters for the transport layer. +Here are some examples of using this parameter: + +.. code-block:: python + + >>> import boto3 + >>> fin = open('s3://commoncrawl/robots.txt', transport_params=dict(client=boto3.client('s3'))) + >>> fin = open('s3://commoncrawl/robots.txt', transport_params=dict(buffer_size=1024)) + +For the full list of keyword arguments supported by each transport option, see the documentation: + +.. code-block:: python + + help('smart_open.open') + +S3 Credentials +-------------- + +``smart_open`` uses the ``boto3`` library to talk to S3. +``boto3`` has several `mechanisms `__ for determining the credentials to use. +By default, ``smart_open`` will defer to ``boto3`` and let the latter take care of the credentials. +There are several ways to override this behavior. + +The first is to pass a ``boto3.Client`` object as a transport parameter to the ``open`` function. +You can customize the credentials when constructing the session for the client. +``smart_open`` will then use the session when talking to S3. + +.. code-block:: python + + session = boto3.Session( + aws_access_key_id=ACCESS_KEY, + aws_secret_access_key=SECRET_KEY, + aws_session_token=SESSION_TOKEN, + ) + client = session.client('s3', endpoint_url=..., config=...) + fin = open('s3://bucket/key', transport_params={'client': client}) + +Your second option is to specify the credentials within the S3 URL itself: + +.. 
code-block:: python + + fin = open('s3://aws_access_key_id:aws_secret_access_key@bucket/key', ...) + +*Important*: The two methods above are **mutually exclusive**. If you pass an AWS client *and* the URL contains credentials, ``smart_open`` will ignore the latter. + +*Important*: ``smart_open`` ignores configuration files from the older ``boto`` library. +Port your old ``boto`` settings to ``boto3`` in order to use them with ``smart_open``. + +S3 Advanced Usage +----------------- + +Additional keyword arguments can be propagated to the boto3 methods that are used by ``smart_open`` under the hood using the ``client_kwargs`` transport parameter. + +For instance, to upload a blob with Metadata, ACL, StorageClass, these keyword arguments can be passed to ``create_multipart_upload`` (`docs `__). + +.. code-block:: python + + kwargs = {'Metadata': {'version': 2}, 'ACL': 'authenticated-read', 'StorageClass': 'STANDARD_IA'} + fout = open('s3://bucket/key', 'wb', transport_params={'client_kwargs': {'S3.Client.create_multipart_upload': kwargs}}) + +Iterating Over an S3 Bucket's Contents +-------------------------------------- + +Since going over all (or select) keys in an S3 bucket is a very common operation, there's also an extra function ``smart_open.s3.iter_bucket()`` that does this efficiently, **processing the bucket keys in parallel** (using multiprocessing): + +.. code-block:: python + + >>> from smart_open import s3 + >>> # we use workers=1 for reproducibility; you should use as many workers as you have cores + >>> bucket = 'silo-open-data' + >>> prefix = 'Official/annual/monthly_rain/' + >>> for key, content in s3.iter_bucket(bucket, prefix=prefix, accept_key=lambda key: '/201' in key, workers=1, key_limit=3): + ... 
print(key, round(len(content) / 2**20)) + Official/annual/monthly_rain/2010.monthly_rain.nc 13 + Official/annual/monthly_rain/2011.monthly_rain.nc 13 + Official/annual/monthly_rain/2012.monthly_rain.nc 13 + +GCS Credentials +--------------- +``smart_open`` uses the ``google-cloud-storage`` library to talk to GCS. +``google-cloud-storage`` uses the ``google-cloud`` package under the hood to handle authentication. +There are several `options `__ to provide +credentials. +By default, ``smart_open`` will defer to ``google-cloud-storage`` and let it take care of the credentials. + +To override this behavior, pass a ``google.cloud.storage.Client`` object as a transport parameter to the ``open`` function. +You can `customize the credentials `__ +when constructing the client. ``smart_open`` will then use the client when talking to GCS. To follow allow with +the example below, `refer to Google's guide `__ +to setting up GCS authentication with a service account. + +.. code-block:: python + + import os + from google.cloud.storage import Client + service_account_path = os.environ['GOOGLE_APPLICATION_CREDENTIALS'] + client = Client.from_service_account_json(service_account_path) + fin = open('gs://gcp-public-data-landsat/index.csv.gz', transport_params=dict(client=client)) + +If you need more credential options, you can create an explicit ``google.auth.credentials.Credentials`` object +and pass it to the Client. To create an API token for use in the example below, refer to the +`GCS authentication guide `__. + +.. 
code-block:: python + + import os + from google.auth.credentials import Credentials + from google.cloud.storage import Client + token = os.environ['GOOGLE_API_TOKEN'] + credentials = Credentials(token=token) + client = Client(credentials=credentials) + fin = open('gs://gcp-public-data-landsat/index.csv.gz', transport_params={'client': client}) + +GCS Advanced Usage +------------------ + +Additional keyword arguments can be propagated to the GCS open method (`docs `__), which is used by ``smart_open`` under the hood, using the ``blob_open_kwargs`` transport parameter. + +Additionally keyword arguments can be propagated to the GCS ``get_blob`` method (`docs `__) when in a read-mode, using the ``get_blob_kwargs`` transport parameter. + +Additional blob properties (`docs `__) can be set before an upload, as long as they are not read-only, using the ``blob_properties`` transport parameter. + +.. code-block:: python + + open_kwargs = {'predefined_acl': 'authenticated-read'} + properties = {'metadata': {'version': 2}, 'storage_class': 'COLDLINE'} + fout = open('gs://bucket/key', 'wb', transport_params={'blob_open_kwargs': open_kwargs, 'blob_properties': properties}) + +Azure Credentials +----------------- + +``smart_open`` uses the ``azure-storage-blob`` library to talk to Azure Blob Storage. +By default, ``smart_open`` will defer to ``azure-storage-blob`` and let it take care of the credentials. + +Azure Blob Storage does not have any ways of inferring credentials therefore, passing a ``azure.storage.blob.BlobServiceClient`` +object as a transport parameter to the ``open`` function is required. +You can `customize the credentials `__ +when constructing the client. ``smart_open`` will then use the client when talking to. To follow allow with +the example below, `refer to Azure's guide `__ +to setting up authentication. + +.. 
code-block:: python + + import os + from azure.storage.blob import BlobServiceClient + azure_storage_connection_string = os.environ['AZURE_STORAGE_CONNECTION_STRING'] + client = BlobServiceClient.from_connection_string(azure_storage_connection_string) + fin = open('azure://my_container/my_blob.txt', transport_params={'client': client}) + +If you need more credential options, refer to the +`Azure Storage authentication guide `__. + +Azure Advanced Usage +-------------------- + +Additional keyword arguments can be propagated to the ``commit_block_list`` method (`docs `__), which is used by ``smart_open`` under the hood for uploads, using the ``blob_kwargs`` transport parameter. + +.. code-block:: python + + kwargs = {'metadata': {'version': 2}} + fout = open('azure://container/key', 'wb', transport_params={'blob_kwargs': kwargs}) + +Drop-in replacement of ``pathlib.Path.open`` +-------------------------------------------- + +``smart_open.open`` can also be used with ``Path`` objects. +The built-in `Path.open()` is not able to read text from compressed files, so use ``patch_pathlib`` to replace it with `smart_open.open()` instead. +This can be helpful when e.g. working with compressed files. + +.. code-block:: python + + >>> from pathlib import Path + >>> from smart_open.smart_open_lib import patch_pathlib + >>> + >>> _ = patch_pathlib() # replace `Path.open` with `smart_open.open` + >>> + >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") + >>> + >>> with path.open("r") as infile: + ... print(infile.readline()[:41]) + В начале июля, в чрезвычайно жаркое время + +How do I ...? +============= + +See `this document `__. + +Extending ``smart_open`` +======================== + +See `this document `__. + +Testing ``smart_open`` +====================== + +``smart_open`` comes with a comprehensive suite of unit tests. 
+Before you can run the test suite, install the test dependencies:: + + pip install -e .[test] + +Now, you can run the unit tests:: + + pytest smart_open + +The tests are also run automatically with `Travis CI `_ on every commit push & pull request. + +Comments, bug reports +===================== + +``smart_open`` lives on `Github `_. You can file +issues or pull requests there. Suggestions, pull requests and improvements welcome! + +---------------- + +``smart_open`` is open source software released under the `MIT license `_. +Copyright (c) 2015-now `Radim Řehůřek `_. + + diff --git a/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..79d5c89a71989389294854aa34e329701325f8b0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/smart_open-7.1.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.45.1) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/.venv/lib/python3.11/site-packages/torchvision/__init__.py b/.venv/lib/python3.11/site-packages/torchvision/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d06156c25f1dfd34e9f01529e5a6b4bbeda7b42 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/__init__.py @@ -0,0 +1,105 @@ +import os +import warnings +from modulefinder import Module + +import torch + +# Don't re-order these, we need to load the _C extension (done when importing +# .extensions) before entering _meta_registrations. 
+from .extension import _HAS_OPS # usort:skip +from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils # usort:skip + +try: + from .version import __version__ # noqa: F401 +except ImportError: + pass + + +# Check if torchvision is being imported within the root folder +if not _HAS_OPS and os.path.dirname(os.path.realpath(__file__)) == os.path.join( + os.path.realpath(os.getcwd()), "torchvision" +): + message = ( + "You are importing torchvision within its own root folder ({}). " + "This is not expected to work and may give errors. Please exit the " + "torchvision project source and relaunch your python interpreter." + ) + warnings.warn(message.format(os.getcwd())) + +_image_backend = "PIL" + +_video_backend = "pyav" + + +def set_image_backend(backend): + """ + Specifies the package used to load images. + + Args: + backend (string): Name of the image backend. one of {'PIL', 'accimage'}. + The :mod:`accimage` package uses the Intel IPP library. It is + generally faster than PIL, but does not support as many operations. + """ + global _image_backend + if backend not in ["PIL", "accimage"]: + raise ValueError(f"Invalid backend '{backend}'. Options are 'PIL' and 'accimage'") + _image_backend = backend + + +def get_image_backend(): + """ + Gets the name of the package used to load images + """ + return _image_backend + + +def set_video_backend(backend): + """ + Specifies the package used to decode videos. + + Args: + backend (string): Name of the video backend. one of {'pyav', 'video_reader'}. + The :mod:`pyav` package uses the 3rd party PyAv library. It is a Pythonic + binding for the FFmpeg libraries. + The :mod:`video_reader` package includes a native C++ implementation on + top of FFMPEG libraries, and a python API of TorchScript custom operator. + It generally decodes faster than :mod:`pyav`, but is perhaps less robust. + + .. note:: + Building with FFMPEG is disabled by default in the latest `main`. 
If you want to use the 'video_reader' + backend, please compile torchvision from source. + """ + global _video_backend + if backend not in ["pyav", "video_reader", "cuda"]: + raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend) + if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER: + # TODO: better messages + message = "video_reader video backend is not available. Please compile torchvision from source and try again" + raise RuntimeError(message) + elif backend == "cuda" and not io._HAS_GPU_VIDEO_DECODER: + # TODO: better messages + message = "cuda video backend is not available." + raise RuntimeError(message) + else: + _video_backend = backend + + +def get_video_backend(): + """ + Returns the currently active video backend used to decode videos. + + Returns: + str: Name of the video backend. one of {'pyav', 'video_reader'}. + """ + + return _video_backend + + +def _is_tracing(): + return torch._C._get_tracing_state() + + +def disable_beta_transforms_warning(): + # Noop, only exists to avoid breaking existing code. 
+ # See https://github.com/pytorch/vision/issues/7896 + pass diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2f1bd47eb07faacc1170fb48ffeba67a3367582 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_internally_replaced_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_internally_replaced_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66fc2444de626ef22aa0cab49bac5bbb4265f14a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_internally_replaced_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_meta_registrations.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_meta_registrations.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7b3412bbac91d6026376e616a5e097914611d3b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_meta_registrations.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c068834b60a2a644e1168bd5a00fb9589c1107f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/extension.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/extension.cpython-311.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..be61eab296ee8dcf198d1962cff47fab2867096b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/extension.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3900fa85a33df76bc9d002c30e038d27df357ec3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/__pycache__/version.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbacaba33d8482f4c74bf22b3367d355e319fba6 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/__pycache__/version.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/_internally_replaced_utils.py b/.venv/lib/python3.11/site-packages/torchvision/_internally_replaced_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a6e261ea277989f4362037352cb24da6564460 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/_internally_replaced_utils.py @@ -0,0 +1,50 @@ +import importlib.machinery +import os + +from torch.hub import _get_torch_home + + +_HOME = os.path.join(_get_torch_home(), "datasets", "vision") +_USE_SHARDED_DATASETS = False + + +def _download_file_from_remote_location(fpath: str, url: str) -> None: + pass + + +def _is_remote_location_available() -> bool: + return False + + +try: + from torch.hub import load_state_dict_from_url # noqa: 401 +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url # noqa: 401 + + +def _get_extension_path(lib_name): + + lib_dir = os.path.dirname(__file__) 
+ if os.name == "nt": + # Register the main torchvision library location on the default DLL path + import ctypes + + kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) + with_load_library_flags = hasattr(kernel32, "AddDllDirectory") + prev_error_mode = kernel32.SetErrorMode(0x0001) + + if with_load_library_flags: + kernel32.AddDllDirectory.restype = ctypes.c_void_p + + os.add_dll_directory(lib_dir) + + kernel32.SetErrorMode(prev_error_mode) + + loader_details = (importlib.machinery.ExtensionFileLoader, importlib.machinery.EXTENSION_SUFFIXES) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec(lib_name) + if ext_specs is None: + raise ImportError + + return ext_specs.origin diff --git a/.venv/lib/python3.11/site-packages/torchvision/_meta_registrations.py b/.venv/lib/python3.11/site-packages/torchvision/_meta_registrations.py new file mode 100644 index 0000000000000000000000000000000000000000..f75bfb77a7f25a1842509de595f109f232994574 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/_meta_registrations.py @@ -0,0 +1,225 @@ +import functools + +import torch +import torch._custom_ops +import torch.library + +# Ensure that torch.ops.torchvision is visible +import torchvision.extension # noqa: F401 + + +@functools.lru_cache(None) +def get_meta_lib(): + return torch.library.Library("torchvision", "IMPL", "Meta") + + +def register_meta(op_name, overload_name="default"): + def wrapper(fn): + if torchvision.extension._has_ops(): + get_meta_lib().impl(getattr(getattr(torch.ops.torchvision, op_name), overload_name), fn) + return fn + + return wrapper + + +@register_meta("roi_align") +def meta_roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + 
f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + num_rois = rois.size(0) + channels = input.size(1) + return input.new_empty((num_rois, channels, pooled_height, pooled_width)) + + +@register_meta("_roi_align_backward") +def meta_roi_align_backward( + grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, aligned +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("ps_roi_align") +def meta_ps_roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + channels = input.size(1) + torch._check( + channels % (pooled_height * pooled_width) == 0, + "input channels must be a multiple of pooling height * pooling width", + ) + + num_rois = rois.size(0) + out_size = (num_rois, channels // (pooled_height * pooled_width), pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, dtype=torch.int32, device="meta") + + +@register_meta("_ps_roi_align_backward") +def meta_ps_roi_align_backward( + grad, + rois, + channel_mapping, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + batch_size, + channels, + height, + width, +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("roi_pool") +def meta_roi_pool(input, rois, 
spatial_scale, pooled_height, pooled_width): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + num_rois = rois.size(0) + channels = input.size(1) + out_size = (num_rois, channels, pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, device="meta", dtype=torch.int32) + + +@register_meta("_roi_pool_backward") +def meta_roi_pool_backward( + grad, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@register_meta("ps_roi_pool") +def meta_ps_roi_pool(input, rois, spatial_scale, pooled_height, pooled_width): + torch._check(rois.size(1) == 5, lambda: "rois must have shape as Tensor[K, 5]") + torch._check( + input.dtype == rois.dtype, + lambda: ( + "Expected tensor for input to have the same type as tensor for rois; " + f"but type {input.dtype} does not equal {rois.dtype}" + ), + ) + channels = input.size(1) + torch._check( + channels % (pooled_height * pooled_width) == 0, + "input channels must be a multiple of pooling height * pooling width", + ) + num_rois = rois.size(0) + out_size = (num_rois, channels // (pooled_height * pooled_width), pooled_height, pooled_width) + return input.new_empty(out_size), torch.empty(out_size, device="meta", dtype=torch.int32) + + +@register_meta("_ps_roi_pool_backward") +def meta_ps_roi_pool_backward( + grad, rois, channel_mapping, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width +): + torch._check( + grad.dtype == rois.dtype, + lambda: ( + "Expected 
tensor for grad to have the same type as tensor for rois; " + f"but type {grad.dtype} does not equal {rois.dtype}" + ), + ) + return grad.new_empty((batch_size, channels, height, width)) + + +@torch.library.register_fake("torchvision::nms") +def meta_nms(dets, scores, iou_threshold): + torch._check(dets.dim() == 2, lambda: f"boxes should be a 2d tensor, got {dets.dim()}D") + torch._check(dets.size(1) == 4, lambda: f"boxes should have 4 elements in dimension 1, got {dets.size(1)}") + torch._check(scores.dim() == 1, lambda: f"scores should be a 1d tensor, got {scores.dim()}") + torch._check( + dets.size(0) == scores.size(0), + lambda: f"boxes and scores should have same number of elements in dimension 0, got {dets.size(0)} and {scores.size(0)}", + ) + ctx = torch._custom_ops.get_ctx() + num_to_keep = ctx.create_unbacked_symint() + return dets.new_empty(num_to_keep, dtype=torch.long) + + +@register_meta("deform_conv2d") +def meta_deform_conv2d( + input, + weight, + offset, + mask, + bias, + stride_h, + stride_w, + pad_h, + pad_w, + dil_h, + dil_w, + n_weight_grps, + n_offset_grps, + use_mask, +): + + out_height, out_width = offset.shape[-2:] + out_channels = weight.shape[0] + batch_size = input.shape[0] + return input.new_empty((batch_size, out_channels, out_height, out_width)) + + +@register_meta("_deform_conv2d_backward") +def meta_deform_conv2d_backward( + grad, + input, + weight, + offset, + mask, + bias, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + groups, + offset_groups, + use_mask, +): + + grad_input = input.new_empty(input.shape) + grad_weight = weight.new_empty(weight.shape) + grad_offset = offset.new_empty(offset.shape) + grad_mask = mask.new_empty(mask.shape) + grad_bias = bias.new_empty(bias.shape) + return grad_input, grad_weight, grad_offset, grad_mask, grad_bias diff --git a/.venv/lib/python3.11/site-packages/torchvision/_utils.py b/.venv/lib/python3.11/site-packages/torchvision/_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..b739ef0966e9b6fac4574f3d6f04051799f75a16 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/_utils.py @@ -0,0 +1,32 @@ +import enum +from typing import Sequence, Type, TypeVar + +T = TypeVar("T", bound=enum.Enum) + + +class StrEnumMeta(enum.EnumMeta): + auto = enum.auto + + def from_str(self: Type[T], member: str) -> T: # type: ignore[misc] + try: + return self[member] + except KeyError: + # TODO: use `add_suggestion` from torchvision.prototype.utils._internal to improve the error message as + # soon as it is migrated. + raise ValueError(f"Unknown value '{member}' for {self.__name__}.") from None + + +class StrEnum(enum.Enum, metaclass=StrEnumMeta): + pass + + +def sequence_to_str(seq: Sequence, separate_last: str = "") -> str: + if not seq: + return "" + if len(seq) == 1: + return f"'{seq[0]}'" + + head = "'" + "', '".join([str(item) for item in seq[:-1]]) + "'" + tail = f"{'' if separate_last and len(seq) == 2 else ','} {separate_last}'{seq[-1]}'" + + return head + tail diff --git a/.venv/lib/python3.11/site-packages/torchvision/extension.py b/.venv/lib/python3.11/site-packages/torchvision/extension.py new file mode 100644 index 0000000000000000000000000000000000000000..67801056e88b44d40bc2d382d62c389bf4ef039e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/extension.py @@ -0,0 +1,92 @@ +import os +import sys + +import torch + +from ._internally_replaced_utils import _get_extension_path + + +_HAS_OPS = False + + +def _has_ops(): + return False + + +try: + # On Windows Python-3.8.x has `os.add_dll_directory` call, + # which is called to configure dll search path. 
+ # To find cuda related dlls we need to make sure the + # conda environment/bin path is configured Please take a look: + # https://stackoverflow.com/questions/59330863/cant-import-dll-module-in-python + # Please note: if some path can't be added using add_dll_directory we simply ignore this path + if os.name == "nt" and sys.version_info < (3, 9): + env_path = os.environ["PATH"] + path_arr = env_path.split(";") + for path in path_arr: + if os.path.exists(path): + try: + os.add_dll_directory(path) # type: ignore[attr-defined] + except Exception: + pass + + lib_path = _get_extension_path("_C") + torch.ops.load_library(lib_path) + _HAS_OPS = True + + def _has_ops(): # noqa: F811 + return True + +except (ImportError, OSError): + pass + + +def _assert_has_ops(): + if not _has_ops(): + raise RuntimeError( + "Couldn't load custom C++ ops. This can happen if your PyTorch and " + "torchvision versions are incompatible, or if you had errors while compiling " + "torchvision from source. For further information on the compatible versions, check " + "https://github.com/pytorch/vision#installation for the compatibility matrix. " + "Please check your PyTorch version with torch.__version__ and your torchvision " + "version with torchvision.__version__ and verify if they are compatible, and if not " + "please reinstall torchvision so that it matches your PyTorch install." 
+ ) + + +def _check_cuda_version(): + """ + Make sure that CUDA versions match between the pytorch install and torchvision install + """ + if not _HAS_OPS: + return -1 + from torch.version import cuda as torch_version_cuda + + _version = torch.ops.torchvision._cuda_version() + if _version != -1 and torch_version_cuda is not None: + tv_version = str(_version) + if int(tv_version) < 10000: + tv_major = int(tv_version[0]) + tv_minor = int(tv_version[2]) + else: + tv_major = int(tv_version[0:2]) + tv_minor = int(tv_version[3]) + t_version = torch_version_cuda.split(".") + t_major = int(t_version[0]) + t_minor = int(t_version[1]) + if t_major != tv_major: + raise RuntimeError( + "Detected that PyTorch and torchvision were compiled with different CUDA major versions. " + f"PyTorch has CUDA Version={t_major}.{t_minor} and torchvision has " + f"CUDA Version={tv_major}.{tv_minor}. " + "Please reinstall the torchvision that matches your PyTorch install." + ) + return _version + + +def _load_library(lib_name): + lib_path = _get_extension_path(lib_name) + torch.ops.load_library(lib_path) + + +_check_cuda_version() diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/alexnet.py b/.venv/lib/python3.11/site-packages/torchvision/models/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f85acbeb2148d2aa8f289808e61aa61e2d68e2f9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/alexnet.py @@ -0,0 +1,119 @@ +from functools import partial +from typing import Any, Optional + +import torch +import torch.nn as nn + +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + + +__all__ = ["AlexNet", "AlexNet_Weights", "alexnet"] + + +class AlexNet(nn.Module): + def __init__(self, num_classes: int = 1000, dropout: float = 
0.5) -> None: + super().__init__() + _log_api_usage_once(self) + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + self.classifier = nn.Sequential( + nn.Dropout(p=dropout), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(p=dropout), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +class AlexNet_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/alexnet-owt-7be5be79.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + "num_params": 61100840, + "min_size": (63, 63), + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", + "_metrics": { + "ImageNet-1K": { + "acc@1": 56.522, + "acc@5": 79.066, + } + }, + "_ops": 0.714, + "_file_size": 233.087, + "_docs": """ + These weights reproduce closely the results of the paper using a simplified training recipe. 
+ """, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", AlexNet_Weights.IMAGENET1K_V1)) +def alexnet(*, weights: Optional[AlexNet_Weights] = None, progress: bool = True, **kwargs: Any) -> AlexNet: + """AlexNet model architecture from `One weird trick for parallelizing convolutional neural networks `__. + + .. note:: + AlexNet was originally introduced in the `ImageNet Classification with + Deep Convolutional Neural Networks + `__ + paper. Our implementation is based instead on the "One weird trick" + paper above. + + Args: + weights (:class:`~torchvision.models.AlexNet_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.AlexNet_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.squeezenet.AlexNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.AlexNet_Weights + :members: + """ + + weights = AlexNet_Weights.verify(weights) + + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = AlexNet(**kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/convnext.py b/.venv/lib/python3.11/site-packages/torchvision/models/convnext.py new file mode 100644 index 0000000000000000000000000000000000000000..444ef3c219efa09d1c720f29db44a8acc8714bbc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/convnext.py @@ -0,0 +1,414 @@ +from functools import partial +from typing import Any, Callable, List, Optional, Sequence + +import torch +from torch import nn, Tensor +from torch.nn import functional as F + +from ..ops.misc import Conv2dNormActivation, Permute +from ..ops.stochastic_depth import StochasticDepth +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + + +__all__ = [ + "ConvNeXt", + "ConvNeXt_Tiny_Weights", + "ConvNeXt_Small_Weights", + "ConvNeXt_Base_Weights", + "ConvNeXt_Large_Weights", + "convnext_tiny", + "convnext_small", + "convnext_base", + "convnext_large", +] + + +class LayerNorm2d(nn.LayerNorm): + def forward(self, x: Tensor) -> Tensor: + x = x.permute(0, 2, 3, 1) + x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + x = x.permute(0, 3, 1, 2) + return x + + +class CNBlock(nn.Module): + def __init__( + self, + dim, + layer_scale: float, + stochastic_depth_prob: float, + norm_layer: Optional[Callable[..., nn.Module]] = None, + ) -> None: + super().__init__() + if norm_layer is None: + norm_layer = partial(nn.LayerNorm, 
eps=1e-6) + + self.block = nn.Sequential( + nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim, bias=True), + Permute([0, 2, 3, 1]), + norm_layer(dim), + nn.Linear(in_features=dim, out_features=4 * dim, bias=True), + nn.GELU(), + nn.Linear(in_features=4 * dim, out_features=dim, bias=True), + Permute([0, 3, 1, 2]), + ) + self.layer_scale = nn.Parameter(torch.ones(dim, 1, 1) * layer_scale) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + + def forward(self, input: Tensor) -> Tensor: + result = self.layer_scale * self.block(input) + result = self.stochastic_depth(result) + result += input + return result + + +class CNBlockConfig: + # Stores information listed at Section 3 of the ConvNeXt paper + def __init__( + self, + input_channels: int, + out_channels: Optional[int], + num_layers: int, + ) -> None: + self.input_channels = input_channels + self.out_channels = out_channels + self.num_layers = num_layers + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "input_channels={input_channels}" + s += ", out_channels={out_channels}" + s += ", num_layers={num_layers}" + s += ")" + return s.format(**self.__dict__) + + +class ConvNeXt(nn.Module): + def __init__( + self, + block_setting: List[CNBlockConfig], + stochastic_depth_prob: float = 0.0, + layer_scale: float = 1e-6, + num_classes: int = 1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any, + ) -> None: + super().__init__() + _log_api_usage_once(self) + + if not block_setting: + raise ValueError("The block_setting should not be empty") + elif not (isinstance(block_setting, Sequence) and all([isinstance(s, CNBlockConfig) for s in block_setting])): + raise TypeError("The block_setting should be List[CNBlockConfig]") + + if block is None: + block = CNBlock + + if norm_layer is None: + norm_layer = partial(LayerNorm2d, eps=1e-6) + + layers: List[nn.Module] = [] + + # Stem + 
firstconv_output_channels = block_setting[0].input_channels + layers.append( + Conv2dNormActivation( + 3, + firstconv_output_channels, + kernel_size=4, + stride=4, + padding=0, + norm_layer=norm_layer, + activation_layer=None, + bias=True, + ) + ) + + total_stage_blocks = sum(cnf.num_layers for cnf in block_setting) + stage_block_id = 0 + for cnf in block_setting: + # Bottlenecks + stage: List[nn.Module] = [] + for _ in range(cnf.num_layers): + # adjust stochastic depth probability based on the depth of the stage block + sd_prob = stochastic_depth_prob * stage_block_id / (total_stage_blocks - 1.0) + stage.append(block(cnf.input_channels, layer_scale, sd_prob)) + stage_block_id += 1 + layers.append(nn.Sequential(*stage)) + if cnf.out_channels is not None: + # Downsampling + layers.append( + nn.Sequential( + norm_layer(cnf.input_channels), + nn.Conv2d(cnf.input_channels, cnf.out_channels, kernel_size=2, stride=2), + ) + ) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + + lastblock = block_setting[-1] + lastconv_output_channels = ( + lastblock.out_channels if lastblock.out_channels is not None else lastblock.input_channels + ) + self.classifier = nn.Sequential( + norm_layer(lastconv_output_channels), nn.Flatten(1), nn.Linear(lastconv_output_channels, num_classes) + ) + + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + x = self.avgpool(x) + x = self.classifier(x) + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _convnext( + block_setting: List[CNBlockConfig], + stochastic_depth_prob: float, + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> ConvNeXt: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = 
ConvNeXt(block_setting, stochastic_depth_prob=stochastic_depth_prob, **kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model + + +_COMMON_META = { + "min_size": (32, 32), + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext", + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, +} + + +class ConvNeXt_Tiny_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/convnext_tiny-983f1562.pth", + transforms=partial(ImageClassification, crop_size=224, resize_size=236), + meta={ + **_COMMON_META, + "num_params": 28589128, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.520, + "acc@5": 96.146, + } + }, + "_ops": 4.456, + "_file_size": 109.119, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class ConvNeXt_Small_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/convnext_small-0c510722.pth", + transforms=partial(ImageClassification, crop_size=224, resize_size=230), + meta={ + **_COMMON_META, + "num_params": 50223688, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.616, + "acc@5": 96.650, + } + }, + "_ops": 8.684, + "_file_size": 191.703, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class ConvNeXt_Base_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/convnext_base-6075fbad.pth", + transforms=partial(ImageClassification, crop_size=224, resize_size=232), + meta={ + **_COMMON_META, + "num_params": 88591464, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.062, + "acc@5": 96.870, + } + }, + "_ops": 15.355, + "_file_size": 338.064, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class ConvNeXt_Large_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/convnext_large-ea097f82.pth", + 
transforms=partial(ImageClassification, crop_size=224, resize_size=232), + meta={ + **_COMMON_META, + "num_params": 197767336, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.414, + "acc@5": 96.976, + } + }, + "_ops": 34.361, + "_file_size": 754.537, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", ConvNeXt_Tiny_Weights.IMAGENET1K_V1)) +def convnext_tiny(*, weights: Optional[ConvNeXt_Tiny_Weights] = None, progress: bool = True, **kwargs: Any) -> ConvNeXt: + """ConvNeXt Tiny model architecture from the + `A ConvNet for the 2020s `_ paper. + + Args: + weights (:class:`~torchvision.models.convnext.ConvNeXt_Tiny_Weights`, optional): The pretrained + weights to use. See :class:`~torchvision.models.convnext.ConvNeXt_Tiny_Weights` + below for more details and possible values. By default, no pre-trained weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.convnext.ConvNext`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.ConvNeXt_Tiny_Weights + :members: + """ + weights = ConvNeXt_Tiny_Weights.verify(weights) + + block_setting = [ + CNBlockConfig(96, 192, 3), + CNBlockConfig(192, 384, 3), + CNBlockConfig(384, 768, 9), + CNBlockConfig(768, None, 3), + ] + stochastic_depth_prob = kwargs.pop("stochastic_depth_prob", 0.1) + return _convnext(block_setting, stochastic_depth_prob, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", ConvNeXt_Small_Weights.IMAGENET1K_V1)) +def convnext_small( + *, weights: Optional[ConvNeXt_Small_Weights] = None, progress: bool = True, **kwargs: Any +) -> ConvNeXt: + """ConvNeXt Small model architecture from the + `A ConvNet for the 2020s `_ paper. 
+ + Args: + weights (:class:`~torchvision.models.convnext.ConvNeXt_Small_Weights`, optional): The pretrained + weights to use. See :class:`~torchvision.models.convnext.ConvNeXt_Small_Weights` + below for more details and possible values. By default, no pre-trained weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.convnext.ConvNext`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.ConvNeXt_Small_Weights + :members: + """ + weights = ConvNeXt_Small_Weights.verify(weights) + + block_setting = [ + CNBlockConfig(96, 192, 3), + CNBlockConfig(192, 384, 3), + CNBlockConfig(384, 768, 27), + CNBlockConfig(768, None, 3), + ] + stochastic_depth_prob = kwargs.pop("stochastic_depth_prob", 0.4) + return _convnext(block_setting, stochastic_depth_prob, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", ConvNeXt_Base_Weights.IMAGENET1K_V1)) +def convnext_base(*, weights: Optional[ConvNeXt_Base_Weights] = None, progress: bool = True, **kwargs: Any) -> ConvNeXt: + """ConvNeXt Base model architecture from the + `A ConvNet for the 2020s `_ paper. + + Args: + weights (:class:`~torchvision.models.convnext.ConvNeXt_Base_Weights`, optional): The pretrained + weights to use. See :class:`~torchvision.models.convnext.ConvNeXt_Base_Weights` + below for more details and possible values. By default, no pre-trained weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.convnext.ConvNext`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.ConvNeXt_Base_Weights + :members: + """ + weights = ConvNeXt_Base_Weights.verify(weights) + + block_setting = [ + CNBlockConfig(128, 256, 3), + CNBlockConfig(256, 512, 3), + CNBlockConfig(512, 1024, 27), + CNBlockConfig(1024, None, 3), + ] + stochastic_depth_prob = kwargs.pop("stochastic_depth_prob", 0.5) + return _convnext(block_setting, stochastic_depth_prob, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", ConvNeXt_Large_Weights.IMAGENET1K_V1)) +def convnext_large( + *, weights: Optional[ConvNeXt_Large_Weights] = None, progress: bool = True, **kwargs: Any +) -> ConvNeXt: + """ConvNeXt Large model architecture from the + `A ConvNet for the 2020s `_ paper. + + Args: + weights (:class:`~torchvision.models.convnext.ConvNeXt_Large_Weights`, optional): The pretrained + weights to use. See :class:`~torchvision.models.convnext.ConvNeXt_Large_Weights` + below for more details and possible values. By default, no pre-trained weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.convnext.ConvNext`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.ConvNeXt_Large_Weights + :members: + """ + weights = ConvNeXt_Large_Weights.verify(weights) + + block_setting = [ + CNBlockConfig(192, 384, 3), + CNBlockConfig(384, 768, 3), + CNBlockConfig(768, 1536, 27), + CNBlockConfig(1536, None, 3), + ] + stochastic_depth_prob = kwargs.pop("stochastic_depth_prob", 0.5) + return _convnext(block_setting, stochastic_depth_prob, weights, progress, **kwargs) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/densenet.py b/.venv/lib/python3.11/site-packages/torchvision/models/densenet.py new file mode 100644 index 0000000000000000000000000000000000000000..c4c1bc9525a43fbba565dc3f03c6b7231e913fe5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/densenet.py @@ -0,0 +1,448 @@ +import re +from collections import OrderedDict +from functools import partial +from typing import Any, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from torch import Tensor + +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + +__all__ = [ + "DenseNet", + "DenseNet121_Weights", + "DenseNet161_Weights", + "DenseNet169_Weights", + "DenseNet201_Weights", + "densenet121", + "densenet161", + "densenet169", + "densenet201", +] + + +class _DenseLayer(nn.Module): + def __init__( + self, num_input_features: int, growth_rate: int, bn_size: int, drop_rate: float, memory_efficient: bool = False + ) -> None: + super().__init__() + self.norm1 = nn.BatchNorm2d(num_input_features) + self.relu1 = nn.ReLU(inplace=True) + self.conv1 = nn.Conv2d(num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False) + + self.norm2 = nn.BatchNorm2d(bn_size * growth_rate) + self.relu2 = nn.ReLU(inplace=True) + 
self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False) + + self.drop_rate = float(drop_rate) + self.memory_efficient = memory_efficient + + def bn_function(self, inputs: List[Tensor]) -> Tensor: + concated_features = torch.cat(inputs, 1) + bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484 + return bottleneck_output + + # todo: rewrite when torchscript supports any + def any_requires_grad(self, input: List[Tensor]) -> bool: + for tensor in input: + if tensor.requires_grad: + return True + return False + + @torch.jit.unused # noqa: T484 + def call_checkpoint_bottleneck(self, input: List[Tensor]) -> Tensor: + def closure(*inputs): + return self.bn_function(inputs) + + return cp.checkpoint(closure, *input, use_reentrant=False) + + @torch.jit._overload_method # noqa: F811 + def forward(self, input: List[Tensor]) -> Tensor: # noqa: F811 + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, input: Tensor) -> Tensor: # noqa: F811 + pass + + # torchscript does not yet support *args, so we overload method + # allowing it to take either a List[Tensor] or single Tensor + def forward(self, input: Tensor) -> Tensor: # noqa: F811 + if isinstance(input, Tensor): + prev_features = [input] + else: + prev_features = input + + if self.memory_efficient and self.any_requires_grad(prev_features): + if torch.jit.is_scripting(): + raise Exception("Memory Efficient not supported in JIT") + + bottleneck_output = self.call_checkpoint_bottleneck(prev_features) + else: + bottleneck_output = self.bn_function(prev_features) + + new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) + if self.drop_rate > 0: + new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) + return new_features + + +class _DenseBlock(nn.ModuleDict): + _version = 2 + + def __init__( + self, + num_layers: int, + num_input_features: int, + bn_size: int, + growth_rate: int, + 
drop_rate: float, + memory_efficient: bool = False, + ) -> None: + super().__init__() + for i in range(num_layers): + layer = _DenseLayer( + num_input_features + i * growth_rate, + growth_rate=growth_rate, + bn_size=bn_size, + drop_rate=drop_rate, + memory_efficient=memory_efficient, + ) + self.add_module("denselayer%d" % (i + 1), layer) + + def forward(self, init_features: Tensor) -> Tensor: + features = [init_features] + for name, layer in self.items(): + new_features = layer(features) + features.append(new_features) + return torch.cat(features, 1) + + +class _Transition(nn.Sequential): + def __init__(self, num_input_features: int, num_output_features: int) -> None: + super().__init__() + self.norm = nn.BatchNorm2d(num_input_features) + self.relu = nn.ReLU(inplace=True) + self.conv = nn.Conv2d(num_input_features, num_output_features, kernel_size=1, stride=1, bias=False) + self.pool = nn.AvgPool2d(kernel_size=2, stride=2) + + +class DenseNet(nn.Module): + r"""Densenet-BC model class, based on + `"Densely Connected Convolutional Networks" `_. + + Args: + growth_rate (int) - how many filters to add each layer (`k` in paper) + block_config (list of 4 ints) - how many layers in each pooling block + num_init_features (int) - the number of filters to learn in the first convolution layer + bn_size (int) - multiplicative factor for number of bottle neck layers + (i.e. bn_size * k features in the bottleneck layer) + drop_rate (float) - dropout rate after each dense layer + num_classes (int) - number of classification classes + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. See `"paper" `_. 
+ """ + + def __init__( + self, + growth_rate: int = 32, + block_config: Tuple[int, int, int, int] = (6, 12, 24, 16), + num_init_features: int = 64, + bn_size: int = 4, + drop_rate: float = 0, + num_classes: int = 1000, + memory_efficient: bool = False, + ) -> None: + + super().__init__() + _log_api_usage_once(self) + + # First convolution + self.features = nn.Sequential( + OrderedDict( + [ + ("conv0", nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), + ("norm0", nn.BatchNorm2d(num_init_features)), + ("relu0", nn.ReLU(inplace=True)), + ("pool0", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ] + ) + ) + + # Each denseblock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock( + num_layers=num_layers, + num_input_features=num_features, + bn_size=bn_size, + growth_rate=growth_rate, + drop_rate=drop_rate, + memory_efficient=memory_efficient, + ) + self.features.add_module("denseblock%d" % (i + 1), block) + num_features = num_features + num_layers * growth_rate + if i != len(block_config) - 1: + trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2) + self.features.add_module("transition%d" % (i + 1), trans) + num_features = num_features // 2 + + # Final batch norm + self.features.add_module("norm5", nn.BatchNorm2d(num_features)) + + # Linear layer + self.classifier = nn.Linear(num_features, num_classes) + + # Official init from torch repo. 
+ for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.constant_(m.bias, 0) + + def forward(self, x: Tensor) -> Tensor: + features = self.features(x) + out = F.relu(features, inplace=True) + out = F.adaptive_avg_pool2d(out, (1, 1)) + out = torch.flatten(out, 1) + out = self.classifier(out) + return out + + +def _load_state_dict(model: nn.Module, weights: WeightsEnum, progress: bool) -> None: + # '.'s are no longer allowed in module names, but previous _DenseLayer + # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. + # They are also in the checkpoints in model_urls. This pattern is used + # to find such keys. + pattern = re.compile( + r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$" + ) + + state_dict = weights.get_state_dict(progress=progress, check_hash=True) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + model.load_state_dict(state_dict) + + +def _densenet( + growth_rate: int, + block_config: Tuple[int, int, int, int], + num_init_features: int, + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> DenseNet: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = DenseNet(growth_rate, block_config, num_init_features, **kwargs) + + if weights is not None: + _load_state_dict(model=model, weights=weights, progress=progress) + + return model + + +_COMMON_META = { + "min_size": (29, 29), + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/pull/116", + "_docs": """These weights are ported from LuaTorch.""", +} + + +class DenseNet121_Weights(WeightsEnum): + IMAGENET1K_V1 = 
Weights( + url="https://download.pytorch.org/models/densenet121-a639ec97.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 7978856, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.434, + "acc@5": 91.972, + } + }, + "_ops": 2.834, + "_file_size": 30.845, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class DenseNet161_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/densenet161-8d451a50.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 28681000, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.138, + "acc@5": 93.560, + } + }, + "_ops": 7.728, + "_file_size": 110.369, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class DenseNet169_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/densenet169-b2777c0a.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 14149480, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.600, + "acc@5": 92.806, + } + }, + "_ops": 3.36, + "_file_size": 54.708, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class DenseNet201_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/densenet201-c1103571.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 20013928, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.896, + "acc@5": 93.370, + } + }, + "_ops": 4.291, + "_file_size": 77.373, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", DenseNet121_Weights.IMAGENET1K_V1)) +def densenet121(*, weights: Optional[DenseNet121_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet: + r"""Densenet-121 model from + `Densely Connected Convolutional Networks `_. + + Args: + weights (:class:`~torchvision.models.DenseNet121_Weights`, optional): The + pretrained weights to use. 
See + :class:`~torchvision.models.DenseNet121_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.densenet.DenseNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.DenseNet121_Weights + :members: + """ + weights = DenseNet121_Weights.verify(weights) + + return _densenet(32, (6, 12, 24, 16), 64, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", DenseNet161_Weights.IMAGENET1K_V1)) +def densenet161(*, weights: Optional[DenseNet161_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet: + r"""Densenet-161 model from + `Densely Connected Convolutional Networks `_. + + Args: + weights (:class:`~torchvision.models.DenseNet161_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.DenseNet161_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.densenet.DenseNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.DenseNet161_Weights + :members: + """ + weights = DenseNet161_Weights.verify(weights) + + return _densenet(48, (6, 12, 36, 24), 96, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", DenseNet169_Weights.IMAGENET1K_V1)) +def densenet169(*, weights: Optional[DenseNet169_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet: + r"""Densenet-169 model from + `Densely Connected Convolutional Networks `_. 
+ + Args: + weights (:class:`~torchvision.models.DenseNet169_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.DenseNet169_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.densenet.DenseNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.DenseNet169_Weights + :members: + """ + weights = DenseNet169_Weights.verify(weights) + + return _densenet(32, (6, 12, 32, 32), 64, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", DenseNet201_Weights.IMAGENET1K_V1)) +def densenet201(*, weights: Optional[DenseNet201_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet: + r"""Densenet-201 model from + `Densely Connected Convolutional Networks `_. + + Args: + weights (:class:`~torchvision.models.DenseNet201_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.DenseNet201_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.densenet.DenseNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.DenseNet201_Weights + :members: + """ + weights = DenseNet201_Weights.verify(weights) + + return _densenet(32, (6, 12, 48, 32), 64, weights, progress, **kwargs) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/efficientnet.py b/.venv/lib/python3.11/site-packages/torchvision/models/efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..65f0b2fef44702f5c0db9c545c2ecb751a683a72 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/efficientnet.py @@ -0,0 +1,1131 @@ +import copy +import math +from dataclasses import dataclass +from functools import partial +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union + +import torch +from torch import nn, Tensor +from torchvision.ops import StochasticDepth + +from ..ops.misc import Conv2dNormActivation, SqueezeExcitation +from ..transforms._presets import ImageClassification, InterpolationMode +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _make_divisible, _ovewrite_named_param, handle_legacy_interface + + +__all__ = [ + "EfficientNet", + "EfficientNet_B0_Weights", + "EfficientNet_B1_Weights", + "EfficientNet_B2_Weights", + "EfficientNet_B3_Weights", + "EfficientNet_B4_Weights", + "EfficientNet_B5_Weights", + "EfficientNet_B6_Weights", + "EfficientNet_B7_Weights", + "EfficientNet_V2_S_Weights", + "EfficientNet_V2_M_Weights", + "EfficientNet_V2_L_Weights", + "efficientnet_b0", + "efficientnet_b1", + "efficientnet_b2", + "efficientnet_b3", + "efficientnet_b4", + "efficientnet_b5", + "efficientnet_b6", + "efficientnet_b7", + "efficientnet_v2_s", + "efficientnet_v2_m", + "efficientnet_v2_l", +] + + +@dataclass +class _MBConvConfig: + expand_ratio: float + kernel: int + stride: int + input_channels: int + out_channels: int + num_layers: int + block: Callable[..., nn.Module] + + @staticmethod + def 
adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: + return _make_divisible(channels * width_mult, 8, min_value) + + +class MBConvConfig(_MBConvConfig): + # Stores information listed at Table 1 of the EfficientNet paper & Table 4 of the EfficientNetV2 paper + def __init__( + self, + expand_ratio: float, + kernel: int, + stride: int, + input_channels: int, + out_channels: int, + num_layers: int, + width_mult: float = 1.0, + depth_mult: float = 1.0, + block: Optional[Callable[..., nn.Module]] = None, + ) -> None: + input_channels = self.adjust_channels(input_channels, width_mult) + out_channels = self.adjust_channels(out_channels, width_mult) + num_layers = self.adjust_depth(num_layers, depth_mult) + if block is None: + block = MBConv + super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block) + + @staticmethod + def adjust_depth(num_layers: int, depth_mult: float): + return int(math.ceil(num_layers * depth_mult)) + + +class FusedMBConvConfig(_MBConvConfig): + # Stores information listed at Table 4 of the EfficientNetV2 paper + def __init__( + self, + expand_ratio: float, + kernel: int, + stride: int, + input_channels: int, + out_channels: int, + num_layers: int, + block: Optional[Callable[..., nn.Module]] = None, + ) -> None: + if block is None: + block = FusedMBConv + super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block) + + +class MBConv(nn.Module): + def __init__( + self, + cnf: MBConvConfig, + stochastic_depth_prob: float, + norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = SqueezeExcitation, + ) -> None: + super().__init__() + + if not (1 <= cnf.stride <= 2): + raise ValueError("illegal stride value") + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.SiLU + + # expand + expanded_channels = 
cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) + if expanded_channels != cnf.input_channels: + layers.append( + Conv2dNormActivation( + cnf.input_channels, + expanded_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # depthwise + layers.append( + Conv2dNormActivation( + expanded_channels, + expanded_channels, + kernel_size=cnf.kernel, + stride=cnf.stride, + groups=expanded_channels, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # squeeze and excitation + squeeze_channels = max(1, cnf.input_channels // 4) + layers.append(se_layer(expanded_channels, squeeze_channels, activation=partial(nn.SiLU, inplace=True))) + + # project + layers.append( + Conv2dNormActivation( + expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) + + self.block = nn.Sequential(*layers) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + self.out_channels = cnf.out_channels + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result = self.stochastic_depth(result) + result += input + return result + + +class FusedMBConv(nn.Module): + def __init__( + self, + cnf: FusedMBConvConfig, + stochastic_depth_prob: float, + norm_layer: Callable[..., nn.Module], + ) -> None: + super().__init__() + + if not (1 <= cnf.stride <= 2): + raise ValueError("illegal stride value") + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.SiLU + + expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) + if expanded_channels != cnf.input_channels: + # fused expand + layers.append( + Conv2dNormActivation( + cnf.input_channels, + expanded_channels, + kernel_size=cnf.kernel, + stride=cnf.stride, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # project + layers.append( + 
Conv2dNormActivation( + expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) + else: + layers.append( + Conv2dNormActivation( + cnf.input_channels, + cnf.out_channels, + kernel_size=cnf.kernel, + stride=cnf.stride, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + self.block = nn.Sequential(*layers) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + self.out_channels = cnf.out_channels + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result = self.stochastic_depth(result) + result += input + return result + + +class EfficientNet(nn.Module): + def __init__( + self, + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]], + dropout: float, + stochastic_depth_prob: float = 0.2, + num_classes: int = 1000, + norm_layer: Optional[Callable[..., nn.Module]] = None, + last_channel: Optional[int] = None, + ) -> None: + """ + EfficientNet V1 and V2 main class + + Args: + inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure + dropout (float): The droupout probability + stochastic_depth_prob (float): The stochastic depth probability + num_classes (int): Number of classes + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + last_channel (int): The number of channels on the penultimate layer + """ + super().__init__() + _log_api_usage_once(self) + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should not be empty") + elif not ( + isinstance(inverted_residual_setting, Sequence) + and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting]) + ): + raise TypeError("The inverted_residual_setting should be List[MBConvConfig]") + + if norm_layer is None: + norm_layer = nn.BatchNorm2d + + layers: List[nn.Module] = [] + + # building first layer + 
firstconv_output_channels = inverted_residual_setting[0].input_channels + layers.append( + Conv2dNormActivation( + 3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, activation_layer=nn.SiLU + ) + ) + + # building inverted residual blocks + total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting) + stage_block_id = 0 + for cnf in inverted_residual_setting: + stage: List[nn.Module] = [] + for _ in range(cnf.num_layers): + # copy to avoid modifications. shallow copy is enough + block_cnf = copy.copy(cnf) + + # overwrite info if not the first conv in the stage + if stage: + block_cnf.input_channels = block_cnf.out_channels + block_cnf.stride = 1 + + # adjust stochastic depth probability based on the depth of the stage block + sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks + + stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer)) + stage_block_id += 1 + + layers.append(nn.Sequential(*stage)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = last_channel if last_channel is not None else 4 * lastconv_input_channels + layers.append( + Conv2dNormActivation( + lastconv_input_channels, + lastconv_output_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=nn.SiLU, + ) + ) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Dropout(p=dropout, inplace=True), + nn.Linear(lastconv_output_channels, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + init_range = 1.0 / math.sqrt(m.out_features) + nn.init.uniform_(m.weight, -init_range, init_range) + 
nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _efficientnet( + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]], + dropout: float, + last_channel: Optional[int], + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> EfficientNet: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model + + +def _efficientnet_conf( + arch: str, + **kwargs: Any, +) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]: + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]] + if arch.startswith("efficientnet_b"): + bneck_conf = partial(MBConvConfig, width_mult=kwargs.pop("width_mult"), depth_mult=kwargs.pop("depth_mult")) + inverted_residual_setting = [ + bneck_conf(1, 3, 1, 32, 16, 1), + bneck_conf(6, 3, 2, 16, 24, 2), + bneck_conf(6, 5, 2, 24, 40, 2), + bneck_conf(6, 3, 2, 40, 80, 3), + bneck_conf(6, 5, 1, 80, 112, 3), + bneck_conf(6, 5, 2, 112, 192, 4), + bneck_conf(6, 3, 1, 192, 320, 1), + ] + last_channel = None + elif arch.startswith("efficientnet_v2_s"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 24, 24, 2), + FusedMBConvConfig(4, 3, 2, 24, 48, 4), + FusedMBConvConfig(4, 3, 2, 48, 64, 4), + MBConvConfig(4, 3, 2, 64, 128, 6), + MBConvConfig(6, 3, 1, 128, 160, 9), + MBConvConfig(6, 3, 2, 160, 256, 15), + ] + last_channel = 1280 + elif arch.startswith("efficientnet_v2_m"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 24, 24, 3), + FusedMBConvConfig(4, 3, 2, 24, 48, 
5), + FusedMBConvConfig(4, 3, 2, 48, 80, 5), + MBConvConfig(4, 3, 2, 80, 160, 7), + MBConvConfig(6, 3, 1, 160, 176, 14), + MBConvConfig(6, 3, 2, 176, 304, 18), + MBConvConfig(6, 3, 1, 304, 512, 5), + ] + last_channel = 1280 + elif arch.startswith("efficientnet_v2_l"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 32, 32, 4), + FusedMBConvConfig(4, 3, 2, 32, 64, 7), + FusedMBConvConfig(4, 3, 2, 64, 96, 7), + MBConvConfig(4, 3, 2, 96, 192, 10), + MBConvConfig(6, 3, 1, 192, 224, 19), + MBConvConfig(6, 3, 2, 224, 384, 25), + MBConvConfig(6, 3, 1, 384, 640, 7), + ] + last_channel = 1280 + else: + raise ValueError(f"Unsupported model type {arch}") + + return inverted_residual_setting, last_channel + + +_COMMON_META: Dict[str, Any] = { + "categories": _IMAGENET_CATEGORIES, +} + + +_COMMON_META_V1 = { + **_COMMON_META, + "min_size": (1, 1), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1", +} + + +_COMMON_META_V2 = { + **_COMMON_META, + "min_size": (33, 33), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v2", +} + + +class EfficientNet_B0_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ + url="https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth", + transforms=partial( + ImageClassification, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 5288548, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.692, + "acc@5": 93.532, + } + }, + "_ops": 0.386, + "_file_size": 20.451, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_B1_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ + 
url="https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth", + transforms=partial( + ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 7794184, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.642, + "acc@5": 94.186, + } + }, + "_ops": 0.687, + "_file_size": 30.134, + "_docs": """These weights are ported from the original paper.""", + }, + ) + IMAGENET1K_V2 = Weights( + url="https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth", + transforms=partial( + ImageClassification, crop_size=240, resize_size=255, interpolation=InterpolationMode.BILINEAR + ), + meta={ + **_COMMON_META_V1, + "num_params": 7794184, + "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", + "_metrics": { + "ImageNet-1K": { + "acc@1": 79.838, + "acc@5": 94.934, + } + }, + "_ops": 0.687, + "_file_size": 30.136, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, + }, + ) + DEFAULT = IMAGENET1K_V2 + + +class EfficientNet_B2_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ + url="https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth", + transforms=partial( + ImageClassification, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 9109994, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.608, + "acc@5": 95.310, + } + }, + "_ops": 1.088, + "_file_size": 35.174, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_B3_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ + url="https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth", + transforms=partial( + ImageClassification, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 12233232, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.008, + "acc@5": 96.054, + } + }, + "_ops": 1.827, + "_file_size": 47.184, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_B4_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ + url="https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth", + transforms=partial( + ImageClassification, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 19341616, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.384, + "acc@5": 96.594, + } + }, + "_ops": 4.394, + "_file_size": 74.489, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class 
EfficientNet_B5_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ + url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-1a07897c.pth", + transforms=partial( + ImageClassification, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 30389784, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.444, + "acc@5": 96.628, + } + }, + "_ops": 10.266, + "_file_size": 116.864, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_B6_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ + url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-24a108a5.pth", + transforms=partial( + ImageClassification, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 43040704, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.008, + "acc@5": 96.916, + } + }, + "_ops": 19.068, + "_file_size": 165.362, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_B7_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ + url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-c5b4e57e.pth", + transforms=partial( + ImageClassification, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC + ), + meta={ + **_COMMON_META_V1, + "num_params": 66347960, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.122, + "acc@5": 96.908, + } + }, + "_ops": 37.746, + "_file_size": 254.675, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_V2_S_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + 
url="https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth", + transforms=partial( + ImageClassification, + crop_size=384, + resize_size=384, + interpolation=InterpolationMode.BILINEAR, + ), + meta={ + **_COMMON_META_V2, + "num_params": 21458488, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.228, + "acc@5": 96.878, + } + }, + "_ops": 8.366, + "_file_size": 82.704, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_V2_M_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth", + transforms=partial( + ImageClassification, + crop_size=480, + resize_size=480, + interpolation=InterpolationMode.BILINEAR, + ), + meta={ + **_COMMON_META_V2, + "num_params": 54139356, + "_metrics": { + "ImageNet-1K": { + "acc@1": 85.112, + "acc@5": 97.156, + } + }, + "_ops": 24.582, + "_file_size": 208.01, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class EfficientNet_V2_L_Weights(WeightsEnum): + # Weights ported from https://github.com/google/automl/tree/master/efficientnetv2 + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth", + transforms=partial( + ImageClassification, + crop_size=480, + resize_size=480, + interpolation=InterpolationMode.BICUBIC, + mean=(0.5, 0.5, 0.5), + std=(0.5, 0.5, 0.5), + ), + meta={ + **_COMMON_META_V2, + "num_params": 118515272, + "_metrics": { + "ImageNet-1K": { + "acc@1": 85.808, + "acc@5": 97.788, + } + }, + "_ops": 56.08, + "_file_size": 454.573, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1)) +def efficientnet_b0( + *, weights: Optional[EfficientNet_B0_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B0 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B0_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B0_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B0_Weights + :members: + """ + weights = EfficientNet_B0_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0) + return _efficientnet( + inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B1_Weights.IMAGENET1K_V1)) +def efficientnet_b1( + *, weights: Optional[EfficientNet_B1_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B1 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B1_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B1_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B1_Weights + :members: + """ + weights = EfficientNet_B1_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1) + return _efficientnet( + inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B2_Weights.IMAGENET1K_V1)) +def efficientnet_b2( + *, weights: Optional[EfficientNet_B2_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B2 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B2_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B2_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B2_Weights + :members: + """ + weights = EfficientNet_B2_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2) + return _efficientnet( + inverted_residual_setting, kwargs.pop("dropout", 0.3), last_channel, weights, progress, **kwargs + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B3_Weights.IMAGENET1K_V1)) +def efficientnet_b3( + *, weights: Optional[EfficientNet_B3_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B3 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B3_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B3_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B3_Weights + :members: + """ + weights = EfficientNet_B3_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4) + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.3), + last_channel, + weights, + progress, + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B4_Weights.IMAGENET1K_V1)) +def efficientnet_b4( + *, weights: Optional[EfficientNet_B4_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B4 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B4_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B4_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B4_Weights + :members: + """ + weights = EfficientNet_B4_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8) + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.4), + last_channel, + weights, + progress, + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B5_Weights.IMAGENET1K_V1)) +def efficientnet_b5( + *, weights: Optional[EfficientNet_B5_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B5 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B5_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B5_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B5_Weights + :members: + """ + weights = EfficientNet_B5_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2) + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.4), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B6_Weights.IMAGENET1K_V1)) +def efficientnet_b6( + *, weights: Optional[EfficientNet_B6_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B6 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B6_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B6_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B6_Weights + :members: + """ + weights = EfficientNet_B6_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6) + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.5), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_B7_Weights.IMAGENET1K_V1)) +def efficientnet_b7( + *, weights: Optional[EfficientNet_B7_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """EfficientNet B7 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional + Neural Networks `_ paper. + + Args: + weights (:class:`~torchvision.models.EfficientNet_B7_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_B7_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_B7_Weights + :members: + """ + weights = EfficientNet_B7_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1) + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.5), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_S_Weights.IMAGENET1K_V1)) +def efficientnet_v2_s( + *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """ + Constructs an EfficientNetV2-S architecture from + `EfficientNetV2: Smaller Models and Faster Training `_. + + Args: + weights (:class:`~torchvision.models.EfficientNet_V2_S_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_V2_S_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_V2_S_Weights + :members: + """ + weights = EfficientNet_V2_S_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s") + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.2), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_M_Weights.IMAGENET1K_V1)) +def efficientnet_v2_m( + *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """ + Constructs an EfficientNetV2-M architecture from + `EfficientNetV2: Smaller Models and Faster Training `_. + + Args: + weights (:class:`~torchvision.models.EfficientNet_V2_M_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_V2_M_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_V2_M_Weights + :members: + """ + weights = EfficientNet_V2_M_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m") + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.3), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_L_Weights.IMAGENET1K_V1)) +def efficientnet_v2_l( + *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + """ + Constructs an EfficientNetV2-L architecture from + `EfficientNetV2: Smaller Models and Faster Training `_. + + Args: + weights (:class:`~torchvision.models.EfficientNet_V2_L_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.EfficientNet_V2_L_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.EfficientNet_V2_L_Weights + :members: + """ + weights = EfficientNet_V2_L_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l") + return _efficientnet( + inverted_residual_setting, + kwargs.pop("dropout", 0.4), + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/googlenet.py b/.venv/lib/python3.11/site-packages/torchvision/models/googlenet.py new file mode 100644 index 0000000000000000000000000000000000000000..1dc5136d726a1542a7e83cff8cf356ee207b1949 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/googlenet.py @@ -0,0 +1,345 @@ +import warnings +from collections import namedtuple +from functools import partial +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + + +__all__ = ["GoogLeNet", "GoogLeNetOutputs", "_GoogLeNetOutputs", "GoogLeNet_Weights", "googlenet"] + + +GoogLeNetOutputs = namedtuple("GoogLeNetOutputs", ["logits", "aux_logits2", "aux_logits1"]) +GoogLeNetOutputs.__annotations__ = {"logits": Tensor, "aux_logits2": Optional[Tensor], "aux_logits1": Optional[Tensor]} + +# Script annotations failed with _GoogleNetOutputs = namedtuple ... 
+# _GoogLeNetOutputs set here for backwards compat +_GoogLeNetOutputs = GoogLeNetOutputs + + +class GoogLeNet(nn.Module): + __constants__ = ["aux_logits", "transform_input"] + + def __init__( + self, + num_classes: int = 1000, + aux_logits: bool = True, + transform_input: bool = False, + init_weights: Optional[bool] = None, + blocks: Optional[List[Callable[..., nn.Module]]] = None, + dropout: float = 0.2, + dropout_aux: float = 0.7, + ) -> None: + super().__init__() + _log_api_usage_once(self) + if blocks is None: + blocks = [BasicConv2d, Inception, InceptionAux] + if init_weights is None: + warnings.warn( + "The default weight initialization of GoogleNet will be changed in future releases of " + "torchvision. If you wish to keep the old behavior (which leads to long initialization times" + " due to scipy/scipy#11299), please set init_weights=True.", + FutureWarning, + ) + init_weights = True + if len(blocks) != 3: + raise ValueError(f"blocks length should be 3 instead of {len(blocks)}") + conv_block = blocks[0] + inception_block = blocks[1] + inception_aux_block = blocks[2] + + self.aux_logits = aux_logits + self.transform_input = transform_input + + self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3) + self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + self.conv2 = conv_block(64, 64, kernel_size=1) + self.conv3 = conv_block(64, 192, kernel_size=3, padding=1) + self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + + self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32) + self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64) + self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + + self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64) + self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64) + self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64) + self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64) + self.inception4e = inception_block(528, 256, 160, 320, 
32, 128, 128) + self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128) + self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128) + + if aux_logits: + self.aux1 = inception_aux_block(512, num_classes, dropout=dropout_aux) + self.aux2 = inception_aux_block(528, num_classes, dropout=dropout_aux) + else: + self.aux1 = None # type: ignore[assignment] + self.aux2 = None # type: ignore[assignment] + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout(p=dropout) + self.fc = nn.Linear(1024, num_classes) + + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + torch.nn.init.trunc_normal_(m.weight, mean=0.0, std=0.01, a=-2, b=2) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x: Tensor) -> Tensor: + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x: Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]: + # N x 3 x 224 x 224 + x = self.conv1(x) + # N x 64 x 112 x 112 + x = self.maxpool1(x) + # N x 64 x 56 x 56 + x = self.conv2(x) + # N x 64 x 56 x 56 + x = self.conv3(x) + # N x 192 x 56 x 56 + x = self.maxpool2(x) + + # N x 192 x 28 x 28 + x = self.inception3a(x) + # N x 256 x 28 x 28 + x = self.inception3b(x) + # N x 480 x 28 x 28 + x = self.maxpool3(x) + # N x 480 x 14 x 14 + x = self.inception4a(x) + # N x 512 x 14 x 14 + aux1: Optional[Tensor] = None + if self.aux1 is not None: + if self.training: + aux1 = self.aux1(x) + + x = self.inception4b(x) + # N x 512 x 14 x 14 + x = self.inception4c(x) + # N x 512 x 14 x 14 + x = self.inception4d(x) + # N 
x 528 x 14 x 14 + aux2: Optional[Tensor] = None + if self.aux2 is not None: + if self.training: + aux2 = self.aux2(x) + + x = self.inception4e(x) + # N x 832 x 14 x 14 + x = self.maxpool4(x) + # N x 832 x 7 x 7 + x = self.inception5a(x) + # N x 832 x 7 x 7 + x = self.inception5b(x) + # N x 1024 x 7 x 7 + + x = self.avgpool(x) + # N x 1024 x 1 x 1 + x = torch.flatten(x, 1) + # N x 1024 + x = self.dropout(x) + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux2, aux1 + + @torch.jit.unused + def eager_outputs(self, x: Tensor, aux2: Tensor, aux1: Optional[Tensor]) -> GoogLeNetOutputs: + if self.training and self.aux_logits: + return _GoogLeNetOutputs(x, aux2, aux1) + else: + return x # type: ignore[return-value] + + def forward(self, x: Tensor) -> GoogLeNetOutputs: + x = self._transform_input(x) + x, aux1, aux2 = self._forward(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple") + return GoogLeNetOutputs(x, aux2, aux1) + else: + return self.eager_outputs(x, aux2, aux1) + + +class Inception(nn.Module): + def __init__( + self, + in_channels: int, + ch1x1: int, + ch3x3red: int, + ch3x3: int, + ch5x5red: int, + ch5x5: int, + pool_proj: int, + conv_block: Optional[Callable[..., nn.Module]] = None, + ) -> None: + super().__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1) + + self.branch2 = nn.Sequential( + conv_block(in_channels, ch3x3red, kernel_size=1), conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1) + ) + + self.branch3 = nn.Sequential( + conv_block(in_channels, ch5x5red, kernel_size=1), + # Here, kernel_size=3 instead of kernel_size=5 is a known bug. + # Please see https://github.com/pytorch/vision/issues/906 for details. 
+ conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1), + ) + + self.branch4 = nn.Sequential( + nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True), + conv_block(in_channels, pool_proj, kernel_size=1), + ) + + def _forward(self, x: Tensor) -> List[Tensor]: + branch1 = self.branch1(x) + branch2 = self.branch2(x) + branch3 = self.branch3(x) + branch4 = self.branch4(x) + + outputs = [branch1, branch2, branch3, branch4] + return outputs + + def forward(self, x: Tensor) -> Tensor: + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + def __init__( + self, + in_channels: int, + num_classes: int, + conv_block: Optional[Callable[..., nn.Module]] = None, + dropout: float = 0.7, + ) -> None: + super().__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv = conv_block(in_channels, 128, kernel_size=1) + + self.fc1 = nn.Linear(2048, 1024) + self.fc2 = nn.Linear(1024, num_classes) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x: Tensor) -> Tensor: + # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14 + x = F.adaptive_avg_pool2d(x, (4, 4)) + # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4 + x = self.conv(x) + # N x 128 x 4 x 4 + x = torch.flatten(x, 1) + # N x 2048 + x = F.relu(self.fc1(x), inplace=True) + # N x 1024 + x = self.dropout(x) + # N x 1024 + x = self.fc2(x) + # N x 1000 (num_classes) + + return x + + +class BasicConv2d(nn.Module): + def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None: + super().__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x: Tensor) -> Tensor: + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) + + +class GoogLeNet_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/googlenet-1378be20.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + 
"num_params": 6624904, + "min_size": (15, 15), + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.778, + "acc@5": 89.530, + } + }, + "_ops": 1.498, + "_file_size": 49.731, + "_docs": """These weights are ported from the original paper.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", GoogLeNet_Weights.IMAGENET1K_V1)) +def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = True, **kwargs: Any) -> GoogLeNet: + """GoogLeNet (Inception v1) model architecture from + `Going Deeper with Convolutions `_. + + Args: + weights (:class:`~torchvision.models.GoogLeNet_Weights`, optional): The + pretrained weights for the model. See + :class:`~torchvision.models.GoogLeNet_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.GoogLeNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + .. 
autoclass:: torchvision.models.GoogLeNet_Weights + :members: + """ + weights = GoogLeNet_Weights.verify(weights) + + original_aux_logits = kwargs.get("aux_logits", False) + if weights is not None: + if "transform_input" not in kwargs: + _ovewrite_named_param(kwargs, "transform_input", True) + _ovewrite_named_param(kwargs, "aux_logits", True) + _ovewrite_named_param(kwargs, "init_weights", False) + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = GoogLeNet(**kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + if not original_aux_logits: + model.aux_logits = False + model.aux1 = None # type: ignore[assignment] + model.aux2 = None # type: ignore[assignment] + else: + warnings.warn( + "auxiliary heads in the pretrained googlenet model are NOT pretrained, so make sure to train them" + ) + + return model diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/maxvit.py b/.venv/lib/python3.11/site-packages/torchvision/models/maxvit.py new file mode 100644 index 0000000000000000000000000000000000000000..2dbdca69f0580434beb549b3ea4b531995436cef --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/maxvit.py @@ -0,0 +1,833 @@ +import math +from collections import OrderedDict +from functools import partial +from typing import Any, Callable, List, Optional, Sequence, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn, Tensor +from torchvision.models._api import register_model, Weights, WeightsEnum +from torchvision.models._meta import _IMAGENET_CATEGORIES +from torchvision.models._utils import _ovewrite_named_param, handle_legacy_interface +from torchvision.ops.misc import Conv2dNormActivation, SqueezeExcitation +from torchvision.ops.stochastic_depth import StochasticDepth +from torchvision.transforms._presets import ImageClassification, InterpolationMode +from torchvision.utils import 
_log_api_usage_once + +__all__ = [ + "MaxVit", + "MaxVit_T_Weights", + "maxvit_t", +] + + +def _get_conv_output_shape(input_size: Tuple[int, int], kernel_size: int, stride: int, padding: int) -> Tuple[int, int]: + return ( + (input_size[0] - kernel_size + 2 * padding) // stride + 1, + (input_size[1] - kernel_size + 2 * padding) // stride + 1, + ) + + +def _make_block_input_shapes(input_size: Tuple[int, int], n_blocks: int) -> List[Tuple[int, int]]: + """Util function to check that the input size is correct for a MaxVit configuration.""" + shapes = [] + block_input_shape = _get_conv_output_shape(input_size, 3, 2, 1) + for _ in range(n_blocks): + block_input_shape = _get_conv_output_shape(block_input_shape, 3, 2, 1) + shapes.append(block_input_shape) + return shapes + + +def _get_relative_position_index(height: int, width: int) -> torch.Tensor: + coords = torch.stack(torch.meshgrid([torch.arange(height), torch.arange(width)])) + coords_flat = torch.flatten(coords, 1) + relative_coords = coords_flat[:, :, None] - coords_flat[:, None, :] + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + relative_coords[:, :, 0] += height - 1 + relative_coords[:, :, 1] += width - 1 + relative_coords[:, :, 0] *= 2 * width - 1 + return relative_coords.sum(-1) + + +class MBConv(nn.Module): + """MBConv: Mobile Inverted Residual Bottleneck. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + expansion_ratio (float): Expansion ratio in the bottleneck. + squeeze_ratio (float): Squeeze ratio in the SE Layer. + stride (int): Stride of the depthwise convolution. + activation_layer (Callable[..., nn.Module]): Activation function. + norm_layer (Callable[..., nn.Module]): Normalization function. + p_stochastic_dropout (float): Probability of stochastic depth. 
+ """ + + def __init__( + self, + in_channels: int, + out_channels: int, + expansion_ratio: float, + squeeze_ratio: float, + stride: int, + activation_layer: Callable[..., nn.Module], + norm_layer: Callable[..., nn.Module], + p_stochastic_dropout: float = 0.0, + ) -> None: + super().__init__() + + proj: Sequence[nn.Module] + self.proj: nn.Module + + should_proj = stride != 1 or in_channels != out_channels + if should_proj: + proj = [nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=True)] + if stride == 2: + proj = [nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)] + proj # type: ignore + self.proj = nn.Sequential(*proj) + else: + self.proj = nn.Identity() # type: ignore + + mid_channels = int(out_channels * expansion_ratio) + sqz_channels = int(out_channels * squeeze_ratio) + + if p_stochastic_dropout: + self.stochastic_depth = StochasticDepth(p_stochastic_dropout, mode="row") # type: ignore + else: + self.stochastic_depth = nn.Identity() # type: ignore + + _layers = OrderedDict() + _layers["pre_norm"] = norm_layer(in_channels) + _layers["conv_a"] = Conv2dNormActivation( + in_channels, + mid_channels, + kernel_size=1, + stride=1, + padding=0, + activation_layer=activation_layer, + norm_layer=norm_layer, + inplace=None, + ) + _layers["conv_b"] = Conv2dNormActivation( + mid_channels, + mid_channels, + kernel_size=3, + stride=stride, + padding=1, + activation_layer=activation_layer, + norm_layer=norm_layer, + groups=mid_channels, + inplace=None, + ) + _layers["squeeze_excitation"] = SqueezeExcitation(mid_channels, sqz_channels, activation=nn.SiLU) + _layers["conv_c"] = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, bias=True) + + self.layers = nn.Sequential(_layers) + + def forward(self, x: Tensor) -> Tensor: + """ + Args: + x (Tensor): Input tensor with expected layout of [B, C, H, W]. + Returns: + Tensor: Output tensor with expected layout of [B, C, H / stride, W / stride]. 
+ """ + res = self.proj(x) + x = self.stochastic_depth(self.layers(x)) + return res + x + + +class RelativePositionalMultiHeadAttention(nn.Module): + """Relative Positional Multi-Head Attention. + + Args: + feat_dim (int): Number of input features. + head_dim (int): Number of features per head. + max_seq_len (int): Maximum sequence length. + """ + + def __init__( + self, + feat_dim: int, + head_dim: int, + max_seq_len: int, + ) -> None: + super().__init__() + + if feat_dim % head_dim != 0: + raise ValueError(f"feat_dim: {feat_dim} must be divisible by head_dim: {head_dim}") + + self.n_heads = feat_dim // head_dim + self.head_dim = head_dim + self.size = int(math.sqrt(max_seq_len)) + self.max_seq_len = max_seq_len + + self.to_qkv = nn.Linear(feat_dim, self.n_heads * self.head_dim * 3) + self.scale_factor = feat_dim**-0.5 + + self.merge = nn.Linear(self.head_dim * self.n_heads, feat_dim) + self.relative_position_bias_table = nn.parameter.Parameter( + torch.empty(((2 * self.size - 1) * (2 * self.size - 1), self.n_heads), dtype=torch.float32), + ) + + self.register_buffer("relative_position_index", _get_relative_position_index(self.size, self.size)) + # initialize with truncated normal the bias + torch.nn.init.trunc_normal_(self.relative_position_bias_table, std=0.02) + + def get_relative_positional_bias(self) -> torch.Tensor: + bias_index = self.relative_position_index.view(-1) # type: ignore + relative_bias = self.relative_position_bias_table[bias_index].view(self.max_seq_len, self.max_seq_len, -1) # type: ignore + relative_bias = relative_bias.permute(2, 0, 1).contiguous() + return relative_bias.unsqueeze(0) + + def forward(self, x: Tensor) -> Tensor: + """ + Args: + x (Tensor): Input tensor with expected layout of [B, G, P, D]. + Returns: + Tensor: Output tensor with expected layout of [B, G, P, D]. 
+ """ + B, G, P, D = x.shape + H, DH = self.n_heads, self.head_dim + + qkv = self.to_qkv(x) + q, k, v = torch.chunk(qkv, 3, dim=-1) + + q = q.reshape(B, G, P, H, DH).permute(0, 1, 3, 2, 4) + k = k.reshape(B, G, P, H, DH).permute(0, 1, 3, 2, 4) + v = v.reshape(B, G, P, H, DH).permute(0, 1, 3, 2, 4) + + k = k * self.scale_factor + dot_prod = torch.einsum("B G H I D, B G H J D -> B G H I J", q, k) + pos_bias = self.get_relative_positional_bias() + + dot_prod = F.softmax(dot_prod + pos_bias, dim=-1) + + out = torch.einsum("B G H I J, B G H J D -> B G H I D", dot_prod, v) + out = out.permute(0, 1, 3, 2, 4).reshape(B, G, P, D) + + out = self.merge(out) + return out + + +class SwapAxes(nn.Module): + """Permute the axes of a tensor.""" + + def __init__(self, a: int, b: int) -> None: + super().__init__() + self.a = a + self.b = b + + def forward(self, x: torch.Tensor) -> torch.Tensor: + res = torch.swapaxes(x, self.a, self.b) + return res + + +class WindowPartition(nn.Module): + """ + Partition the input tensor into non-overlapping windows. + """ + + def __init__(self) -> None: + super().__init__() + + def forward(self, x: Tensor, p: int) -> Tensor: + """ + Args: + x (Tensor): Input tensor with expected layout of [B, C, H, W]. + p (int): Number of partitions. + Returns: + Tensor: Output tensor with expected layout of [B, H/P, W/P, P*P, C]. + """ + B, C, H, W = x.shape + P = p + # chunk up H and W dimensions + x = x.reshape(B, C, H // P, P, W // P, P) + x = x.permute(0, 2, 4, 3, 5, 1) + # colapse P * P dimension + x = x.reshape(B, (H // P) * (W // P), P * P, C) + return x + + +class WindowDepartition(nn.Module): + """ + Departition the input tensor of non-overlapping windows into a feature volume of layout [B, C, H, W]. + """ + + def __init__(self) -> None: + super().__init__() + + def forward(self, x: Tensor, p: int, h_partitions: int, w_partitions: int) -> Tensor: + """ + Args: + x (Tensor): Input tensor with expected layout of [B, (H/P * W/P), P*P, C]. 
+ p (int): Number of partitions. + h_partitions (int): Number of vertical partitions. + w_partitions (int): Number of horizontal partitions. + Returns: + Tensor: Output tensor with expected layout of [B, C, H, W]. + """ + B, G, PP, C = x.shape + P = p + HP, WP = h_partitions, w_partitions + # split P * P dimension into 2 P tile dimensionsa + x = x.reshape(B, HP, WP, P, P, C) + # permute into B, C, HP, P, WP, P + x = x.permute(0, 5, 1, 3, 2, 4) + # reshape into B, C, H, W + x = x.reshape(B, C, HP * P, WP * P) + return x + + +class PartitionAttentionLayer(nn.Module): + """ + Layer for partitioning the input tensor into non-overlapping windows and applying attention to each window. + + Args: + in_channels (int): Number of input channels. + head_dim (int): Dimension of each attention head. + partition_size (int): Size of the partitions. + partition_type (str): Type of partitioning to use. Can be either "grid" or "window". + grid_size (Tuple[int, int]): Size of the grid to partition the input tensor into. + mlp_ratio (int): Ratio of the feature size expansion in the MLP layer. + activation_layer (Callable[..., nn.Module]): Activation function to use. + norm_layer (Callable[..., nn.Module]): Normalization function to use. + attention_dropout (float): Dropout probability for the attention layer. + mlp_dropout (float): Dropout probability for the MLP layer. + p_stochastic_dropout (float): Probability of dropping out a partition. 
+ """ + + def __init__( + self, + in_channels: int, + head_dim: int, + # partitioning parameters + partition_size: int, + partition_type: str, + # grid size needs to be known at initialization time + # because we need to know hamy relative offsets there are in the grid + grid_size: Tuple[int, int], + mlp_ratio: int, + activation_layer: Callable[..., nn.Module], + norm_layer: Callable[..., nn.Module], + attention_dropout: float, + mlp_dropout: float, + p_stochastic_dropout: float, + ) -> None: + super().__init__() + + self.n_heads = in_channels // head_dim + self.head_dim = head_dim + self.n_partitions = grid_size[0] // partition_size + self.partition_type = partition_type + self.grid_size = grid_size + + if partition_type not in ["grid", "window"]: + raise ValueError("partition_type must be either 'grid' or 'window'") + + if partition_type == "window": + self.p, self.g = partition_size, self.n_partitions + else: + self.p, self.g = self.n_partitions, partition_size + + self.partition_op = WindowPartition() + self.departition_op = WindowDepartition() + self.partition_swap = SwapAxes(-2, -3) if partition_type == "grid" else nn.Identity() + self.departition_swap = SwapAxes(-2, -3) if partition_type == "grid" else nn.Identity() + + self.attn_layer = nn.Sequential( + norm_layer(in_channels), + # it's always going to be partition_size ** 2 because + # of the axis swap in the case of grid partitioning + RelativePositionalMultiHeadAttention(in_channels, head_dim, partition_size**2), + nn.Dropout(attention_dropout), + ) + + # pre-normalization similar to transformer layers + self.mlp_layer = nn.Sequential( + nn.LayerNorm(in_channels), + nn.Linear(in_channels, in_channels * mlp_ratio), + activation_layer(), + nn.Linear(in_channels * mlp_ratio, in_channels), + nn.Dropout(mlp_dropout), + ) + + # layer scale factors + self.stochastic_dropout = StochasticDepth(p_stochastic_dropout, mode="row") + + def forward(self, x: Tensor) -> Tensor: + """ + Args: + x (Tensor): Input tensor 
with expected layout of [B, C, H, W]. + Returns: + Tensor: Output tensor with expected layout of [B, C, H, W]. + """ + + # Undefined behavior if H or W are not divisible by p + # https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L766 + gh, gw = self.grid_size[0] // self.p, self.grid_size[1] // self.p + torch._assert( + self.grid_size[0] % self.p == 0 and self.grid_size[1] % self.p == 0, + "Grid size must be divisible by partition size. Got grid size of {} and partition size of {}".format( + self.grid_size, self.p + ), + ) + + x = self.partition_op(x, self.p) + x = self.partition_swap(x) + x = x + self.stochastic_dropout(self.attn_layer(x)) + x = x + self.stochastic_dropout(self.mlp_layer(x)) + x = self.departition_swap(x) + x = self.departition_op(x, self.p, gh, gw) + + return x + + +class MaxVitLayer(nn.Module): + """ + MaxVit layer consisting of a MBConv layer followed by a PartitionAttentionLayer with `window` and a PartitionAttentionLayer with `grid`. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + expansion_ratio (float): Expansion ratio in the bottleneck. + squeeze_ratio (float): Squeeze ratio in the SE Layer. + stride (int): Stride of the depthwise convolution. + activation_layer (Callable[..., nn.Module]): Activation function. + norm_layer (Callable[..., nn.Module]): Normalization function. + head_dim (int): Dimension of the attention heads. + mlp_ratio (int): Ratio of the MLP layer. + mlp_dropout (float): Dropout probability for the MLP layer. + attention_dropout (float): Dropout probability for the attention layer. + p_stochastic_dropout (float): Probability of stochastic depth. + partition_size (int): Size of the partitions. + grid_size (Tuple[int, int]): Size of the input feature grid. 
+ """ + + def __init__( + self, + # conv parameters + in_channels: int, + out_channels: int, + squeeze_ratio: float, + expansion_ratio: float, + stride: int, + # conv + transformer parameters + norm_layer: Callable[..., nn.Module], + activation_layer: Callable[..., nn.Module], + # transformer parameters + head_dim: int, + mlp_ratio: int, + mlp_dropout: float, + attention_dropout: float, + p_stochastic_dropout: float, + # partitioning parameters + partition_size: int, + grid_size: Tuple[int, int], + ) -> None: + super().__init__() + + layers: OrderedDict = OrderedDict() + + # convolutional layer + layers["MBconv"] = MBConv( + in_channels=in_channels, + out_channels=out_channels, + expansion_ratio=expansion_ratio, + squeeze_ratio=squeeze_ratio, + stride=stride, + activation_layer=activation_layer, + norm_layer=norm_layer, + p_stochastic_dropout=p_stochastic_dropout, + ) + # attention layers, block -> grid + layers["window_attention"] = PartitionAttentionLayer( + in_channels=out_channels, + head_dim=head_dim, + partition_size=partition_size, + partition_type="window", + grid_size=grid_size, + mlp_ratio=mlp_ratio, + activation_layer=activation_layer, + norm_layer=nn.LayerNorm, + attention_dropout=attention_dropout, + mlp_dropout=mlp_dropout, + p_stochastic_dropout=p_stochastic_dropout, + ) + layers["grid_attention"] = PartitionAttentionLayer( + in_channels=out_channels, + head_dim=head_dim, + partition_size=partition_size, + partition_type="grid", + grid_size=grid_size, + mlp_ratio=mlp_ratio, + activation_layer=activation_layer, + norm_layer=nn.LayerNorm, + attention_dropout=attention_dropout, + mlp_dropout=mlp_dropout, + p_stochastic_dropout=p_stochastic_dropout, + ) + self.layers = nn.Sequential(layers) + + def forward(self, x: Tensor) -> Tensor: + """ + Args: + x (Tensor): Input tensor of shape (B, C, H, W). + Returns: + Tensor: Output tensor of shape (B, C, H, W). 
+ """ + x = self.layers(x) + return x + + +class MaxVitBlock(nn.Module): + """ + A MaxVit block consisting of `n_layers` MaxVit layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + expansion_ratio (float): Expansion ratio in the bottleneck. + squeeze_ratio (float): Squeeze ratio in the SE Layer. + activation_layer (Callable[..., nn.Module]): Activation function. + norm_layer (Callable[..., nn.Module]): Normalization function. + head_dim (int): Dimension of the attention heads. + mlp_ratio (int): Ratio of the MLP layer. + mlp_dropout (float): Dropout probability for the MLP layer. + attention_dropout (float): Dropout probability for the attention layer. + p_stochastic_dropout (float): Probability of stochastic depth. + partition_size (int): Size of the partitions. + input_grid_size (Tuple[int, int]): Size of the input feature grid. + n_layers (int): Number of layers in the block. + p_stochastic (List[float]): List of probabilities for stochastic depth for each layer. 
+ """ + + def __init__( + self, + # conv parameters + in_channels: int, + out_channels: int, + squeeze_ratio: float, + expansion_ratio: float, + # conv + transformer parameters + norm_layer: Callable[..., nn.Module], + activation_layer: Callable[..., nn.Module], + # transformer parameters + head_dim: int, + mlp_ratio: int, + mlp_dropout: float, + attention_dropout: float, + # partitioning parameters + partition_size: int, + input_grid_size: Tuple[int, int], + # number of layers + n_layers: int, + p_stochastic: List[float], + ) -> None: + super().__init__() + if not len(p_stochastic) == n_layers: + raise ValueError(f"p_stochastic must have length n_layers={n_layers}, got p_stochastic={p_stochastic}.") + + self.layers = nn.ModuleList() + # account for the first stride of the first layer + self.grid_size = _get_conv_output_shape(input_grid_size, kernel_size=3, stride=2, padding=1) + + for idx, p in enumerate(p_stochastic): + stride = 2 if idx == 0 else 1 + self.layers += [ + MaxVitLayer( + in_channels=in_channels if idx == 0 else out_channels, + out_channels=out_channels, + squeeze_ratio=squeeze_ratio, + expansion_ratio=expansion_ratio, + stride=stride, + norm_layer=norm_layer, + activation_layer=activation_layer, + head_dim=head_dim, + mlp_ratio=mlp_ratio, + mlp_dropout=mlp_dropout, + attention_dropout=attention_dropout, + partition_size=partition_size, + grid_size=self.grid_size, + p_stochastic_dropout=p, + ), + ] + + def forward(self, x: Tensor) -> Tensor: + """ + Args: + x (Tensor): Input tensor of shape (B, C, H, W). + Returns: + Tensor: Output tensor of shape (B, C, H, W). + """ + for layer in self.layers: + x = layer(x) + return x + + +class MaxVit(nn.Module): + """ + Implements MaxVit Transformer from the `MaxViT: Multi-Axis Vision Transformer `_ paper. + Args: + input_size (Tuple[int, int]): Size of the input image. + stem_channels (int): Number of channels in the stem. + partition_size (int): Size of the partitions. 
+ block_channels (List[int]): Number of channels in each block. + block_layers (List[int]): Number of layers in each block. + stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value. + squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25. + expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4. + norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`). + activation_layer (Callable[..., nn.Module]): Activation function. Default: nn.GELU. + head_dim (int): Dimension of the attention heads. + mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4. + mlp_dropout (float): Dropout probability for the MLP layer. Default: 0.0. + attention_dropout (float): Dropout probability for the attention layer. Default: 0.0. + num_classes (int): Number of classes. Default: 1000. 
+ """ + + def __init__( + self, + # input size parameters + input_size: Tuple[int, int], + # stem and task parameters + stem_channels: int, + # partitioning parameters + partition_size: int, + # block parameters + block_channels: List[int], + block_layers: List[int], + # attention head dimensions + head_dim: int, + stochastic_depth_prob: float, + # conv + transformer parameters + # norm_layer is applied only to the conv layers + # activation_layer is applied both to conv and transformer layers + norm_layer: Optional[Callable[..., nn.Module]] = None, + activation_layer: Callable[..., nn.Module] = nn.GELU, + # conv parameters + squeeze_ratio: float = 0.25, + expansion_ratio: float = 4, + # transformer parameters + mlp_ratio: int = 4, + mlp_dropout: float = 0.0, + attention_dropout: float = 0.0, + # task parameters + num_classes: int = 1000, + ) -> None: + super().__init__() + _log_api_usage_once(self) + + input_channels = 3 + + # https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L1029-L1030 + # for the exact parameters used in batchnorm + if norm_layer is None: + norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.01) + + # Make sure input size will be divisible by the partition size in all blocks + # Undefined behavior if H or W are not divisible by p + # https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L766 + block_input_sizes = _make_block_input_shapes(input_size, len(block_channels)) + for idx, block_input_size in enumerate(block_input_sizes): + if block_input_size[0] % partition_size != 0 or block_input_size[1] % partition_size != 0: + raise ValueError( + f"Input size {block_input_size} of block {idx} is not divisible by partition size {partition_size}. " + f"Consider changing the partition size or the input size.\n" + f"Current configuration yields the following block input sizes: {block_input_sizes}." 
+ ) + + # stem + self.stem = nn.Sequential( + Conv2dNormActivation( + input_channels, + stem_channels, + 3, + stride=2, + norm_layer=norm_layer, + activation_layer=activation_layer, + bias=False, + inplace=None, + ), + Conv2dNormActivation( + stem_channels, stem_channels, 3, stride=1, norm_layer=None, activation_layer=None, bias=True + ), + ) + + # account for stem stride + input_size = _get_conv_output_shape(input_size, kernel_size=3, stride=2, padding=1) + self.partition_size = partition_size + + # blocks + self.blocks = nn.ModuleList() + in_channels = [stem_channels] + block_channels[:-1] + out_channels = block_channels + + # precompute the stochastich depth probabilities from 0 to stochastic_depth_prob + # since we have N blocks with L layers, we will have N * L probabilities uniformly distributed + # over the range [0, stochastic_depth_prob] + p_stochastic = np.linspace(0, stochastic_depth_prob, sum(block_layers)).tolist() + + p_idx = 0 + for in_channel, out_channel, num_layers in zip(in_channels, out_channels, block_layers): + self.blocks.append( + MaxVitBlock( + in_channels=in_channel, + out_channels=out_channel, + squeeze_ratio=squeeze_ratio, + expansion_ratio=expansion_ratio, + norm_layer=norm_layer, + activation_layer=activation_layer, + head_dim=head_dim, + mlp_ratio=mlp_ratio, + mlp_dropout=mlp_dropout, + attention_dropout=attention_dropout, + partition_size=partition_size, + input_grid_size=input_size, + n_layers=num_layers, + p_stochastic=p_stochastic[p_idx : p_idx + num_layers], + ), + ) + input_size = self.blocks[-1].grid_size + p_idx += num_layers + + # see https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L1137-L1158 + # for why there is Linear -> Tanh -> Linear + self.classifier = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + nn.LayerNorm(block_channels[-1]), + nn.Linear(block_channels[-1], block_channels[-1]), + nn.Tanh(), + nn.Linear(block_channels[-1], num_classes, 
bias=False), + ) + + self._init_weights() + + def forward(self, x: Tensor) -> Tensor: + x = self.stem(x) + for block in self.blocks: + x = block(x) + x = self.classifier(x) + return x + + def _init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.zeros_(m.bias) + + +def _maxvit( + # stem parameters + stem_channels: int, + # block parameters + block_channels: List[int], + block_layers: List[int], + stochastic_depth_prob: float, + # partitioning parameters + partition_size: int, + # transformer parameters + head_dim: int, + # Weights API + weights: Optional[WeightsEnum] = None, + progress: bool = False, + # kwargs, + **kwargs: Any, +) -> MaxVit: + + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + assert weights.meta["min_size"][0] == weights.meta["min_size"][1] + _ovewrite_named_param(kwargs, "input_size", weights.meta["min_size"]) + + input_size = kwargs.pop("input_size", (224, 224)) + + model = MaxVit( + stem_channels=stem_channels, + block_channels=block_channels, + block_layers=block_layers, + stochastic_depth_prob=stochastic_depth_prob, + head_dim=head_dim, + partition_size=partition_size, + input_size=input_size, + **kwargs, + ) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model + + +class MaxVit_T_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + # URL empty until official release + url="https://download.pytorch.org/models/maxvit_t-bc5ab103.pth", + transforms=partial( + ImageClassification, crop_size=224, resize_size=224, interpolation=InterpolationMode.BICUBIC + ), + meta={ + "categories": _IMAGENET_CATEGORIES, 
+ "num_params": 30919624, + "min_size": (224, 224), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#maxvit", + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.700, + "acc@5": 96.722, + } + }, + "_ops": 5.558, + "_file_size": 118.769, + "_docs": """These weights reproduce closely the results of the paper using a similar training recipe. + They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.""", + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", MaxVit_T_Weights.IMAGENET1K_V1)) +def maxvit_t(*, weights: Optional[MaxVit_T_Weights] = None, progress: bool = True, **kwargs: Any) -> MaxVit: + """ + Constructs a maxvit_t architecture from + `MaxViT: Multi-Axis Vision Transformer `_. + + Args: + weights (:class:`~torchvision.models.MaxVit_T_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.MaxVit_T_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.maxvit.MaxVit`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.MaxVit_T_Weights + :members: + """ + weights = MaxVit_T_Weights.verify(weights) + + return _maxvit( + stem_channels=64, + block_channels=[64, 128, 256, 512], + block_layers=[2, 2, 5, 2], + head_dim=32, + stochastic_depth_prob=0.2, + partition_size=7, + weights=weights, + progress=progress, + **kwargs, + ) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/mobilenetv3.py b/.venv/lib/python3.11/site-packages/torchvision/models/mobilenetv3.py new file mode 100644 index 0000000000000000000000000000000000000000..1041d4d149fb1c414991b79156733434a2268e4b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/mobilenetv3.py @@ -0,0 +1,423 @@ +from functools import partial +from typing import Any, Callable, List, Optional, Sequence + +import torch +from torch import nn, Tensor + +from ..ops.misc import Conv2dNormActivation, SqueezeExcitation as SElayer +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _make_divisible, _ovewrite_named_param, handle_legacy_interface + + +__all__ = [ + "MobileNetV3", + "MobileNet_V3_Large_Weights", + "MobileNet_V3_Small_Weights", + "mobilenet_v3_large", + "mobilenet_v3_small", +] + + +class InvertedResidualConfig: + # Stores information listed at Tables 1 and 2 of the MobileNetV3 paper + def __init__( + self, + input_channels: int, + kernel: int, + expanded_channels: int, + out_channels: int, + use_se: bool, + activation: str, + stride: int, + dilation: int, + width_mult: float, + ): + self.input_channels = self.adjust_channels(input_channels, width_mult) + self.kernel = kernel + self.expanded_channels = self.adjust_channels(expanded_channels, width_mult) + self.out_channels = self.adjust_channels(out_channels, width_mult) + self.use_se = use_se + self.use_hs = activation == "HS" + self.stride = stride + self.dilation 
= dilation + + @staticmethod + def adjust_channels(channels: int, width_mult: float): + return _make_divisible(channels * width_mult, 8) + + +class InvertedResidual(nn.Module): + # Implemented as described at section 5 of MobileNetV3 paper + def __init__( + self, + cnf: InvertedResidualConfig, + norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = partial(SElayer, scale_activation=nn.Hardsigmoid), + ): + super().__init__() + if not (1 <= cnf.stride <= 2): + raise ValueError("illegal stride value") + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU + + # expand + if cnf.expanded_channels != cnf.input_channels: + layers.append( + Conv2dNormActivation( + cnf.input_channels, + cnf.expanded_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # depthwise + stride = 1 if cnf.dilation > 1 else cnf.stride + layers.append( + Conv2dNormActivation( + cnf.expanded_channels, + cnf.expanded_channels, + kernel_size=cnf.kernel, + stride=stride, + dilation=cnf.dilation, + groups=cnf.expanded_channels, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + if cnf.use_se: + squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8) + layers.append(se_layer(cnf.expanded_channels, squeeze_channels)) + + # project + layers.append( + Conv2dNormActivation( + cnf.expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) + + self.block = nn.Sequential(*layers) + self.out_channels = cnf.out_channels + self._is_cn = cnf.stride > 1 + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result += input + return result + + +class MobileNetV3(nn.Module): + def __init__( + self, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + num_classes: int = 
1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + dropout: float = 0.2, + **kwargs: Any, + ) -> None: + """ + MobileNet V3 main class + + Args: + inverted_residual_setting (List[InvertedResidualConfig]): Network structure + last_channel (int): The number of channels on the penultimate layer + num_classes (int): Number of classes + block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + dropout (float): The droupout probability + """ + super().__init__() + _log_api_usage_once(self) + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should not be empty") + elif not ( + isinstance(inverted_residual_setting, Sequence) + and all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting]) + ): + raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]") + + if block is None: + block = InvertedResidual + + if norm_layer is None: + norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01) + + layers: List[nn.Module] = [] + + # building first layer + firstconv_output_channels = inverted_residual_setting[0].input_channels + layers.append( + Conv2dNormActivation( + 3, + firstconv_output_channels, + kernel_size=3, + stride=2, + norm_layer=norm_layer, + activation_layer=nn.Hardswish, + ) + ) + + # building inverted residual blocks + for cnf in inverted_residual_setting: + layers.append(block(cnf, norm_layer)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = 6 * lastconv_input_channels + layers.append( + Conv2dNormActivation( + lastconv_input_channels, + lastconv_output_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=nn.Hardswish, + ) + ) + + self.features = 
nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Linear(lastconv_output_channels, last_channel), + nn.Hardswish(inplace=True), + nn.Dropout(p=dropout, inplace=True), + nn.Linear(last_channel, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _mobilenet_v3_conf( + arch: str, width_mult: float = 1.0, reduced_tail: bool = False, dilated: bool = False, **kwargs: Any +): + reduce_divider = 2 if reduced_tail else 1 + dilation = 2 if dilated else 1 + + bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult) + adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult) + + if arch == "mobilenet_v3_large": + inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, False, "RE", 1, 1), + bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1 + bneck_conf(24, 3, 72, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2 + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3 + bneck_conf(80, 3, 200, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 480, 112, True, "HS", 1, 1), + bneck_conf(112, 3, 672, 112, True, "HS", 1, 1), + bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4 + 
bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1280 // reduce_divider) # C5 + elif arch == "mobilenet_v3_small": + inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1 + bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2 + bneck_conf(24, 3, 88, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3 + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 120, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 144, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4 + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1024 // reduce_divider) # C5 + else: + raise ValueError(f"Unsupported model type {arch}") + + return inverted_residual_setting, last_channel + + +def _mobilenet_v3( + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> MobileNetV3: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model + + +_COMMON_META = { + "min_size": (1, 1), + "categories": _IMAGENET_CATEGORIES, +} + + +class MobileNet_V3_Large_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth", + 
transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 5483032, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.042, + "acc@5": 91.340, + } + }, + "_ops": 0.217, + "_file_size": 21.114, + "_docs": """These weights were trained from scratch by using a simple training recipe.""", + }, + ) + IMAGENET1K_V2 = Weights( + url="https://download.pytorch.org/models/mobilenet_v3_large-5c1a4163.pth", + transforms=partial(ImageClassification, crop_size=224, resize_size=232), + meta={ + **_COMMON_META, + "num_params": 5483032, + "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.274, + "acc@5": 92.566, + } + }, + "_ops": 0.217, + "_file_size": 21.107, + "_docs": """ + These weights improve marginally upon the results of the original paper by using a modified version of + TorchVision's `new training recipe + `_. + """, + }, + ) + DEFAULT = IMAGENET1K_V2 + + +class MobileNet_V3_Small_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 2542856, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", + "_metrics": { + "ImageNet-1K": { + "acc@1": 67.668, + "acc@5": 87.402, + } + }, + "_ops": 0.057, + "_file_size": 9.829, + "_docs": """ + These weights improve upon the results of the original paper by using a simple training recipe. 
+ """, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", MobileNet_V3_Large_Weights.IMAGENET1K_V1)) +def mobilenet_v3_large( + *, weights: Optional[MobileNet_V3_Large_Weights] = None, progress: bool = True, **kwargs: Any +) -> MobileNetV3: + """ + Constructs a large MobileNetV3 architecture from + `Searching for MobileNetV3 `__. + + Args: + weights (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.MobileNet_V3_Large_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.MobileNet_V3_Large_Weights + :members: + """ + weights = MobileNet_V3_Large_Weights.verify(weights) + + inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_large", **kwargs) + return _mobilenet_v3(inverted_residual_setting, last_channel, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", MobileNet_V3_Small_Weights.IMAGENET1K_V1)) +def mobilenet_v3_small( + *, weights: Optional[MobileNet_V3_Small_Weights] = None, progress: bool = True, **kwargs: Any +) -> MobileNetV3: + """ + Constructs a small MobileNetV3 architecture from + `Searching for MobileNetV3 `__. + + Args: + weights (:class:`~torchvision.models.MobileNet_V3_Small_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.MobileNet_V3_Small_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. 
Default is True. + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.MobileNet_V3_Small_Weights + :members: + """ + weights = MobileNet_V3_Small_Weights.verify(weights) + + inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_small", **kwargs) + return _mobilenet_v3(inverted_residual_setting, last_channel, weights, progress, **kwargs) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/squeezenet.py b/.venv/lib/python3.11/site-packages/torchvision/models/squeezenet.py new file mode 100644 index 0000000000000000000000000000000000000000..982b32107b09c280b4c7caa61e6b80be0cbf041e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/squeezenet.py @@ -0,0 +1,223 @@ +from functools import partial +from typing import Any, Optional + +import torch +import torch.nn as nn +import torch.nn.init as init + +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + + +__all__ = ["SqueezeNet", "SqueezeNet1_0_Weights", "SqueezeNet1_1_Weights", "squeezenet1_0", "squeezenet1_1"] + + +class Fire(nn.Module): + def __init__(self, inplanes: int, squeeze_planes: int, expand1x1_planes: int, expand3x3_planes: int) -> None: + super().__init__() + self.inplanes = inplanes + self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1) + self.squeeze_activation = nn.ReLU(inplace=True) + self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1) + self.expand1x1_activation = nn.ReLU(inplace=True) + self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1) + self.expand3x3_activation = nn.ReLU(inplace=True) + + def forward(self, x: 
torch.Tensor) -> torch.Tensor: + x = self.squeeze_activation(self.squeeze(x)) + return torch.cat( + [self.expand1x1_activation(self.expand1x1(x)), self.expand3x3_activation(self.expand3x3(x))], 1 + ) + + +class SqueezeNet(nn.Module): + def __init__(self, version: str = "1_0", num_classes: int = 1000, dropout: float = 0.5) -> None: + super().__init__() + _log_api_usage_once(self) + self.num_classes = num_classes + if version == "1_0": + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=7, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(512, 64, 256, 256), + ) + elif version == "1_1": + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(64, 16, 64, 64), + Fire(128, 16, 64, 64), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(128, 32, 128, 128), + Fire(256, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + Fire(512, 64, 256, 256), + ) + else: + # FIXME: Is this needed? 
SqueezeNet should only be called from the + # FIXME: squeezenet1_x() functions + # FIXME: This checking is not done for the other models + raise ValueError(f"Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected") + + # Final convolution is initialized differently from the rest + final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1) + self.classifier = nn.Sequential( + nn.Dropout(p=dropout), final_conv, nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m is final_conv: + init.normal_(m.weight, mean=0.0, std=0.01) + else: + init.kaiming_uniform_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.features(x) + x = self.classifier(x) + return torch.flatten(x, 1) + + +def _squeezenet( + version: str, + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> SqueezeNet: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) + + model = SqueezeNet(version, **kwargs) + + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + + return model + + +_COMMON_META = { + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717", + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", +} + + +class SqueezeNet1_0_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "min_size": (21, 21), + "num_params": 1248424, + "_metrics": { + "ImageNet-1K": { + "acc@1": 58.092, + "acc@5": 80.420, + } + }, + "_ops": 0.819, + "_file_size": 4.778, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class SqueezeNet1_1_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + 
url="https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "min_size": (17, 17), + "num_params": 1235496, + "_metrics": { + "ImageNet-1K": { + "acc@1": 58.178, + "acc@5": 80.624, + } + }, + "_ops": 0.349, + "_file_size": 4.729, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", SqueezeNet1_0_Weights.IMAGENET1K_V1)) +def squeezenet1_0( + *, weights: Optional[SqueezeNet1_0_Weights] = None, progress: bool = True, **kwargs: Any +) -> SqueezeNet: + """SqueezeNet model architecture from the `SqueezeNet: AlexNet-level + accuracy with 50x fewer parameters and <0.5MB model size + `_ paper. + + Args: + weights (:class:`~torchvision.models.SqueezeNet1_0_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.SqueezeNet1_0_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.squeezenet.SqueezeNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.SqueezeNet1_0_Weights + :members: + """ + weights = SqueezeNet1_0_Weights.verify(weights) + return _squeezenet("1_0", weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", SqueezeNet1_1_Weights.IMAGENET1K_V1)) +def squeezenet1_1( + *, weights: Optional[SqueezeNet1_1_Weights] = None, progress: bool = True, **kwargs: Any +) -> SqueezeNet: + """SqueezeNet 1.1 model from the `official SqueezeNet repo + `_. + + SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters + than SqueezeNet 1.0, without sacrificing accuracy. 
+ + Args: + weights (:class:`~torchvision.models.SqueezeNet1_1_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.SqueezeNet1_1_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.squeezenet.SqueezeNet`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.SqueezeNet1_1_Weights + :members: + """ + weights = SqueezeNet1_1_Weights.verify(weights) + return _squeezenet("1_1", weights, progress, **kwargs) diff --git a/.venv/lib/python3.11/site-packages/torchvision/models/vgg.py b/.venv/lib/python3.11/site-packages/torchvision/models/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..0a548570dd41fa87309e8aa4bdb373517f265060 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/models/vgg.py @@ -0,0 +1,511 @@ +from functools import partial +from typing import Any, cast, Dict, List, Optional, Union + +import torch +import torch.nn as nn + +from ..transforms._presets import ImageClassification +from ..utils import _log_api_usage_once +from ._api import register_model, Weights, WeightsEnum +from ._meta import _IMAGENET_CATEGORIES +from ._utils import _ovewrite_named_param, handle_legacy_interface + + +__all__ = [ + "VGG", + "VGG11_Weights", + "VGG11_BN_Weights", + "VGG13_Weights", + "VGG13_BN_Weights", + "VGG16_Weights", + "VGG16_BN_Weights", + "VGG19_Weights", + "VGG19_BN_Weights", + "vgg11", + "vgg11_bn", + "vgg13", + "vgg13_bn", + "vgg16", + "vgg16_bn", + "vgg19", + "vgg19_bn", +] + + +class VGG(nn.Module): + def __init__( + self, features: nn.Module, num_classes: int = 1000, init_weights: bool = True, dropout: float = 0.5 + ) -> None: + super().__init__() + _log_api_usage_once(self) + self.features = features + 
self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(p=dropout), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(p=dropout), + nn.Linear(4096, num_classes), + ) + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +def make_layers(cfg: List[Union[str, int]], batch_norm: bool = False) -> nn.Sequential: + layers: List[nn.Module] = [] + in_channels = 3 + for v in cfg: + if v == "M": + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + v = cast(int, v) + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + +cfgs: Dict[str, List[Union[str, int]]] = { + "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "D": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"], + "E": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"], +} + + +def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: bool, **kwargs: Any) -> VGG: + if weights is not None: + kwargs["init_weights"] = False + if weights.meta["categories"] is not None: + _ovewrite_named_param(kwargs, 
"num_classes", len(weights.meta["categories"])) + model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs) + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True)) + return model + + +_COMMON_META = { + "min_size": (32, 32), + "categories": _IMAGENET_CATEGORIES, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", + "_docs": """These weights were trained from scratch by using a simplified training recipe.""", +} + + +class VGG11_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg11-8a719046.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 132863336, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.020, + "acc@5": 88.628, + } + }, + "_ops": 7.609, + "_file_size": 506.84, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG11_BN_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg11_bn-6002323d.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 132868840, + "_metrics": { + "ImageNet-1K": { + "acc@1": 70.370, + "acc@5": 89.810, + } + }, + "_ops": 7.609, + "_file_size": 506.881, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG13_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg13-19584684.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 133047848, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.928, + "acc@5": 89.246, + } + }, + "_ops": 11.308, + "_file_size": 507.545, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG13_BN_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg13_bn-abd245e5.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 133053736, + "_metrics": 
{ + "ImageNet-1K": { + "acc@1": 71.586, + "acc@5": 90.374, + } + }, + "_ops": 11.308, + "_file_size": 507.59, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG16_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg16-397923af.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 138357544, + "_metrics": { + "ImageNet-1K": { + "acc@1": 71.592, + "acc@5": 90.382, + } + }, + "_ops": 15.47, + "_file_size": 527.796, + }, + ) + IMAGENET1K_FEATURES = Weights( + # Weights ported from https://github.com/amdegroot/ssd.pytorch/ + url="https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth", + transforms=partial( + ImageClassification, + crop_size=224, + mean=(0.48235, 0.45882, 0.40784), + std=(1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0), + ), + meta={ + **_COMMON_META, + "num_params": 138357544, + "categories": None, + "recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd", + "_metrics": { + "ImageNet-1K": { + "acc@1": float("nan"), + "acc@5": float("nan"), + } + }, + "_ops": 15.47, + "_file_size": 527.802, + "_docs": """ + These weights can't be used for classification because they are missing values in the `classifier` + module. Only the `features` module has valid values and can be used for feature extraction. The weights + were trained using the original input standardization method as described in the paper. 
+ """, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG16_BN_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg16_bn-6c64b313.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 138365992, + "_metrics": { + "ImageNet-1K": { + "acc@1": 73.360, + "acc@5": 91.516, + } + }, + "_ops": 15.47, + "_file_size": 527.866, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG19_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 143667240, + "_metrics": { + "ImageNet-1K": { + "acc@1": 72.376, + "acc@5": 90.876, + } + }, + "_ops": 19.632, + "_file_size": 548.051, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +class VGG19_BN_Weights(WeightsEnum): + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/vgg19_bn-c79401a0.pth", + transforms=partial(ImageClassification, crop_size=224), + meta={ + **_COMMON_META, + "num_params": 143678248, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.218, + "acc@5": 91.842, + } + }, + "_ops": 19.632, + "_file_size": 548.143, + }, + ) + DEFAULT = IMAGENET1K_V1 + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG11_Weights.IMAGENET1K_V1)) +def vgg11(*, weights: Optional[VGG11_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-11 from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG11_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG11_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. 
Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG11_Weights + :members: + """ + weights = VGG11_Weights.verify(weights) + + return _vgg("A", False, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG11_BN_Weights.IMAGENET1K_V1)) +def vgg11_bn(*, weights: Optional[VGG11_BN_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-11-BN from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG11_BN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG11_BN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG11_BN_Weights + :members: + """ + weights = VGG11_BN_Weights.verify(weights) + + return _vgg("A", True, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG13_Weights.IMAGENET1K_V1)) +def vgg13(*, weights: Optional[VGG13_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-13 from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG13_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG13_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. 
Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG13_Weights + :members: + """ + weights = VGG13_Weights.verify(weights) + + return _vgg("B", False, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG13_BN_Weights.IMAGENET1K_V1)) +def vgg13_bn(*, weights: Optional[VGG13_BN_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-13-BN from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG13_BN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG13_BN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG13_BN_Weights + :members: + """ + weights = VGG13_BN_Weights.verify(weights) + + return _vgg("B", True, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG16_Weights.IMAGENET1K_V1)) +def vgg16(*, weights: Optional[VGG16_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-16 from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG16_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG16_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. 
Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG16_Weights + :members: + """ + weights = VGG16_Weights.verify(weights) + + return _vgg("D", False, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG16_BN_Weights.IMAGENET1K_V1)) +def vgg16_bn(*, weights: Optional[VGG16_BN_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-16-BN from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG16_BN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG16_BN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG16_BN_Weights + :members: + """ + weights = VGG16_BN_Weights.verify(weights) + + return _vgg("D", True, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG19_Weights.IMAGENET1K_V1)) +def vgg19(*, weights: Optional[VGG19_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-19 from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG19_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG19_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. 
Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.VGG19_Weights + :members: + """ + weights = VGG19_Weights.verify(weights) + + return _vgg("E", False, weights, progress, **kwargs) + + +@register_model() +@handle_legacy_interface(weights=("pretrained", VGG19_BN_Weights.IMAGENET1K_V1)) +def vgg19_bn(*, weights: Optional[VGG19_BN_Weights] = None, progress: bool = True, **kwargs: Any) -> VGG: + """VGG-19_BN from `Very Deep Convolutional Networks for Large-Scale Image Recognition `__. + + Args: + weights (:class:`~torchvision.models.VGG19_BN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.VGG19_BN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.vgg.VGG`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.VGG19_BN_Weights + :members: + """ + weights = VGG19_BN_Weights.verify(weights) + + return _vgg("E", True, weights, progress, **kwargs) diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__init__.py b/.venv/lib/python3.11/site-packages/torchvision/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..827505b842d4f1ad0e16dfe54ef28658364cc9ac --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/__init__.py @@ -0,0 +1,73 @@ +from ._register_onnx_ops import _register_custom_op +from .boxes import ( + batched_nms, + box_area, + box_convert, + box_iou, + clip_boxes_to_image, + complete_box_iou, + distance_box_iou, + generalized_box_iou, + masks_to_boxes, + nms, + remove_small_boxes, +) +from .ciou_loss import complete_box_iou_loss +from .deform_conv import deform_conv2d, DeformConv2d +from .diou_loss import distance_box_iou_loss +from .drop_block import drop_block2d, drop_block3d, DropBlock2d, DropBlock3d +from .feature_pyramid_network import FeaturePyramidNetwork +from .focal_loss import sigmoid_focal_loss +from .giou_loss import generalized_box_iou_loss +from .misc import Conv2dNormActivation, Conv3dNormActivation, FrozenBatchNorm2d, MLP, Permute, SqueezeExcitation +from .poolers import MultiScaleRoIAlign +from .ps_roi_align import ps_roi_align, PSRoIAlign +from .ps_roi_pool import ps_roi_pool, PSRoIPool +from .roi_align import roi_align, RoIAlign +from .roi_pool import roi_pool, RoIPool +from .stochastic_depth import stochastic_depth, StochasticDepth + +_register_custom_op() + + +__all__ = [ + "masks_to_boxes", + "deform_conv2d", + "DeformConv2d", + "nms", + "batched_nms", + "remove_small_boxes", + "clip_boxes_to_image", + "box_convert", + "box_area", + "box_iou", + "generalized_box_iou", + "distance_box_iou", + "complete_box_iou", + "roi_align", + "RoIAlign", + "roi_pool", + "RoIPool", + "ps_roi_align", + "PSRoIAlign", + "ps_roi_pool", + "PSRoIPool", + "MultiScaleRoIAlign", + 
"FeaturePyramidNetwork", + "sigmoid_focal_loss", + "stochastic_depth", + "StochasticDepth", + "FrozenBatchNorm2d", + "Conv2dNormActivation", + "Conv3dNormActivation", + "SqueezeExcitation", + "MLP", + "Permute", + "generalized_box_iou_loss", + "distance_box_iou_loss", + "complete_box_iou_loss", + "drop_block2d", + "DropBlock2d", + "drop_block3d", + "DropBlock3d", +] diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13197c14ec8bb1dfaea1bf118a0eea6882f7dde7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_box_convert.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_box_convert.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..28cfdf093c9509ef3deddc477a067a7611ab6fae Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_box_convert.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_register_onnx_ops.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_register_onnx_ops.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d794d0b587ffd80d857607e2ea07996df4914737 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_register_onnx_ops.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c3175fdaab0fe2cf1113e37475615d046136fd5 Binary files 
/dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/boxes.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/boxes.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af16ab90f2ade05e15dafde3107794b6e8b901e1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/boxes.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ciou_loss.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ciou_loss.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3c7497a7c8ec42b28f29354ee35079591bfb405 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ciou_loss.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/deform_conv.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/deform_conv.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dd83a790cf46ae4a5e351b7975ce191b8c6766f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/deform_conv.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/diou_loss.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/diou_loss.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01b4a0feaec323622ef88687d25ad6068bc9d3a2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/diou_loss.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/drop_block.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/drop_block.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4cefd63729c169535f3557466d1bdef05a658739 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/drop_block.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/feature_pyramid_network.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/feature_pyramid_network.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..301fbb1cb141b8a01e0f0ec5386aee225e65078a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/feature_pyramid_network.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/focal_loss.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/focal_loss.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38225107a7ce92edd320ec516ea941d86292278a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/focal_loss.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/giou_loss.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/giou_loss.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d8664ee774ad60fbdcaf67a41386373f9d1dfba Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/giou_loss.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/misc.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/misc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0efcbfdcf7d0fe15fce92fbf0c166d5a89cf18b5 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/misc.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/poolers.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/poolers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc1ea572a8d985f06558fb1fbe442d677365d378 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/poolers.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_align.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_align.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7327c2ac53416d33a7a482f93e9589fd8979e732 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_align.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_pool.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_pool.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0405a60be9dff5e22d1a09f69688380754d626e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/ps_roi_pool.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_align.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_align.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85bb49055e10218c291a42c0cee76b873722338d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_align.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_pool.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_pool.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f366029320a805362eaa87406df80f8119d3c996 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/roi_pool.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/stochastic_depth.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/stochastic_depth.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ca53a12b96c996375e70f8a903beae2d3bef2ff Binary files /dev/null and b/.venv/lib/python3.11/site-packages/torchvision/ops/__pycache__/stochastic_depth.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/_box_convert.py b/.venv/lib/python3.11/site-packages/torchvision/ops/_box_convert.py new file mode 100644 index 0000000000000000000000000000000000000000..124bdd0bcc6527d04e3c9c1f710b803f7dfbbb71 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/_box_convert.py @@ -0,0 +1,81 @@ +import torch +from torch import Tensor + + +def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format. + (cx, cy) refers to center of bounding box + (w, h) are width and height of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (x1, y1, x2, y2) format. + """ + # We need to change all 4 of them so some temporary variable is needed. + cx, cy, w, h = boxes.unbind(-1) + x1 = cx - 0.5 * w + y1 = cy - 0.5 * h + x2 = cx + 0.5 * w + y2 = cy + 0.5 * h + + boxes = torch.stack((x1, y1, x2, y2), dim=-1) + + return boxes + + +def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format. 
+ (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (cx, cy, w, h) format. + """ + x1, y1, x2, y2 = boxes.unbind(-1) + cx = (x1 + x2) / 2 + cy = (y1 + y2) / 2 + w = x2 - x1 + h = y2 - y1 + + boxes = torch.stack((cx, cy, w, h), dim=-1) + + return boxes + + +def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format. + (x, y) refers to top left of bounding box. + (w, h) refers to width and height of box. + Args: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format. + """ + x, y, w, h = boxes.unbind(-1) + boxes = torch.stack([x, y, x + w, y + h], dim=-1) + return boxes + + +def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format. + (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) format. 
+ """ + x1, y1, x2, y2 = boxes.unbind(-1) + w = x2 - x1 # x2 - x1 + h = y2 - y1 # y2 - y1 + boxes = torch.stack((x1, y1, w, h), dim=-1) + return boxes diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/_register_onnx_ops.py b/.venv/lib/python3.11/site-packages/torchvision/ops/_register_onnx_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..5dd263a5d8ef497becc4aa39252a93c913b84880 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/_register_onnx_ops.py @@ -0,0 +1,107 @@ +import sys +import warnings + +import torch +from torch.onnx import symbolic_opset11 as opset11 +from torch.onnx.symbolic_helper import parse_args + +_ONNX_OPSET_VERSION_11 = 11 +_ONNX_OPSET_VERSION_16 = 16 +BASE_ONNX_OPSET_VERSION = _ONNX_OPSET_VERSION_11 + + +@parse_args("v", "v", "f") +def symbolic_multi_label_nms(g, boxes, scores, iou_threshold): + boxes = opset11.unsqueeze(g, boxes, 0) + scores = opset11.unsqueeze(g, opset11.unsqueeze(g, scores, 0), 0) + max_output_per_class = g.op("Constant", value_t=torch.tensor([sys.maxsize], dtype=torch.long)) + iou_threshold = g.op("Constant", value_t=torch.tensor([iou_threshold], dtype=torch.float)) + + # Cast boxes and scores to float32 in case they are float64 inputs + nms_out = g.op( + "NonMaxSuppression", + g.op("Cast", boxes, to_i=torch.onnx.TensorProtoDataType.FLOAT), + g.op("Cast", scores, to_i=torch.onnx.TensorProtoDataType.FLOAT), + max_output_per_class, + iou_threshold, + ) + return opset11.squeeze( + g, opset11.select(g, nms_out, 1, g.op("Constant", value_t=torch.tensor([2], dtype=torch.long))), 1 + ) + + +def _process_batch_indices_for_roi_align(g, rois): + indices = opset11.squeeze( + g, opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))), 1 + ) + return g.op("Cast", indices, to_i=torch.onnx.TensorProtoDataType.INT64) + + +def _process_rois_for_roi_align(g, rois): + return opset11.select(g, rois, 1, g.op("Constant", value_t=torch.tensor([1, 2, 3, 
4], dtype=torch.long))) + + +def _process_sampling_ratio_for_roi_align(g, sampling_ratio: int): + if sampling_ratio < 0: + warnings.warn( + "ONNX export for RoIAlign with a non-zero sampling_ratio is not supported. " + "The model will be exported with a sampling_ratio of 0." + ) + sampling_ratio = 0 + return sampling_ratio + + +@parse_args("v", "v", "f", "i", "i", "i", "i") +def roi_align_opset11(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + batch_indices = _process_batch_indices_for_roi_align(g, rois) + rois = _process_rois_for_roi_align(g, rois) + if aligned: + warnings.warn( + "ROIAlign with aligned=True is only supported in opset >= 16. " + "Please export with opset 16 or higher, or use aligned=False." + ) + sampling_ratio = _process_sampling_ratio_for_roi_align(g, sampling_ratio) + return g.op( + "RoiAlign", + input, + rois, + batch_indices, + spatial_scale_f=spatial_scale, + output_height_i=pooled_height, + output_width_i=pooled_width, + sampling_ratio_i=sampling_ratio, + ) + + +@parse_args("v", "v", "f", "i", "i", "i", "i") +def roi_align_opset16(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + batch_indices = _process_batch_indices_for_roi_align(g, rois) + rois = _process_rois_for_roi_align(g, rois) + coordinate_transformation_mode = "half_pixel" if aligned else "output_half_pixel" + sampling_ratio = _process_sampling_ratio_for_roi_align(g, sampling_ratio) + return g.op( + "RoiAlign", + input, + rois, + batch_indices, + coordinate_transformation_mode_s=coordinate_transformation_mode, + spatial_scale_f=spatial_scale, + output_height_i=pooled_height, + output_width_i=pooled_width, + sampling_ratio_i=sampling_ratio, + ) + + +@parse_args("v", "v", "f", "i", "i") +def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width): + roi_pool = g.op( + "MaxRoiPool", input, rois, pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale + ) + return roi_pool, 
None + + +def _register_custom_op(): + torch.onnx.register_custom_op_symbolic("torchvision::nms", symbolic_multi_label_nms, _ONNX_OPSET_VERSION_11) + torch.onnx.register_custom_op_symbolic("torchvision::roi_align", roi_align_opset11, _ONNX_OPSET_VERSION_11) + torch.onnx.register_custom_op_symbolic("torchvision::roi_align", roi_align_opset16, _ONNX_OPSET_VERSION_16) + torch.onnx.register_custom_op_symbolic("torchvision::roi_pool", roi_pool, _ONNX_OPSET_VERSION_11) diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/_utils.py b/.venv/lib/python3.11/site-packages/torchvision/ops/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a6ca557a98b899b7c2a11ba0dca3d64730af4268 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/_utils.py @@ -0,0 +1,106 @@ +from typing import List, Optional, Tuple, Union + +import torch +from torch import nn, Tensor + + +def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + # TODO add back the assert + # assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: + concat_boxes = _cat([b for b in boxes], dim=0) + temp = [] + for i, b in enumerate(boxes): + temp.append(torch.full_like(b[:, :1], i)) + ids = _cat(temp, dim=0) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): + if isinstance(boxes, (list, tuple)): + for _tensor in boxes: + torch._assert( + _tensor.size(1) == 4, "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]" + ) + elif isinstance(boxes, torch.Tensor): + torch._assert(boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]") + else: + torch._assert(False, "boxes is expected to be a Tensor[L, 5] or a 
List[Tensor[K, 4]]") + return + + +def split_normalization_params( + model: nn.Module, norm_classes: Optional[List[type]] = None +) -> Tuple[List[Tensor], List[Tensor]]: + # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 + if not norm_classes: + norm_classes = [ + nn.modules.batchnorm._BatchNorm, + nn.LayerNorm, + nn.GroupNorm, + nn.modules.instancenorm._InstanceNorm, + nn.LocalResponseNorm, + ] + + for t in norm_classes: + if not issubclass(t, nn.Module): + raise ValueError(f"Class {t} is not a subclass of nn.Module.") + + classes = tuple(norm_classes) + + norm_params = [] + other_params = [] + for module in model.modules(): + if next(module.children(), None): + other_params.extend(p for p in module.parameters(recurse=False) if p.requires_grad) + elif isinstance(module, classes): + norm_params.extend(p for p in module.parameters() if p.requires_grad) + else: + other_params.extend(p for p in module.parameters() if p.requires_grad) + return norm_params, other_params + + +def _upcast(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.is_floating_point(): + return t if t.dtype in (torch.float32, torch.float64) else t.float() + else: + return t if t.dtype in (torch.int32, torch.int64) else t.int() + + +def _upcast_non_float(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.dtype not in (torch.float32, torch.float64): + return t.float() + return t + + +def _loss_inter_union( + boxes1: torch.Tensor, + boxes2: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsctk = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & 
(xkis2 > xkis1) + intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk + + return intsctk, unionk diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/boxes.py b/.venv/lib/python3.11/site-packages/torchvision/ops/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..309990ea03ad90eb7c82297a025c12bec21d1bb7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/boxes.py @@ -0,0 +1,433 @@ +from typing import Tuple + +import torch +import torchvision +from torch import Tensor +from torchvision.extension import _assert_has_ops + +from ..utils import _log_api_usage_once +from ._box_convert import _box_cxcywh_to_xyxy, _box_xywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xyxy_to_xywh +from ._utils import _upcast + + +def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor: + """ + Performs non-maximum suppression (NMS) on the boxes according + to their intersection-over-union (IoU). + + NMS iteratively removes lower scoring boxes which have an + IoU greater than ``iou_threshold`` with another (higher scoring) + box. + + If multiple boxes have the exact same score and satisfy the IoU + criterion with respect to a reference box, the selected box is + not guaranteed to be the same between CPU and GPU. This is similar + to the behavior of argsort in PyTorch when repeated values are present. + + Args: + boxes (Tensor[N, 4])): boxes to perform NMS on. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. 
+ scores (Tensor[N]): scores for each one of the boxes + iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold + + Returns: + Tensor: int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(nms) + _assert_has_ops() + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + + +def batched_nms( + boxes: Tensor, + scores: Tensor, + idxs: Tensor, + iou_threshold: float, +) -> Tensor: + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 4]): boxes where NMS will be performed. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. + scores (Tensor[N]): scores for each one of the boxes + idxs (Tensor[N]): indices of the categories for each one of the boxes. 
@torch.jit._script_if_tracing
def _batched_nms_coordinate_trick(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    """Class-aware NMS via a single nms() call.

    Each class's boxes are shifted into a disjoint coordinate range (offset
    proportional to the class index), so boxes of different classes can never
    overlap and one plain NMS pass suffices.
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    largest_coord = boxes.max()
    # Offset > max coordinate guarantees disjoint ranges per class.
    per_class_shift = idxs.to(boxes) * (largest_coord + torch.tensor(1).to(boxes))
    shifted_boxes = boxes + per_class_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)
def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor:
    """
    Remove every box from ``boxes`` which contains at least one side length
    that is smaller than ``min_size``.

    .. note::
        For sanitizing a :class:`~torchvision.tv_tensors.BoundingBoxes` object, consider using
        the transform :func:`~torchvision.transforms.v2.SanitizeBoundingBoxes` instead.

    Args:
        boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
            with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        min_size (float): minimum size

    Returns:
        Tensor[K]: indices of the boxes that have both sides
        larger than ``min_size``
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(remove_small_boxes)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    # Keep only boxes whose width AND height both reach min_size.
    big_enough = (widths >= min_size) & (heights >= min_size)
    return torch.where(big_enough)[0]
+ size (Tuple[height, width]): size of the image + + Returns: + Tensor[N, 4]: clipped boxes + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(clip_boxes_to_image) + dim = boxes.dim() + boxes_x = boxes[..., 0::2] + boxes_y = boxes[..., 1::2] + height, width = size + + if torchvision._is_tracing(): + boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device)) + boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device)) + boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device)) + boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device)) + else: + boxes_x = boxes_x.clamp(min=0, max=width) + boxes_y = boxes_y.clamp(min=0, max=height) + + clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim) + return clipped_boxes.reshape(boxes.shape) + + +def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor: + """ + Converts :class:`torch.Tensor` boxes from a given ``in_fmt`` to ``out_fmt``. + + .. note:: + For converting a :class:`torch.Tensor` or a :class:`~torchvision.tv_tensors.BoundingBoxes` object + between different formats, + consider using :func:`~torchvision.transforms.v2.functional.convert_bounding_box_format` instead. + Or see the corresponding transform :func:`~torchvision.transforms.v2.ConvertBoundingBoxFormat`. + + Supported ``in_fmt`` and ``out_fmt`` strings are: + + ``'xyxy'``: boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right. + This is the format that torchvision utilities expect. + + ``'xywh'``: boxes are represented via corner, width and height, x1, y2 being top left, w, h being width and height. + + ``'cxcywh'``: boxes are represented via centre, width and height, cx, cy being center of box, w, h + being width and height. + + Args: + boxes (Tensor[N, 4]): boxes which will be converted. + in_fmt (str): Input format of given boxes. 
Supported formats are ['xyxy', 'xywh', 'cxcywh']. + out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'] + + Returns: + Tensor[N, 4]: Boxes into converted format. + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(box_convert) + allowed_fmts = ("xyxy", "xywh", "cxcywh") + if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts: + raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt") + + if in_fmt == out_fmt: + return boxes.clone() + + if in_fmt != "xyxy" and out_fmt != "xyxy": + # convert to xyxy and change in_fmt xyxy + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + in_fmt = "xyxy" + + if in_fmt == "xyxy": + if out_fmt == "xywh": + boxes = _box_xyxy_to_xywh(boxes) + elif out_fmt == "cxcywh": + boxes = _box_xyxy_to_cxcywh(boxes) + elif out_fmt == "xyxy": + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + return boxes + + +def box_area(boxes: Tensor) -> Tensor: + """ + Computes the area of a set of bounding boxes, which are specified by their + (x1, y1, x2, y2) coordinates. + + Args: + boxes (Tensor[N, 4]): boxes for which the area will be computed. They + are expected to be in (x1, y1, x2, y2) format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 
+ + Returns: + Tensor[N]: the area for each box + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(box_area) + boxes = _upcast(boxes) + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]: + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + + wh = _upcast(rb - lt).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + union = area1[:, None] + area2 - inter + + return inter, union + + +def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(box_iou) + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + return iou + + +# Implementation adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py +def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return generalized intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 
+ + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values + for every element in boxes1 and boxes2 + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(generalized_box_iou) + + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + areai = whi[:, :, 0] * whi[:, :, 1] + + return iou - (areai - union) / areai + + +def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return complete intersection-over-union (Jaccard index) between two sets of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. 
Default: 1e-7 + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values + for every element in boxes1 and boxes2 + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(complete_box_iou) + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + + diou, iou = _box_diou_iou(boxes1, boxes2, eps) + + w_pred = boxes1[:, None, 2] - boxes1[:, None, 0] + h_pred = boxes1[:, None, 3] - boxes1[:, None, 1] + + w_gt = boxes2[:, 2] - boxes2[:, 0] + h_gt = boxes2[:, 3] - boxes2[:, 1] + + v = (4 / (torch.pi**2)) * torch.pow(torch.atan(w_pred / h_pred) - torch.atan(w_gt / h_gt), 2) + with torch.no_grad(): + alpha = v / (1 - iou + v + eps) + return diou - alpha * v + + +def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return distance intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. 
def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Tensor, Tensor]:
    """Return the pairwise (DIoU, IoU) matrices for two sets of xyxy boxes."""
    iou = box_iou(boxes1, boxes2)
    # Corners of the smallest box enclosing each pair, [N, M, 2].
    enclose_tl = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    enclose_br = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
    enclose_wh = _upcast(enclose_br - enclose_tl).clamp(min=0)  # [N,M,2]
    # Squared diagonal of the enclosing box; eps keeps the later division finite.
    diagonal_distance_squared = (enclose_wh[:, :, 0] ** 2) + (enclose_wh[:, :, 1] ** 2) + eps
    # Box centers.
    cx1 = (boxes1[:, 0] + boxes1[:, 2]) / 2
    cy1 = (boxes1[:, 1] + boxes1[:, 3]) / 2
    cx2 = (boxes2[:, 0] + boxes2[:, 2]) / 2
    cy2 = (boxes2[:, 1] + boxes2[:, 3]) / 2
    # Squared distance between the two boxes' centers, broadcast to [N, M].
    centers_distance_squared = (_upcast((cx1[:, None] - cx2[None, :])) ** 2) + (
        _upcast((cy1[:, None] - cy2[None, :])) ** 2
    )
    # DIoU penalizes IoU by the normalized squared center distance.
    return iou - (centers_distance_squared / diagonal_distance_squared), iou
+ + Returns: + Tensor[N, 4]: bounding boxes + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(masks_to_boxes) + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device, dtype=torch.float) + + n = masks.shape[0] + + bounding_boxes = torch.zeros((n, 4), device=masks.device, dtype=torch.float) + + for index, mask in enumerate(masks): + y, x = torch.where(mask != 0) + + bounding_boxes[index, 0] = torch.min(x) + bounding_boxes[index, 1] = torch.min(y) + bounding_boxes[index, 2] = torch.max(x) + bounding_boxes[index, 3] = torch.max(y) + + return bounding_boxes diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/ciou_loss.py b/.venv/lib/python3.11/site-packages/torchvision/ops/ciou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..75a1c4cb1f39f7b8ff33bdac3641ca71b6413d20 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/ciou_loss.py @@ -0,0 +1,78 @@ +import torch + +from ..utils import _log_api_usage_once +from ._utils import _upcast_non_float +from .diou_loss import _diou_iou_loss + + +def complete_box_iou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + + """ + Gradient-friendly IoU loss with an additional penalty that is non-zero when the + boxes do not overlap. This loss function considers important geometrical + factors such as overlap area, normalized central point distance and aspect ratio. + This loss is symmetric, so the boxes1 and boxes2 arguments are interchangeable. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``, and The two boxes should have the + same dimensions. 
+ + Args: + boxes1 : (Tensor[N, 4] or Tensor[4]) first set of boxes + boxes2 : (Tensor[N, 4] or Tensor[4]) second set of boxes + reduction : (string, optional) Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be + applied to the output. ``'mean'``: The output will be averaged. + ``'sum'``: The output will be summed. Default: ``'none'`` + eps : (float): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor: Loss tensor with the reduction option applied. + + Reference: + Zhaohui Zheng et al.: Complete Intersection over Union Loss: + https://arxiv.org/abs/1911.08287 + + """ + + # Original Implementation from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(complete_box_iou_loss) + + boxes1 = _upcast_non_float(boxes1) + boxes2 = _upcast_non_float(boxes2) + + diou_loss, iou = _diou_iou_loss(boxes1, boxes2) + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # width and height of boxes + w_pred = x2 - x1 + h_pred = y2 - y1 + w_gt = x2g - x1g + h_gt = y2g - y1g + v = (4 / (torch.pi**2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) + with torch.no_grad(): + alpha = v / (1 - iou + v + eps) + + loss = diou_loss + alpha * v + + # Check reduction option and return loss accordingly + if reduction == "none": + pass + elif reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + else: + raise ValueError( + f"Invalid Value for arg 'reduction': '{reduction} \n Supported reduction modes: 'none', 'mean', 'sum'" + ) + return loss diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/deform_conv.py b/.venv/lib/python3.11/site-packages/torchvision/ops/deform_conv.py new file mode 100644 index 
0000000000000000000000000000000000000000..b3cc83332a0a780710cdd61f0930f81ca9ab18b9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/deform_conv.py @@ -0,0 +1,195 @@ +import math +from typing import Optional, Tuple + +import torch +from torch import nn, Tensor +from torch.nn import init +from torch.nn.modules.utils import _pair +from torch.nn.parameter import Parameter +from torchvision.extension import _assert_has_ops + +from ..utils import _log_api_usage_once + + +def deform_conv2d( + input: Tensor, + offset: Tensor, + weight: Tensor, + bias: Optional[Tensor] = None, + stride: Tuple[int, int] = (1, 1), + padding: Tuple[int, int] = (0, 0), + dilation: Tuple[int, int] = (1, 1), + mask: Optional[Tensor] = None, +) -> Tensor: + r""" + Performs Deformable Convolution v2, described in + `Deformable ConvNets v2: More Deformable, Better Results + `__ if :attr:`mask` is not ``None`` and + Performs Deformable Convolution, described in + `Deformable Convolutional Networks + `__ if :attr:`mask` is ``None``. + + Args: + input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor + offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width, out_height, out_width]): + offsets to be applied for each position in the convolution kernel. + weight (Tensor[out_channels, in_channels // groups, kernel_height, kernel_width]): convolution weights, + split into groups of size (in_channels // groups) + bias (Tensor[out_channels]): optional bias of shape (out_channels,). Default: None + stride (int or Tuple[int, int]): distance between convolution centers. Default: 1 + padding (int or Tuple[int, int]): height/width of padding of zeroes around + each image. Default: 0 + dilation (int or Tuple[int, int]): the spacing between kernel elements. Default: 1 + mask (Tensor[batch_size, offset_groups * kernel_height * kernel_width, out_height, out_width]): + masks to be applied for each position in the convolution kernel. 
Default: None + + Returns: + Tensor[batch_sz, out_channels, out_h, out_w]: result of convolution + + Examples:: + >>> input = torch.rand(4, 3, 10, 10) + >>> kh, kw = 3, 3 + >>> weight = torch.rand(5, 3, kh, kw) + >>> # offset and mask should have the same spatial size as the output + >>> # of the convolution. In this case, for an input of 10, stride of 1 + >>> # and kernel size of 3, without padding, the output size is 8 + >>> offset = torch.rand(4, 2 * kh * kw, 8, 8) + >>> mask = torch.rand(4, kh * kw, 8, 8) + >>> out = deform_conv2d(input, offset, weight, mask=mask) + >>> print(out.shape) + >>> # returns + >>> torch.Size([4, 5, 8, 8]) + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(deform_conv2d) + _assert_has_ops() + out_channels = weight.shape[0] + + use_mask = mask is not None + + if mask is None: + mask = torch.zeros((input.shape[0], 1), device=input.device, dtype=input.dtype) + + if bias is None: + bias = torch.zeros(out_channels, device=input.device, dtype=input.dtype) + + stride_h, stride_w = _pair(stride) + pad_h, pad_w = _pair(padding) + dil_h, dil_w = _pair(dilation) + weights_h, weights_w = weight.shape[-2:] + _, n_in_channels, _, _ = input.shape + + n_offset_grps = offset.shape[1] // (2 * weights_h * weights_w) + n_weight_grps = n_in_channels // weight.shape[1] + + if n_offset_grps == 0: + raise RuntimeError( + "the shape of the offset tensor at dimension 1 is not valid. It should " + "be a multiple of 2 * weight.size[2] * weight.size[3].\n" + f"Got offset.shape[1]={offset.shape[1]}, while 2 * weight.size[2] * weight.size[3]={2 * weights_h * weights_w}" + ) + + return torch.ops.torchvision.deform_conv2d( + input, + weight, + offset, + mask, + bias, + stride_h, + stride_w, + pad_h, + pad_w, + dil_h, + dil_w, + n_weight_grps, + n_offset_grps, + use_mask, + ) + + +class DeformConv2d(nn.Module): + """ + See :func:`deform_conv2d`. 
+ """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + ): + super().__init__() + _log_api_usage_once(self) + + if in_channels % groups != 0: + raise ValueError("in_channels must be divisible by groups") + if out_channels % groups != 0: + raise ValueError("out_channels must be divisible by groups") + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + + self.weight = Parameter( + torch.empty(out_channels, in_channels // groups, self.kernel_size[0], self.kernel_size[1]) + ) + + if bias: + self.bias = Parameter(torch.empty(out_channels)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self) -> None: + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, input: Tensor, offset: Tensor, mask: Optional[Tensor] = None) -> Tensor: + """ + Args: + input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor + offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width, out_height, out_width]): + offsets to be applied for each position in the convolution kernel. + mask (Tensor[batch_size, offset_groups * kernel_height * kernel_width, out_height, out_width]): + masks to be applied for each position in the convolution kernel. 
+ """ + return deform_conv2d( + input, + offset, + self.weight, + self.bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + mask=mask, + ) + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"{self.in_channels}" + f", {self.out_channels}" + f", kernel_size={self.kernel_size}" + f", stride={self.stride}" + ) + s += f", padding={self.padding}" if self.padding != (0, 0) else "" + s += f", dilation={self.dilation}" if self.dilation != (1, 1) else "" + s += f", groups={self.groups}" if self.groups != 1 else "" + s += ", bias=False" if self.bias is None else "" + s += ")" + + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/diou_loss.py b/.venv/lib/python3.11/site-packages/torchvision/ops/diou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..c64c6673a882f439f8ba9b0e25d1387d8d2f4284 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/diou_loss.py @@ -0,0 +1,94 @@ +from typing import Tuple + +import torch + +from ..utils import _log_api_usage_once +from ._utils import _loss_inter_union, _upcast_non_float + + +def distance_box_iou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + + """ + Gradient-friendly IoU loss with an additional penalty that is non-zero when the + distance between boxes' centers isn't zero. Indeed, for two exactly overlapping + boxes, the distance IoU is the same as the IoU loss. + This loss is symmetric, so the boxes1 and boxes2 arguments are interchangeable. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``, and The two boxes should have the + same dimensions. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[N, 4]): second set of boxes + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: No reduction will be + applied to the output. ``'mean'``: The output will be averaged. + ``'sum'``: The output will be summed. Default: ``'none'`` + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor: Loss tensor with the reduction option applied. + + Reference: + Zhaohui Zheng et al.: Distance Intersection over Union Loss: + https://arxiv.org/abs/1911.08287 + """ + + # Original Implementation from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(distance_box_iou_loss) + + boxes1 = _upcast_non_float(boxes1) + boxes2 = _upcast_non_float(boxes2) + + loss, _ = _diou_iou_loss(boxes1, boxes2, eps) + + # Check reduction option and return loss accordingly + if reduction == "none": + pass + elif reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + else: + raise ValueError( + f"Invalid Value for arg 'reduction': '{reduction} \n Supported reduction modes: 'none', 'mean', 'sum'" + ) + return loss + + +def _diou_iou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + eps: float = 1e-7, +) -> Tuple[torch.Tensor, torch.Tensor]: + + intsct, union = _loss_inter_union(boxes1, boxes2) + iou = intsct / (union + eps) + # smallest enclosing box + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + # The diagonal distance of the smallest enclosing box squared + diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps + # centers of boxes + x_p = (x2 + x1) / 2 + y_p = (y2 + y1) / 2 + x_g = (x1g + x2g) / 2 + y_g = (y1g + y2g) / 2 + # The distance between boxes' centers squared. 
def _diou_iou_loss(
    boxes1: torch.Tensor,
    boxes2: torch.Tensor,
    eps: float = 1e-7,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Return the per-box (DIoU loss, IoU) pair; shared by the public DIoU/CIoU losses."""

    intsct, union = _loss_inter_union(boxes1, boxes2)
    iou = intsct / (union + eps)
    # smallest enclosing box
    x1, y1, x2, y2 = boxes1.unbind(dim=-1)
    x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)
    xc1 = torch.min(x1, x1g)
    yc1 = torch.min(y1, y1g)
    xc2 = torch.max(x2, x2g)
    yc2 = torch.max(y2, y2g)
    # The diagonal distance of the smallest enclosing box squared
    diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps
    # centers of boxes
    x_p = (x2 + x1) / 2
    y_p = (y2 + y1) / 2
    x_g = (x1g + x2g) / 2
    y_g = (y1g + y2g) / 2
    # The distance between boxes' centers squared.
    centers_distance_squared = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2)
    # The distance IoU is the IoU penalized by a normalized
    # distance between boxes' centers squared.
    loss = 1 - iou + (centers_distance_squared / diagonal_distance_squared)
    return loss, iou


def drop_block2d(
    input: Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-06, training: bool = True
) -> Tensor:
    """
    Implements DropBlock2d from `"DropBlock: A regularization method for convolutional networks"
    <https://arxiv.org/abs/1810.12890>`.

    Args:
        input (Tensor[N, C, H, W]): The input tensor or 4-dimensions with the first one
            being its batch i.e. a batch with ``N`` rows.
        p (float): Probability of an element to be dropped.
        block_size (int): Size of the block to drop.
        inplace (bool): If set to ``True``, will do this operation in-place. Default: ``False``.
        eps (float): A value added to the denominator for numerical stability. Default: 1e-6.
        training (bool): apply dropblock if is ``True``. Default: ``True``.

    Returns:
        Tensor[N, C, H, W]: The randomly zeroed tensor after dropblock.

    Raises:
        ValueError: if ``p`` is outside [0, 1] or ``input`` is not 4-D.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(drop_block2d)
    if p < 0.0 or p > 1.0:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}.")
    if input.ndim != 4:
        raise ValueError(f"input should be 4 dimensional. Got {input.ndim} dimensions.")
    if not training or p == 0.0:
        return input

    N, C, H, W = input.size()
    # A block must fit inside the feature map.
    block_size = min(block_size, W, H)
    # compute the gamma of Bernoulli distribution
    # gamma rescales p so that, after each seed grows into a block_size^2
    # square, the expected fraction of dropped activations is ~p.
    gamma = (p * H * W) / ((block_size**2) * ((H - block_size + 1) * (W - block_size + 1)))
    # Seeds are only sampled where a full block fits (valid region).
    noise = torch.empty((N, C, H - block_size + 1, W - block_size + 1), dtype=input.dtype, device=input.device)
    noise.bernoulli_(gamma)

    # Pad the seed map back to H x W, then dilate each seed into a
    # block_size x block_size square with a max-pool.
    noise = F.pad(noise, [block_size // 2] * 4, value=0)
    noise = F.max_pool2d(noise, stride=(1, 1), kernel_size=(block_size, block_size), padding=block_size // 2)
    # Invert: 1 = keep, 0 = dropped block.
    noise = 1 - noise
    # Rescale the kept activations so the expected activation sum is preserved.
    normalize_scale = noise.numel() / (eps + noise.sum())
    if inplace:
        input.mul_(noise).mul_(normalize_scale)
    else:
        input = input * noise * normalize_scale
    return input


def drop_block3d(
    input: Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-06, training: bool = True
) -> Tensor:
    """
    Implements DropBlock3d from `"DropBlock: A regularization method for convolutional networks"
    <https://arxiv.org/abs/1810.12890>`.

    Args:
        input (Tensor[N, C, D, H, W]): The input tensor or 5-dimensions with the first one
            being its batch i.e. a batch with ``N`` rows.
        p (float): Probability of an element to be dropped.
        block_size (int): Size of the block to drop.
        inplace (bool): If set to ``True``, will do this operation in-place. Default: ``False``.
        eps (float): A value added to the denominator for numerical stability. Default: 1e-6.
        training (bool): apply dropblock if is ``True``. Default: ``True``.

    Returns:
        Tensor[N, C, D, H, W]: The randomly zeroed tensor after dropblock.

    Raises:
        ValueError: if ``p`` is outside [0, 1] or ``input`` is not 5-D.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(drop_block3d)
    if p < 0.0 or p > 1.0:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}.")
    if input.ndim != 5:
        raise ValueError(f"input should be 5 dimensional. Got {input.ndim} dimensions.")
    if not training or p == 0.0:
        return input

    N, C, D, H, W = input.size()
    block_size = min(block_size, D, H, W)
    # compute the gamma of Bernoulli distribution
    # Same derivation as drop_block2d, extended to the depth dimension.
    gamma = (p * D * H * W) / ((block_size**3) * ((D - block_size + 1) * (H - block_size + 1) * (W - block_size + 1)))
    noise = torch.empty(
        (N, C, D - block_size + 1, H - block_size + 1, W - block_size + 1), dtype=input.dtype, device=input.device
    )
    noise.bernoulli_(gamma)

    # Pad seeds back to D x H x W and grow them into cubes via max-pool.
    noise = F.pad(noise, [block_size // 2] * 6, value=0)
    noise = F.max_pool3d(
        noise, stride=(1, 1, 1), kernel_size=(block_size, block_size, block_size), padding=block_size // 2
    )
    noise = 1 - noise
    normalize_scale = noise.numel() / (eps + noise.sum())
    if inplace:
        input.mul_(noise).mul_(normalize_scale)
    else:
        input = input * noise * normalize_scale
    return input


# Registered so torch.fx tracing treats the functional as a leaf call.
torch.fx.wrap("drop_block2d")


class DropBlock2d(nn.Module):
    """
    See :func:`drop_block2d`.
    """

    def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-06) -> None:
        super().__init__()

        # Hyper-parameters are stored verbatim and forwarded to the functional.
        self.p = p
        self.block_size = block_size
        self.inplace = inplace
        self.eps = eps

    def forward(self, input: Tensor) -> Tensor:
        """
        Args:
            input (Tensor): Input feature map on which some areas will be randomly
                dropped.
        Returns:
            Tensor: The tensor after DropBlock layer.
        """
        # self.training toggles the op off in eval mode, like nn.Dropout.
        return drop_block2d(input, self.p, self.block_size, self.inplace, self.eps, self.training)

    def __repr__(self) -> str:
        s = f"{self.__class__.__name__}(p={self.p}, block_size={self.block_size}, inplace={self.inplace})"
        return s


torch.fx.wrap("drop_block3d")
+ """ + + def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-06) -> None: + super().__init__(p, block_size, inplace, eps) + + def forward(self, input: Tensor) -> Tensor: + """ + Args: + input (Tensor): Input feature map on which some areas will be randomly + dropped. + Returns: + Tensor: The tensor after DropBlock layer. + """ + return drop_block3d(input, self.p, self.block_size, self.inplace, self.eps, self.training) diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/feature_pyramid_network.py b/.venv/lib/python3.11/site-packages/torchvision/ops/feature_pyramid_network.py new file mode 100644 index 0000000000000000000000000000000000000000..2e7aef0e2fad773ac884d576f781a305e5e880d0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/feature_pyramid_network.py @@ -0,0 +1,250 @@ +from collections import OrderedDict +from typing import Callable, Dict, List, Optional, Tuple + +import torch.nn.functional as F +from torch import nn, Tensor + +from ..ops.misc import Conv2dNormActivation +from ..utils import _log_api_usage_once + + +class ExtraFPNBlock(nn.Module): + """ + Base class for the extra block in the FPN. + + Args: + results (List[Tensor]): the result of the FPN + x (List[Tensor]): the original feature maps + names (List[str]): the names for each one of the + original feature maps + + Returns: + results (List[Tensor]): the extended set of results + of the FPN + names (List[str]): the extended set of names for the results + """ + + def forward( + self, + results: List[Tensor], + x: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: + pass + + +class FeaturePyramidNetwork(nn.Module): + """ + Module that adds a FPN from on top of a set of feature maps. This is based on + `"Feature Pyramid Network for Object Detection" `_. + + The feature maps are currently supposed to be in increasing depth + order. 
+ + The input to the model is expected to be an OrderedDict[Tensor], containing + the feature maps on top of which the FPN will be added. + + Args: + in_channels_list (list[int]): number of channels for each feature map that + is passed to the module + out_channels (int): number of channels of the FPN representation + extra_blocks (ExtraFPNBlock or None): if provided, extra operations will + be performed. It is expected to take the fpn features, the original + features and the names of the original features as input, and returns + a new list of feature maps and their corresponding names + norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None + + Examples:: + + >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) + >>> # get some dummy data + >>> x = OrderedDict() + >>> x['feat0'] = torch.rand(1, 10, 64, 64) + >>> x['feat2'] = torch.rand(1, 20, 16, 16) + >>> x['feat3'] = torch.rand(1, 30, 8, 8) + >>> # compute the FPN on top of x + >>> output = m(x) + >>> print([(k, v.shape) for k, v in output.items()]) + >>> # returns + >>> [('feat0', torch.Size([1, 5, 64, 64])), + >>> ('feat2', torch.Size([1, 5, 16, 16])), + >>> ('feat3', torch.Size([1, 5, 8, 8]))] + + """ + + _version = 2 + + def __init__( + self, + in_channels_list: List[int], + out_channels: int, + extra_blocks: Optional[ExtraFPNBlock] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + ): + super().__init__() + _log_api_usage_once(self) + self.inner_blocks = nn.ModuleList() + self.layer_blocks = nn.ModuleList() + for in_channels in in_channels_list: + if in_channels == 0: + raise ValueError("in_channels=0 is currently not supported") + inner_block_module = Conv2dNormActivation( + in_channels, out_channels, kernel_size=1, padding=0, norm_layer=norm_layer, activation_layer=None + ) + layer_block_module = Conv2dNormActivation( + out_channels, out_channels, kernel_size=3, norm_layer=norm_layer, activation_layer=None + ) + 
self.inner_blocks.append(inner_block_module) + self.layer_blocks.append(layer_block_module) + + # initialize parameters now to avoid modifying the initialization of top_blocks + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, a=1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + if extra_blocks is not None: + if not isinstance(extra_blocks, ExtraFPNBlock): + raise TypeError(f"extra_blocks should be of type ExtraFPNBlock not {type(extra_blocks)}") + self.extra_blocks = extra_blocks + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + num_blocks = len(self.inner_blocks) + for block in ["inner_blocks", "layer_blocks"]: + for i in range(num_blocks): + for type in ["weight", "bias"]: + old_key = f"{prefix}{block}.{i}.{type}" + new_key = f"{prefix}{block}.{i}.0.{type}" + if old_key in state_dict: + state_dict[new_key] = state_dict.pop(old_key) + + super()._load_from_state_dict( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + + def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: + """ + This is equivalent to self.inner_blocks[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = len(self.inner_blocks) + if idx < 0: + idx += num_blocks + out = x + for i, module in enumerate(self.inner_blocks): + if i == idx: + out = module(x) + return out + + def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: + """ + This is equivalent to self.layer_blocks[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = len(self.layer_blocks) + if idx < 0: + idx += num_blocks + out = x + for i, module in enumerate(self.layer_blocks): + if i == idx: + out = module(x) + return out + + def forward(self, x: Dict[str, Tensor]) -> Dict[str, 
Tensor]: + """ + Computes the FPN for a set of feature maps. + + Args: + x (OrderedDict[Tensor]): feature maps for each feature level. + + Returns: + results (OrderedDict[Tensor]): feature maps after FPN layers. + They are ordered from the highest resolution first. + """ + # unpack OrderedDict into two lists for easier handling + names = list(x.keys()) + x = list(x.values()) + + last_inner = self.get_result_from_inner_blocks(x[-1], -1) + results = [] + results.append(self.get_result_from_layer_blocks(last_inner, -1)) + + for idx in range(len(x) - 2, -1, -1): + inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) + feat_shape = inner_lateral.shape[-2:] + inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest") + last_inner = inner_lateral + inner_top_down + results.insert(0, self.get_result_from_layer_blocks(last_inner, idx)) + + if self.extra_blocks is not None: + results, names = self.extra_blocks(results, x, names) + + # make it back an OrderedDict + out = OrderedDict([(k, v) for k, v in zip(names, results)]) + + return out + + +class LastLevelMaxPool(ExtraFPNBlock): + """ + Applies a max_pool2d (not actual max_pool2d, we just subsample) on top of the last feature map + """ + + def forward( + self, + x: List[Tensor], + y: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: + names.append("pool") + # Use max pooling to simulate stride 2 subsampling + x.append(F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)) + return x, names + + +class LastLevelP6P7(ExtraFPNBlock): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7. 
+ """ + + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + nn.init.kaiming_uniform_(module.weight, a=1) + nn.init.constant_(module.bias, 0) + self.use_P5 = in_channels == out_channels + + def forward( + self, + p: List[Tensor], + c: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: + p5, c5 = p[-1], c[-1] + x = p5 if self.use_P5 else c5 + p6 = self.p6(x) + p7 = self.p7(F.relu(p6)) + p.extend([p6, p7]) + names.extend(["p6", "p7"]) + return p, names diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/focal_loss.py b/.venv/lib/python3.11/site-packages/torchvision/ops/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..08c282555fca01cbee78d79c2a0fa7bb8a9df570 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/focal_loss.py @@ -0,0 +1,58 @@ +import torch +import torch.nn.functional as F + +from ..utils import _log_api_usage_once + + +def sigmoid_focal_loss( + inputs: torch.Tensor, + targets: torch.Tensor, + alpha: float = 0.25, + gamma: float = 2, + reduction: str = "none", +) -> torch.Tensor: + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + + Args: + inputs (Tensor): A float tensor of arbitrary shape. + The predictions for each example. + targets (Tensor): A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha (float): Weighting factor in range (0,1) to balance + positive vs negative examples or -1 for ignore. Default: ``0.25``. + gamma (float): Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. Default: ``2``. 
+ reduction (string): ``'none'`` | ``'mean'`` | ``'sum'`` + ``'none'``: No reduction will be applied to the output. + ``'mean'``: The output will be averaged. + ``'sum'``: The output will be summed. Default: ``'none'``. + Returns: + Loss tensor with the reduction option applied. + """ + # Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(sigmoid_focal_loss) + p = torch.sigmoid(inputs) + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") + p_t = p * targets + (1 - p) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + + # Check reduction option and return loss accordingly + if reduction == "none": + pass + elif reduction == "mean": + loss = loss.mean() + elif reduction == "sum": + loss = loss.sum() + else: + raise ValueError( + f"Invalid Value for arg 'reduction': '{reduction} \n Supported reduction modes: 'none', 'mean', 'sum'" + ) + return loss diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/giou_loss.py b/.venv/lib/python3.11/site-packages/torchvision/ops/giou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ec8bc8852fe71258cb5a46fdf5428581a23b4c3e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/giou_loss.py @@ -0,0 +1,76 @@ +import torch + +from ..utils import _log_api_usage_once +from ._utils import _loss_inter_union, _upcast_non_float + + +def generalized_box_iou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + + """ + Gradient-friendly IoU loss with an additional penalty that is non-zero when the + boxes do not overlap and scales with the size of their smallest enclosing box. 
+ This loss is symmetric, so the boxes1 and boxes2 arguments are interchangeable. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``, and The two boxes should have the + same dimensions. + + Args: + boxes1 (Tensor[N, 4] or Tensor[4]): first set of boxes + boxes2 (Tensor[N, 4] or Tensor[4]): second set of boxes + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be + applied to the output. ``'mean'``: The output will be averaged. + ``'sum'``: The output will be summed. Default: ``'none'`` + eps (float): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor: Loss tensor with the reduction option applied. + + Reference: + Hamid Rezatofighi et al.: Generalized Intersection over Union: + A Metric and A Loss for Bounding Box Regression: + https://arxiv.org/abs/1902.09630 + """ + + # Original implementation from https://github.com/facebookresearch/fvcore/blob/bfff2ef/fvcore/nn/giou_loss.py + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(generalized_box_iou_loss) + + boxes1 = _upcast_non_float(boxes1) + boxes2 = _upcast_non_float(boxes2) + intsctk, unionk = _loss_inter_union(boxes1, boxes2) + iouk = intsctk / (unionk + eps) + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # smallest enclosing box + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + + area_c = (xc2 - xc1) * (yc2 - yc1) + miouk = iouk - ((area_c - unionk) / (area_c + eps)) + + loss = 1 - miouk + + # Check reduction option and return loss accordingly + if reduction == "none": + pass + elif reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + else: + raise ValueError( + f"Invalid Value for arg 'reduction': 
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
    ):
        super().__init__()
        _log_api_usage_once(self)
        self.eps = eps
        # The frozen statistics and affine terms live in buffers rather than
        # Parameters: they travel with the state dict but are never trained.
        for buf_name, buf_value in (
            ("weight", torch.ones(num_features)),
            ("bias", torch.zeros(num_features)),
            ("running_mean", torch.zeros(num_features)),
            ("running_var", torch.ones(num_features)),
        ):
            self.register_buffer(buf_name, buf_value)

    def _load_from_state_dict(
        self,
        state_dict: dict,
        prefix: str,
        local_metadata: dict,
        strict: bool,
        missing_keys: List[str],
        unexpected_keys: List[str],
        error_msgs: List[str],
    ):
        # Checkpoints saved from a regular BatchNorm2d carry a
        # num_batches_tracked entry that this frozen variant has no use for.
        state_dict.pop(prefix + "num_batches_tracked", None)

        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        )

    def forward(self, x: Tensor) -> Tensor:
        # Reshape all per-channel terms to (1, C, 1, 1) up front so the whole
        # affine transform stays a fuser-friendly elementwise expression.
        bc_shape = (1, -1, 1, 1)
        scale = self.weight.reshape(bc_shape) * (self.running_var.reshape(bc_shape) + self.eps).rsqrt()
        shift = self.bias.reshape(bc_shape) - self.running_mean.reshape(bc_shape) * scale
        return x * scale + shift

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"


class ConvNormActivation(torch.nn.Sequential):
    # Generic conv -> (optional) norm -> (optional) activation stack; use the
    # dimension-specific subclasses below rather than this class directly.
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, ...]] = 3,
        stride: Union[int, Tuple[int, ...]] = 1,
        padding: Optional[Union[int, Tuple[int, ...], str]] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: Union[int, Tuple[int, ...]] = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
        conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d,
    ) -> None:

        if padding is None:
            # Derive "same"-style padding from the kernel size and dilation.
            if isinstance(kernel_size, int) and isinstance(dilation, int):
                padding = (kernel_size - 1) // 2 * dilation
            else:
                conv_dim = len(kernel_size) if isinstance(kernel_size, Sequence) else len(dilation)
                kernel_size = _make_ntuple(kernel_size, conv_dim)
                dilation = _make_ntuple(dilation, conv_dim)
                padding = tuple((kernel_size[d] - 1) // 2 * dilation[d] for d in range(conv_dim))

        if bias is None:
            # A conv that feeds a norm layer does not need its own bias.
            bias = norm_layer is None

        modules = [
            conv_layer(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if norm_layer is not None:
            modules.append(norm_layer(out_channels))
        if activation_layer is not None:
            act_kwargs = {} if inplace is None else {"inplace": inplace}
            modules.append(activation_layer(**act_kwargs))

        super().__init__(*modules)
        _log_api_usage_once(self)
        self.out_channels = out_channels

        if self.__class__ == ConvNormActivation:
            warnings.warn(
                "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead."
            )
class Conv2dNormActivation(ConvNormActivation):
    """
    Configurable block used for Convolution2d-Normalization-Activation blocks.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
            in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the convolution layer.
            If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function stacked on top of the
            normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't
            be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place.
            Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included
            if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]] = 3,
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Optional[Union[int, Tuple[int, int], str]] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: Union[int, Tuple[int, int]] = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Everything is delegated to ConvNormActivation with a 2d conv layer.
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv2d,
        )


class Conv3dNormActivation(ConvNormActivation):
    """
    Configurable block used for Convolution3d-Normalization-Activation blocks.

    Args:
        in_channels (int): Number of channels in the input video.
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
            in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the convolution layer.
            If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm3d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function stacked on top of the
            normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't
            be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place.
            Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included
            if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int, int]] = 3,
        stride: Union[int, Tuple[int, int, int]] = 1,
        padding: Optional[Union[int, Tuple[int, int, int], str]] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm3d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: Union[int, Tuple[int, int, int]] = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Everything is delegated to ConvNormActivation with a 3d conv layer.
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv3d,
        )
class SqueezeExcitation(torch.nn.Module):
    """
    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
    Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        # Squeeze: global average pool; excite: two 1x1 convs (acting as a
        # channel-wise bottleneck MLP).
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        # Produce one gating coefficient per channel.
        squeezed = self.avgpool(input)
        excited = self.activation(self.fc1(squeezed))
        return self.scale_activation(self.fc2(excited))

    def forward(self, input: Tensor) -> Tensor:
        # Broadcast the (N, C, 1, 1) gate over the spatial dimensions.
        return self._scale(input) * input


class MLP(torch.nn.Sequential):
    """This block implements the multi-layer perceptron (MLP) module.

    Args:
        in_channels (int): Number of channels of the input
        hidden_channels (List[int]): List of the hidden channel dimensions
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the linear layer.
            If ``None`` this layer won't be used. Default: ``None``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function stacked on top of the
            normalization layer (if not None), otherwise on top of the linear layer. If ``None`` this layer
            won't be used. Default: ``torch.nn.ReLU``
        inplace (bool, optional): Parameter for the activation layer, which can optionally do the operation
            in-place. Default is ``None``, which uses the respective default values of the
            ``activation_layer`` and Dropout layer.
        bias (bool): Whether to use bias in the linear layer. Default ``True``
        dropout (float): The probability for the dropout layer. Default: 0.0
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: List[int],
        norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        inplace: Optional[bool] = None,
        bias: bool = True,
        dropout: float = 0.0,
    ):
        # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal:
        # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
        opts = {} if inplace is None else {"inplace": inplace}

        layers = []
        prev_dim = in_channels
        # Each hidden layer: linear -> optional norm -> activation -> dropout.
        for hidden_dim in hidden_channels[:-1]:
            layers.append(torch.nn.Linear(prev_dim, hidden_dim, bias=bias))
            if norm_layer is not None:
                layers.append(norm_layer(hidden_dim))
            layers.append(activation_layer(**opts))
            layers.append(torch.nn.Dropout(dropout, **opts))
            prev_dim = hidden_dim

        # Output projection: no norm/activation, only dropout.
        layers.append(torch.nn.Linear(prev_dim, hidden_channels[-1], bias=bias))
        layers.append(torch.nn.Dropout(dropout, **opts))

        super().__init__(*layers)
        _log_api_usage_once(self)
+ + Args: + dims (List[int]): The desired ordering of dimensions + """ + + def __init__(self, dims: List[int]): + super().__init__() + self.dims = dims + + def forward(self, x: Tensor) -> Tensor: + return torch.permute(x, self.dims) diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/poolers.py b/.venv/lib/python3.11/site-packages/torchvision/ops/poolers.py new file mode 100644 index 0000000000000000000000000000000000000000..9cdd83a598bea0a044179269f8369b58401801d1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/poolers.py @@ -0,0 +1,327 @@ +from typing import Dict, List, Optional, Tuple, Union + +import torch +import torch.fx +import torchvision +from torch import nn, Tensor +from torchvision.ops.boxes import box_area + +from ..utils import _log_api_usage_once +from .roi_align import roi_align + + +# copying result_idx_in_level to a specific index in result[] +# is not supported by ONNX tracing yet. +# _onnx_merge_levels() is an implementation supported by ONNX +# that merges the levels to the right indices +@torch.jit.unused +def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor: + first_result = unmerged_results[0] + dtype, device = first_result.dtype, first_result.device + res = torch.zeros( + (levels.size(0), first_result.size(1), first_result.size(2), first_result.size(3)), dtype=dtype, device=device + ) + for level in range(len(unmerged_results)): + index = torch.where(levels == level)[0].view(-1, 1, 1, 1) + index = index.expand( + index.size(0), + unmerged_results[level].size(1), + unmerged_results[level].size(2), + unmerged_results[level].size(3), + ) + res = res.scatter(0, index, unmerged_results[level]) + return res + + +# TODO: (eellison) T54974082 https://github.com/pytorch/pytorch/issues/26744/pytorch/issues/26744 +def initLevelMapper( + k_min: int, + k_max: int, + canonical_scale: int = 224, + canonical_level: int = 4, + eps: float = 1e-6, +): + return LevelMapper(k_min, k_max, 
canonical_scale, canonical_level, eps) + + +class LevelMapper: + """Determine which FPN level each RoI in a set of RoIs should map to based + on the heuristic in the FPN paper. + + Args: + k_min (int) + k_max (int) + canonical_scale (int) + canonical_level (int) + eps (float) + """ + + def __init__( + self, + k_min: int, + k_max: int, + canonical_scale: int = 224, + canonical_level: int = 4, + eps: float = 1e-6, + ): + self.k_min = k_min + self.k_max = k_max + self.s0 = canonical_scale + self.lvl0 = canonical_level + self.eps = eps + + def __call__(self, boxlists: List[Tensor]) -> Tensor: + """ + Args: + boxlists (list[BoxList]) + """ + # Compute level ids + s = torch.sqrt(torch.cat([box_area(boxlist) for boxlist in boxlists])) + + # Eqn.(1) in FPN paper + target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0) + torch.tensor(self.eps, dtype=s.dtype)) + target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) + return (target_lvls.to(torch.int64) - self.k_min).to(torch.int64) + + +def _convert_to_roi_format(boxes: List[Tensor]) -> Tensor: + concat_boxes = torch.cat(boxes, dim=0) + device, dtype = concat_boxes.device, concat_boxes.dtype + ids = torch.cat( + [torch.full_like(b[:, :1], i, dtype=dtype, layout=torch.strided, device=device) for i, b in enumerate(boxes)], + dim=0, + ) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def _infer_scale(feature: Tensor, original_size: List[int]) -> float: + # assumption: the scale is of the form 2 ** (-k), with k integer + size = feature.shape[-2:] + possible_scales: List[float] = [] + for s1, s2 in zip(size, original_size): + approx_scale = float(s1) / float(s2) + scale = 2 ** float(torch.tensor(approx_scale).log2().round()) + possible_scales.append(scale) + return possible_scales[0] + + +@torch.fx.wrap +def _setup_scales( + features: List[Tensor], image_shapes: List[Tuple[int, int]], canonical_scale: int, canonical_level: int +) -> Tuple[List[float], LevelMapper]: + if not image_shapes: 
+ raise ValueError("images list should not be empty") + max_x = 0 + max_y = 0 + for shape in image_shapes: + max_x = max(shape[0], max_x) + max_y = max(shape[1], max_y) + original_input_shape = (max_x, max_y) + + scales = [_infer_scale(feat, original_input_shape) for feat in features] + # get the levels in the feature map by leveraging the fact that the network always + # downsamples by a factor of 2 at each level. + lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() + lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() + + map_levels = initLevelMapper( + int(lvl_min), + int(lvl_max), + canonical_scale=canonical_scale, + canonical_level=canonical_level, + ) + return scales, map_levels + + +@torch.fx.wrap +def _filter_input(x: Dict[str, Tensor], featmap_names: List[str]) -> List[Tensor]: + x_filtered = [] + for k, v in x.items(): + if k in featmap_names: + x_filtered.append(v) + return x_filtered + + +@torch.fx.wrap +def _multiscale_roi_align( + x_filtered: List[Tensor], + boxes: List[Tensor], + output_size: List[int], + sampling_ratio: int, + scales: Optional[List[float]], + mapper: Optional[LevelMapper], +) -> Tensor: + """ + Args: + x_filtered (List[Tensor]): List of input tensors. + boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in + (x1, y1, x2, y2) format and in the image reference size, not the feature map + reference. The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + output_size (Union[List[Tuple[int, int]], List[int]]): size of the output + sampling_ratio (int): sampling ratio for ROIAlign + scales (Optional[List[float]]): If None, scales will be automatically inferred. Default value is None. + mapper (Optional[LevelMapper]): If none, mapper will be automatically inferred. Default value is None. 
+ Returns: + result (Tensor) + """ + if scales is None or mapper is None: + raise ValueError("scales and mapper should not be None") + + num_levels = len(x_filtered) + rois = _convert_to_roi_format(boxes) + + if num_levels == 1: + return roi_align( + x_filtered[0], + rois, + output_size=output_size, + spatial_scale=scales[0], + sampling_ratio=sampling_ratio, + ) + + levels = mapper(boxes) + + num_rois = len(rois) + num_channels = x_filtered[0].shape[1] + + dtype, device = x_filtered[0].dtype, x_filtered[0].device + result = torch.zeros( + ( + num_rois, + num_channels, + ) + + output_size, + dtype=dtype, + device=device, + ) + + tracing_results = [] + for level, (per_level_feature, scale) in enumerate(zip(x_filtered, scales)): + idx_in_level = torch.where(levels == level)[0] + rois_per_level = rois[idx_in_level] + + result_idx_in_level = roi_align( + per_level_feature, + rois_per_level, + output_size=output_size, + spatial_scale=scale, + sampling_ratio=sampling_ratio, + ) + + if torchvision._is_tracing(): + tracing_results.append(result_idx_in_level.to(dtype)) + else: + # result and result_idx_in_level's dtypes are based on dtypes of different + # elements in x_filtered. x_filtered contains tensors output by different + # layers. When autocast is active, it may choose different dtypes for + # different layers' outputs. Therefore, we defensively match result's dtype + # before copying elements from result_idx_in_level in the following op. + # We need to cast manually (can't rely on autocast to cast for us) because + # the op acts on result in-place, and autocast only affects out-of-place ops. + result[idx_in_level] = result_idx_in_level.to(result.dtype) + + if torchvision._is_tracing(): + result = _onnx_merge_levels(levels, tracing_results) + + return result + + +class MultiScaleRoIAlign(nn.Module): + """ + Multi-scale RoIAlign pooling, which is useful for detection with or without FPN. + + It infers the scale of the pooling via the heuristics specified in eq. 
1 + of the `Feature Pyramid Network paper `_. + They keyword-only parameters ``canonical_scale`` and ``canonical_level`` + correspond respectively to ``224`` and ``k0=4`` in eq. 1, and + have the following meaning: ``canonical_level`` is the target level of the pyramid from + which to pool a region of interest with ``w x h = canonical_scale x canonical_scale``. + + Args: + featmap_names (List[str]): the names of the feature maps that will be used + for the pooling. + output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region + sampling_ratio (int): sampling ratio for ROIAlign + canonical_scale (int, optional): canonical_scale for LevelMapper + canonical_level (int, optional): canonical_level for LevelMapper + + Examples:: + + >>> m = torchvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2) + >>> i = OrderedDict() + >>> i['feat1'] = torch.rand(1, 5, 64, 64) + >>> i['feat2'] = torch.rand(1, 5, 32, 32) # this feature won't be used in the pooling + >>> i['feat3'] = torch.rand(1, 5, 16, 16) + >>> # create some random bounding boxes + >>> boxes = torch.rand(6, 4) * 256; boxes[:, 2:] += boxes[:, :2] + >>> # original image size, before computing the feature maps + >>> image_sizes = [(512, 512)] + >>> output = m(i, [boxes], image_sizes) + >>> print(output.shape) + >>> torch.Size([6, 5, 3, 3]) + + """ + + __annotations__ = {"scales": Optional[List[float]], "map_levels": Optional[LevelMapper]} + + def __init__( + self, + featmap_names: List[str], + output_size: Union[int, Tuple[int], List[int]], + sampling_ratio: int, + *, + canonical_scale: int = 224, + canonical_level: int = 4, + ): + super().__init__() + _log_api_usage_once(self) + if isinstance(output_size, int): + output_size = (output_size, output_size) + self.featmap_names = featmap_names + self.sampling_ratio = sampling_ratio + self.output_size = tuple(output_size) + self.scales = None + self.map_levels = None + self.canonical_scale = canonical_scale + self.canonical_level = 
canonical_level + + def forward( + self, + x: Dict[str, Tensor], + boxes: List[Tensor], + image_shapes: List[Tuple[int, int]], + ) -> Tensor: + """ + Args: + x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have + all the same number of channels, but they can have different sizes. + boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in + (x1, y1, x2, y2) format and in the image reference size, not the feature map + reference. The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + image_shapes (List[Tuple[height, width]]): the sizes of each image before they + have been fed to a CNN to obtain feature maps. This allows us to infer the + scale factor for each one of the levels to be pooled. + Returns: + result (Tensor) + """ + x_filtered = _filter_input(x, self.featmap_names) + if self.scales is None or self.map_levels is None: + self.scales, self.map_levels = _setup_scales( + x_filtered, image_shapes, self.canonical_scale, self.canonical_level + ) + + return _multiscale_roi_align( + x_filtered, + boxes, + self.output_size, + self.sampling_ratio, + self.scales, + self.map_levels, + ) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(featmap_names={self.featmap_names}, " + f"output_size={self.output_size}, sampling_ratio={self.sampling_ratio})" + ) diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_align.py b/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..82809b8f8885667b28eccd22aca60d1dca02f3bf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_align.py @@ -0,0 +1,90 @@ +import torch +import torch.fx +from torch import nn, Tensor +from torch.nn.modules.utils import _pair +from torchvision.extension import _assert_has_ops + +from ..utils import _log_api_usage_once +from ._utils import check_roi_boxes_shape, convert_boxes_to_roi_format + + 
+@torch.fx.wrap +def ps_roi_align( + input: Tensor, + boxes: Tensor, + output_size: int, + spatial_scale: float = 1.0, + sampling_ratio: int = -1, +) -> Tensor: + """ + Performs Position-Sensitive Region of Interest (RoI) Align operator + mentioned in Light-Head R-CNN. + + Args: + input (Tensor[N, C, H, W]): The input tensor, i.e. a batch with ``N`` elements. Each element + contains ``C`` feature maps of dimensions ``H x W``. + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. + The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + If a single Tensor is passed, then the first column should + contain the index of the corresponding element in the batch, i.e. a number in ``[0, N - 1]``. + If a list of Tensors is passed, then each Tensor will correspond to the boxes for an element i + in the batch. + output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling + is performed, as (height, width). + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 + sampling_ratio (int): number of sampling points in the interpolation grid + used to compute the output value of each pooled output bin. If > 0, + then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If + <= 0, then an adaptive number of grid points are used (computed as + ``ceil(roi_width / output_width)``, and likewise for height). 
Default: -1 + + Returns: + Tensor[K, C / (output_size[0] * output_size[1]), output_size[0], output_size[1]]: The pooled RoIs + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(ps_roi_align) + _assert_has_ops() + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.ps_roi_align( + input, rois, spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output + + +class PSRoIAlign(nn.Module): + """ + See :func:`ps_roi_align`. + """ + + def __init__( + self, + output_size: int, + spatial_scale: float, + sampling_ratio: int, + ): + super().__init__() + _log_api_usage_once(self) + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input: Tensor, rois: Tensor) -> Tensor: + return ps_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f")" + ) + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_pool.py b/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..15292dcad97490aaa740cdec2d0aedb31e5662eb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/ps_roi_pool.py @@ -0,0 +1,70 @@ +import torch +import torch.fx +from torch import nn, Tensor +from torch.nn.modules.utils import _pair +from torchvision.extension import _assert_has_ops + +from ..utils import _log_api_usage_once +from ._utils import check_roi_boxes_shape, convert_boxes_to_roi_format + + +@torch.fx.wrap +def ps_roi_pool( + input: Tensor, + boxes: Tensor, + output_size: int, + 
spatial_scale: float = 1.0, +) -> Tensor: + """ + Performs Position-Sensitive Region of Interest (RoI) Pool operator + described in R-FCN + + Args: + input (Tensor[N, C, H, W]): The input tensor, i.e. a batch with ``N`` elements. Each element + contains ``C`` feature maps of dimensions ``H x W``. + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. + The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + If a single Tensor is passed, then the first column should + contain the index of the corresponding element in the batch, i.e. a number in ``[0, N - 1]``. + If a list of Tensors is passed, then each Tensor will correspond to the boxes for an element i + in the batch. + output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling + is performed, as (height, width). + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 + + Returns: + Tensor[K, C / (output_size[0] * output_size[1]), output_size[0], output_size[1]]: The pooled RoIs. + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(ps_roi_pool) + _assert_has_ops() + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.ps_roi_pool(input, rois, spatial_scale, output_size[0], output_size[1]) + return output + + +class PSRoIPool(nn.Module): + """ + See :func:`ps_roi_pool`. 
+ """ + + def __init__(self, output_size: int, spatial_scale: float): + super().__init__() + _log_api_usage_once(self) + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, input: Tensor, rois: Tensor) -> Tensor: + return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(output_size={self.output_size}, spatial_scale={self.spatial_scale})" + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/roi_align.py b/.venv/lib/python3.11/site-packages/torchvision/ops/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..8b616ca9161c64e3a1fe1004776e466c5118d17d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/roi_align.py @@ -0,0 +1,293 @@ +import functools +from typing import List, Union + +import torch +import torch.fx +from torch import nn, Tensor +from torch._dynamo.utils import is_compile_supported +from torch.jit.annotations import BroadcastingList2 +from torch.nn.modules.utils import _pair +from torchvision.extension import _assert_has_ops, _has_ops + +from ..utils import _log_api_usage_once +from ._utils import check_roi_boxes_shape, convert_boxes_to_roi_format + + +def lazy_compile(**compile_kwargs): + """Lazily wrap a function with torch.compile on the first call + + This avoids eagerly importing dynamo. 
+ """ + + def decorate_fn(fn): + @functools.wraps(fn) + def compile_hook(*args, **kwargs): + compiled_fn = torch.compile(fn, **compile_kwargs) + globals()[fn.__name__] = functools.wraps(fn)(compiled_fn) + return compiled_fn(*args, **kwargs) + + return compile_hook + + return decorate_fn + + +# NB: all inputs are tensors +def _bilinear_interpolate( + input, # [N, C, H, W] + roi_batch_ind, # [K] + y, # [K, PH, IY] + x, # [K, PW, IX] + ymask, # [K, IY] + xmask, # [K, IX] +): + _, channels, height, width = input.size() + + # deal with inverse element out of feature map boundary + y = y.clamp(min=0) + x = x.clamp(min=0) + y_low = y.int() + x_low = x.int() + y_high = torch.where(y_low >= height - 1, height - 1, y_low + 1) + y_low = torch.where(y_low >= height - 1, height - 1, y_low) + y = torch.where(y_low >= height - 1, y.to(input.dtype), y) + + x_high = torch.where(x_low >= width - 1, width - 1, x_low + 1) + x_low = torch.where(x_low >= width - 1, width - 1, x_low) + x = torch.where(x_low >= width - 1, x.to(input.dtype), x) + + ly = y - y_low + lx = x - x_low + hy = 1.0 - ly + hx = 1.0 - lx + + # do bilinear interpolation, but respect the masking! 
+ # TODO: It's possible the masking here is unnecessary if y and + # x were clamped appropriately; hard to tell + def masked_index( + y, # [K, PH, IY] + x, # [K, PW, IX] + ): + if ymask is not None: + assert xmask is not None + y = torch.where(ymask[:, None, :], y, 0) + x = torch.where(xmask[:, None, :], x, 0) + return input[ + roi_batch_ind[:, None, None, None, None, None], + torch.arange(channels, device=input.device)[None, :, None, None, None, None], + y[:, None, :, None, :, None], # prev [K, PH, IY] + x[:, None, None, :, None, :], # prev [K, PW, IX] + ] # [K, C, PH, PW, IY, IX] + + v1 = masked_index(y_low, x_low) + v2 = masked_index(y_low, x_high) + v3 = masked_index(y_high, x_low) + v4 = masked_index(y_high, x_high) + + # all ws preemptively [K, C, PH, PW, IY, IX] + def outer_prod(y, x): + return y[:, None, :, None, :, None] * x[:, None, None, :, None, :] + + w1 = outer_prod(hy, hx) + w2 = outer_prod(hy, lx) + w3 = outer_prod(ly, hx) + w4 = outer_prod(ly, lx) + + val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + return val + + +# TODO: this doesn't actually cache +# TODO: main library should make this easier to do +def maybe_cast(tensor): + if torch.is_autocast_enabled() and tensor.is_cuda and tensor.dtype != torch.double: + return tensor.float() + else: + return tensor + + +# This is a pure Python and differentiable implementation of roi_align. When +# run in eager mode, it uses a lot of memory, but when compiled it has +# acceptable memory usage. The main point of this implementation is that +# its backwards is deterministic. 
+# It is transcribed directly off of the roi_align CUDA kernel, see +# https://dev-discuss.pytorch.org/t/a-pure-python-implementation-of-roi-align-that-looks-just-like-its-cuda-kernel/1266 +@lazy_compile(dynamic=True) +def _roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + orig_dtype = input.dtype + + input = maybe_cast(input) + rois = maybe_cast(rois) + + _, _, height, width = input.size() + + ph = torch.arange(pooled_height, device=input.device) # [PH] + pw = torch.arange(pooled_width, device=input.device) # [PW] + + # input: [N, C, H, W] + # rois: [K, 5] + + roi_batch_ind = rois[:, 0].int() # [K] + offset = 0.5 if aligned else 0.0 + roi_start_w = rois[:, 1] * spatial_scale - offset # [K] + roi_start_h = rois[:, 2] * spatial_scale - offset # [K] + roi_end_w = rois[:, 3] * spatial_scale - offset # [K] + roi_end_h = rois[:, 4] * spatial_scale - offset # [K] + + roi_width = roi_end_w - roi_start_w # [K] + roi_height = roi_end_h - roi_start_h # [K] + if not aligned: + roi_width = torch.clamp(roi_width, min=1.0) # [K] + roi_height = torch.clamp(roi_height, min=1.0) # [K] + + bin_size_h = roi_height / pooled_height # [K] + bin_size_w = roi_width / pooled_width # [K] + + exact_sampling = sampling_ratio > 0 + + roi_bin_grid_h = sampling_ratio if exact_sampling else torch.ceil(roi_height / pooled_height) # scalar or [K] + roi_bin_grid_w = sampling_ratio if exact_sampling else torch.ceil(roi_width / pooled_width) # scalar or [K] + + """ + iy, ix = dims(2) + """ + + if exact_sampling: + count = max(roi_bin_grid_h * roi_bin_grid_w, 1) # scalar + iy = torch.arange(roi_bin_grid_h, device=input.device) # [IY] + ix = torch.arange(roi_bin_grid_w, device=input.device) # [IX] + ymask = None + xmask = None + else: + count = torch.clamp(roi_bin_grid_h * roi_bin_grid_w, min=1) # [K] + # When doing adaptive sampling, the number of samples we need to do + # is data-dependent based on how big the ROIs are. 
This is a bit + # awkward because first-class dims can't actually handle this. + # So instead, we inefficiently suppose that we needed to sample ALL + # the points and mask out things that turned out to be unnecessary + iy = torch.arange(height, device=input.device) # [IY] + ix = torch.arange(width, device=input.device) # [IX] + ymask = iy[None, :] < roi_bin_grid_h[:, None] # [K, IY] + xmask = ix[None, :] < roi_bin_grid_w[:, None] # [K, IX] + + def from_K(t): + return t[:, None, None] + + y = ( + from_K(roi_start_h) + + ph[None, :, None] * from_K(bin_size_h) + + (iy[None, None, :] + 0.5).to(input.dtype) * from_K(bin_size_h / roi_bin_grid_h) + ) # [K, PH, IY] + x = ( + from_K(roi_start_w) + + pw[None, :, None] * from_K(bin_size_w) + + (ix[None, None, :] + 0.5).to(input.dtype) * from_K(bin_size_w / roi_bin_grid_w) + ) # [K, PW, IX] + val = _bilinear_interpolate(input, roi_batch_ind, y, x, ymask, xmask) # [K, C, PH, PW, IY, IX] + + # Mask out samples that weren't actually adaptively needed + if not exact_sampling: + val = torch.where(ymask[:, None, None, None, :, None], val, 0) + val = torch.where(xmask[:, None, None, None, None, :], val, 0) + + output = val.sum((-1, -2)) # remove IY, IX ~> [K, C, PH, PW] + if isinstance(count, torch.Tensor): + output /= count[:, None, None, None] + else: + output /= count + + output = output.to(orig_dtype) + + return output + + +@torch.fx.wrap +def roi_align( + input: Tensor, + boxes: Union[Tensor, List[Tensor]], + output_size: BroadcastingList2[int], + spatial_scale: float = 1.0, + sampling_ratio: int = -1, + aligned: bool = False, +) -> Tensor: + """ + Performs Region of Interest (RoI) Align operator with average pooling, as described in Mask R-CNN. + + Args: + input (Tensor[N, C, H, W]): The input tensor, i.e. a batch with ``N`` elements. Each element + contains ``C`` feature maps of dimensions ``H x W``. + If the tensor is quantized, we expect a batch size of ``N == 1``. 
+ boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. + The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + If a single Tensor is passed, then the first column should + contain the index of the corresponding element in the batch, i.e. a number in ``[0, N - 1]``. + If a list of Tensors is passed, then each Tensor will correspond to the boxes for an element i + in the batch. + output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling + is performed, as (height, width). + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 + sampling_ratio (int): number of sampling points in the interpolation grid + used to compute the output value of each pooled output bin. If > 0, + then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If + <= 0, then an adaptive number of grid points are used (computed as + ``ceil(roi_width / output_width)``, and likewise for height). Default: -1 + aligned (bool): If False, use the legacy implementation. + If True, pixel shift the box coordinates it by -0.5 for a better alignment with the two + neighboring pixel indices. This version is used in Detectron2 + + Returns: + Tensor[K, C, output_size[0], output_size[1]]: The pooled RoIs. 
+ """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(roi_align) + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + if not torch.jit.is_scripting(): + if ( + not _has_ops() or (torch.are_deterministic_algorithms_enabled() and (input.is_cuda or input.is_mps)) + ) and is_compile_supported(input.device.type): + return _roi_align(input, rois, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned) + _assert_has_ops() + return torch.ops.torchvision.roi_align( + input, rois, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned + ) + + +class RoIAlign(nn.Module): + """ + See :func:`roi_align`. + """ + + def __init__( + self, + output_size: BroadcastingList2[int], + spatial_scale: float, + sampling_ratio: int, + aligned: bool = False, + ): + super().__init__() + _log_api_usage_once(self) + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + def forward(self, input: Tensor, rois: Union[Tensor, List[Tensor]]) -> Tensor: + return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f", aligned={self.aligned}" + f")" + ) + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/roi_pool.py b/.venv/lib/python3.11/site-packages/torchvision/ops/roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..96282418f0769bc91018a872dbd0a106742bf884 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/roi_pool.py @@ -0,0 +1,72 @@ +from typing import List, Union + +import torch +import torch.fx +from torch import nn, Tensor +from 
torch.jit.annotations import BroadcastingList2 +from torch.nn.modules.utils import _pair +from torchvision.extension import _assert_has_ops + +from ..utils import _log_api_usage_once +from ._utils import check_roi_boxes_shape, convert_boxes_to_roi_format + + +@torch.fx.wrap +def roi_pool( + input: Tensor, + boxes: Union[Tensor, List[Tensor]], + output_size: BroadcastingList2[int], + spatial_scale: float = 1.0, +) -> Tensor: + """ + Performs Region of Interest (RoI) Pool operator described in Fast R-CNN + + Args: + input (Tensor[N, C, H, W]): The input tensor, i.e. a batch with ``N`` elements. Each element + contains ``C`` feature maps of dimensions ``H x W``. + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. + The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + If a single Tensor is passed, then the first column should + contain the index of the corresponding element in the batch, i.e. a number in ``[0, N - 1]``. + If a list of Tensors is passed, then each Tensor will correspond to the boxes for an element i + in the batch. + output_size (int or Tuple[int, int]): the size of the output after the cropping + is performed, as (height, width) + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 + + Returns: + Tensor[K, C, output_size[0], output_size[1]]: The pooled RoIs. 
+ """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(roi_pool) + _assert_has_ops() + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale, output_size[0], output_size[1]) + return output + + +class RoIPool(nn.Module): + """ + See :func:`roi_pool`. + """ + + def __init__(self, output_size: BroadcastingList2[int], spatial_scale: float): + super().__init__() + _log_api_usage_once(self) + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, input: Tensor, rois: Union[Tensor, List[Tensor]]) -> Tensor: + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(output_size={self.output_size}, spatial_scale={self.spatial_scale})" + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/ops/stochastic_depth.py b/.venv/lib/python3.11/site-packages/torchvision/ops/stochastic_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..ff8167b2315e941f7e31a0626eeec270d350a710 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/ops/stochastic_depth.py @@ -0,0 +1,66 @@ +import torch +import torch.fx +from torch import nn, Tensor + +from ..utils import _log_api_usage_once + + +def stochastic_depth(input: Tensor, p: float, mode: str, training: bool = True) -> Tensor: + """ + Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth" + `_ used for randomly dropping residual + branches of residual architectures. + + Args: + input (Tensor[N, ...]): The input tensor or arbitrary dimensions with the first one + being its batch i.e. a batch with ``N`` rows. + p (float): probability of the input to be zeroed. + mode (str): ``"batch"`` or ``"row"``. 
+ ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes + randomly selected rows from the batch. + training: apply stochastic depth if is ``True``. Default: ``True`` + + Returns: + Tensor[N, ...]: The randomly zeroed tensor. + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(stochastic_depth) + if p < 0.0 or p > 1.0: + raise ValueError(f"drop probability has to be between 0 and 1, but got {p}") + if mode not in ["batch", "row"]: + raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}") + if not training or p == 0.0: + return input + + survival_rate = 1.0 - p + if mode == "row": + size = [input.shape[0]] + [1] * (input.ndim - 1) + else: + size = [1] * input.ndim + noise = torch.empty(size, dtype=input.dtype, device=input.device) + noise = noise.bernoulli_(survival_rate) + if survival_rate > 0.0: + noise.div_(survival_rate) + return input * noise + + +torch.fx.wrap("stochastic_depth") + + +class StochasticDepth(nn.Module): + """ + See :func:`stochastic_depth`. 
+ """ + + def __init__(self, p: float, mode: str) -> None: + super().__init__() + _log_api_usage_once(self) + self.p = p + self.mode = mode + + def forward(self, input: Tensor) -> Tensor: + return stochastic_depth(input, self.p, self.mode, self.training) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(p={self.p}, mode={self.mode})" + return s diff --git a/.venv/lib/python3.11/site-packages/torchvision/utils.py b/.venv/lib/python3.11/site-packages/torchvision/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b69edcb572e885516903e4487f8f9862c4e0020e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/utils.py @@ -0,0 +1,658 @@ +import collections +import math +import pathlib +import warnings +from itertools import repeat +from types import FunctionType +from typing import Any, BinaryIO, List, Optional, Tuple, Union + +import numpy as np +import torch +from PIL import Image, ImageColor, ImageDraw, ImageFont + + +__all__ = [ + "make_grid", + "save_image", + "draw_bounding_boxes", + "draw_segmentation_masks", + "draw_keypoints", + "flow_to_image", +] + + +@torch.no_grad() +def make_grid( + tensor: Union[torch.Tensor, List[torch.Tensor]], + nrow: int = 8, + padding: int = 2, + normalize: bool = False, + value_range: Optional[Tuple[int, int]] = None, + scale_each: bool = False, + pad_value: float = 0.0, +) -> torch.Tensor: + """ + Make a grid of images. + + Args: + tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W) + or a list of images all of the same size. + nrow (int, optional): Number of images displayed in each row of the grid. + The final grid size is ``(B / nrow, nrow)``. Default: ``8``. + padding (int, optional): amount of padding. Default: ``2``. + normalize (bool, optional): If True, shift the image to the range (0, 1), + by the min and max values specified by ``value_range``. Default: ``False``. 
+ value_range (tuple, optional): tuple (min, max) where min and max are numbers, + then these numbers are used to normalize the image. By default, min and max + are computed from the tensor. + scale_each (bool, optional): If ``True``, scale each image in the batch of + images separately rather than the (min, max) over all images. Default: ``False``. + pad_value (float, optional): Value for the padded pixels. Default: ``0``. + + Returns: + grid (Tensor): the tensor containing grid of images. + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(make_grid) + if not torch.is_tensor(tensor): + if isinstance(tensor, list): + for t in tensor: + if not torch.is_tensor(t): + raise TypeError(f"tensor or list of tensors expected, got a list containing {type(t)}") + else: + raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}") + + # if list of tensors, convert to a 4D mini-batch Tensor + if isinstance(tensor, list): + tensor = torch.stack(tensor, dim=0) + + if tensor.dim() == 2: # single image H x W + tensor = tensor.unsqueeze(0) + if tensor.dim() == 3: # single image + if tensor.size(0) == 1: # if single-channel, convert to 3-channel + tensor = torch.cat((tensor, tensor, tensor), 0) + tensor = tensor.unsqueeze(0) + + if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images + tensor = torch.cat((tensor, tensor, tensor), 1) + + if normalize is True: + tensor = tensor.clone() # avoid modifying tensor in-place + if value_range is not None and not isinstance(value_range, tuple): + raise TypeError("value_range has to be a tuple (min, max) if specified. 
min and max are numbers") + + def norm_ip(img, low, high): + img.clamp_(min=low, max=high) + img.sub_(low).div_(max(high - low, 1e-5)) + + def norm_range(t, value_range): + if value_range is not None: + norm_ip(t, value_range[0], value_range[1]) + else: + norm_ip(t, float(t.min()), float(t.max())) + + if scale_each is True: + for t in tensor: # loop over mini-batch dimension + norm_range(t, value_range) + else: + norm_range(tensor, value_range) + + if not isinstance(tensor, torch.Tensor): + raise TypeError("tensor should be of type torch.Tensor") + if tensor.size(0) == 1: + return tensor.squeeze(0) + + # make the mini-batch of images into a grid + nmaps = tensor.size(0) + xmaps = min(nrow, nmaps) + ymaps = int(math.ceil(float(nmaps) / xmaps)) + height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding) + num_channels = tensor.size(1) + grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value) + k = 0 + for y in range(ymaps): + for x in range(xmaps): + if k >= nmaps: + break + # Tensor.copy_() is a valid method but seems to be missing from the stubs + # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_ + grid.narrow(1, y * height + padding, height - padding).narrow( # type: ignore[attr-defined] + 2, x * width + padding, width - padding + ).copy_(tensor[k]) + k = k + 1 + return grid + + +@torch.no_grad() +def save_image( + tensor: Union[torch.Tensor, List[torch.Tensor]], + fp: Union[str, pathlib.Path, BinaryIO], + format: Optional[str] = None, + **kwargs, +) -> None: + """ + Save a given Tensor into an image file. + + Args: + tensor (Tensor or list): Image to be saved. If given a mini-batch tensor, + saves the tensor as a grid of images by calling ``make_grid``. + fp (string or file object): A filename or a file object + format(Optional): If omitted, the format to use is determined from the filename extension. 
@torch.no_grad()
def draw_bounding_boxes(
    image: torch.Tensor,
    boxes: torch.Tensor,
    labels: Optional[List[str]] = None,
    colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
    fill: Optional[bool] = False,
    width: int = 1,
    font: Optional[str] = None,
    font_size: Optional[int] = None,
) -> torch.Tensor:

    """
    Draws bounding boxes on given RGB image.
    The image values should be uint8 in [0, 255] or float in [0, 1].
    If fill is True, Resulting Tensor should be saved as PNG image.

    Args:
        image (Tensor): Tensor of shape (C, H, W) and dtype uint8 or float.
        boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that
            the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and
            `0 <= ymin < ymax < H`.
        labels (List[str]): List containing the labels of bounding boxes.
        colors (color or list of colors, optional): List containing the colors
            of the boxes or single color for all boxes. The color can be represented as
            PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
            By default, random colors are generated for boxes.
        fill (bool): If `True` fills the bounding box with specified color.
        width (int): Width of bounding box.
        font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may
            also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`,
            `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
        font_size (int): The requested font size in points.

    Returns:
        img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted.
    """
    # Local import; NOTE(review): presumably avoids a circular import at module load — confirm.
    import torchvision.transforms.v2.functional as F  # noqa

    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(draw_bounding_boxes)
    # Input validation: only a single (non-batched) grayscale or RGB image is accepted,
    # and boxes must already be in (xmin, ymin, xmax, ymax) order.
    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Tensor expected, got {type(image)}")
    elif not (image.dtype == torch.uint8 or image.is_floating_point()):
        raise ValueError(f"The image dtype must be uint8 or float, got {image.dtype}")
    elif image.dim() != 3:
        raise ValueError("Pass individual images, not batches")
    elif image.size(0) not in {1, 3}:
        raise ValueError("Only grayscale and RGB images are supported")
    elif (boxes[:, 0] > boxes[:, 2]).any() or (boxes[:, 1] > boxes[:, 3]).any():
        raise ValueError(
            "Boxes need to be in (xmin, ymin, xmax, ymax) format. Use torchvision.ops.box_convert to convert them"
        )

    num_boxes = boxes.shape[0]

    # Nothing to draw: warn and return the input unchanged.
    if num_boxes == 0:
        warnings.warn("boxes doesn't contain any box. No box was drawn")
        return image

    if labels is None:
        # One placeholder label per box so the draw loop below can zip uniformly.
        labels: Union[List[str], List[None]] = [None] * num_boxes  # type: ignore[no-redef]
    elif len(labels) != num_boxes:
        raise ValueError(
            f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. Please specify labels for each box."
        )

    # Normalize the color spec to one RGB value per box (random palette when None).
    colors = _parse_colors(colors, num_objects=num_boxes)

    if font is None:
        if font_size is not None:
            warnings.warn("Argument 'font_size' will be ignored since 'font' is not set.")
        txt_font = ImageFont.load_default()
    else:
        txt_font = ImageFont.truetype(font=font, size=font_size or 10)

    # Handle Grayscale images
    if image.size(0) == 1:
        image = torch.tile(image, (3, 1, 1))

    # Drawing happens in uint8/PIL space; remember the dtype to convert back at the end.
    original_dtype = image.dtype
    if original_dtype.is_floating_point:
        image = F.to_dtype(image, dtype=torch.uint8, scale=True)

    img_to_draw = F.to_pil_image(image)
    img_boxes = boxes.to(torch.int64).tolist()

    if fill:
        # RGBA mode so the fill color's alpha (set below) is honored.
        draw = ImageDraw.Draw(img_to_draw, "RGBA")
    else:
        draw = ImageDraw.Draw(img_to_draw)

    for bbox, color, label in zip(img_boxes, colors, labels):  # type: ignore[arg-type]
        if fill:
            # Semi-transparent interior (alpha 100 of 255), opaque outline.
            fill_color = color + (100,)
            draw.rectangle(bbox, width=width, outline=color, fill=fill_color)
        else:
            draw.rectangle(bbox, width=width, outline=color)

        if label is not None:
            # Offset the label inside the box so it does not overlap the outline.
            margin = width + 1
            draw.text((bbox[0] + margin, bbox[1] + margin), label, fill=color, font=txt_font)

    out = F.pil_to_tensor(img_to_draw)
    if original_dtype.is_floating_point:
        # Restore the caller's float dtype/range.
        out = F.to_dtype(out, dtype=original_dtype, scale=True)
    return out
+ colors (color or list of colors, optional): List containing the colors + of the masks or single color for all masks. The color can be represented as + PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``. + By default, random colors are generated for each mask. + + Returns: + img (Tensor[C, H, W]): Image Tensor, with segmentation masks drawn on top. + """ + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(draw_segmentation_masks) + if not isinstance(image, torch.Tensor): + raise TypeError(f"The image must be a tensor, got {type(image)}") + elif not (image.dtype == torch.uint8 or image.is_floating_point()): + raise ValueError(f"The image dtype must be uint8 or float, got {image.dtype}") + elif image.dim() != 3: + raise ValueError("Pass individual images, not batches") + elif image.size()[0] != 3: + raise ValueError("Pass an RGB image. Other Image formats are not supported") + if masks.ndim == 2: + masks = masks[None, :, :] + if masks.ndim != 3: + raise ValueError("masks must be of shape (H, W) or (batch_size, H, W)") + if masks.dtype != torch.bool: + raise ValueError(f"The masks must be of dtype bool. Got {masks.dtype}") + if masks.shape[-2:] != image.shape[-2:]: + raise ValueError("The image and the masks must have the same height and width") + + num_masks = masks.size()[0] + overlapping_masks = masks.sum(dim=0) > 1 + + if num_masks == 0: + warnings.warn("masks doesn't contain any mask. 
@torch.no_grad()
def draw_keypoints(
    image: torch.Tensor,
    keypoints: torch.Tensor,
    connectivity: Optional[List[Tuple[int, int]]] = None,
    colors: Optional[Union[str, Tuple[int, int, int]]] = None,
    radius: int = 2,
    width: int = 3,
    visibility: Optional[torch.Tensor] = None,
) -> torch.Tensor:

    """
    Draws Keypoints on given RGB image.
    The image values should be uint8 in [0, 255] or float in [0, 1].
    Keypoints can be drawn for multiple instances at a time.

    This method allows that keypoints and their connectivity are drawn based on the visibility of this keypoint.

    Args:
        image (Tensor): Tensor of shape (3, H, W) and dtype uint8 or float.
        keypoints (Tensor): Tensor of shape (num_instances, K, 2) the K keypoint locations for each of the N instances,
            in the format [x, y].
        connectivity (List[Tuple[int, int]]]): A List of tuple where each tuple contains a pair of keypoints
            to be connected.
            If at least one of the two connected keypoints has a ``visibility`` of False,
            this specific connection is not drawn.
            Exclusions due to invisibility are computed per-instance.
        colors (str, Tuple): The color can be represented as
            PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
        radius (int): Integer denoting radius of keypoint.
        width (int): Integer denoting width of line connecting keypoints.
        visibility (Tensor): Tensor of shape (num_instances, K) specifying the visibility of the K
            keypoints for each of the N instances.
            True means that the respective keypoint is visible and should be drawn.
            False means invisible, so neither the point nor possible connections containing it are drawn.
            The input tensor will be cast to bool.
            Default ``None`` means that all the keypoints are visible.
            For more details, see :ref:`draw_keypoints_with_visibility`.

    Returns:
        img (Tensor[C, H, W]): Image Tensor with keypoints drawn.
    """

    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(draw_keypoints)
    # validate image
    if not isinstance(image, torch.Tensor):
        raise TypeError(f"The image must be a tensor, got {type(image)}")
    elif not (image.dtype == torch.uint8 or image.is_floating_point()):
        raise ValueError(f"The image dtype must be uint8 or float, got {image.dtype}")
    elif image.dim() != 3:
        raise ValueError("Pass individual images, not batches")
    elif image.size()[0] != 3:
        raise ValueError("Pass an RGB image. Other Image formats are not supported")

    # validate keypoints
    if keypoints.ndim != 3:
        raise ValueError("keypoints must be of shape (num_instances, K, 2)")

    # validate visibility
    if visibility is None:  # set default: every keypoint of every instance is visible
        visibility = torch.ones(keypoints.shape[:-1], dtype=torch.bool)
    if visibility.ndim == 3:
        # If visibility was passed as pred.split([2, 1], dim=-1), it will be of shape (num_instances, K, 1).
        # We make sure it is of shape (num_instances, K). This isn't documented, we're just being nice.
        visibility = visibility.squeeze(-1)
    if visibility.ndim != 2:
        raise ValueError(f"visibility must be of shape (num_instances, K). Got ndim={visibility.ndim}")
    if visibility.shape != keypoints.shape[:-1]:
        raise ValueError(
            "keypoints and visibility must have the same dimensionality for num_instances and K. "
            f"Got {visibility.shape = } and {keypoints.shape = }"
        )

    # Drawing happens in uint8/PIL space; remember the dtype to convert back at the end.
    original_dtype = image.dtype
    if original_dtype.is_floating_point:
        # Local import; NOTE(review): presumably avoids a circular import at module load — confirm.
        from torchvision.transforms.v2.functional import to_dtype  # noqa

        image = to_dtype(image, dtype=torch.uint8, scale=True)

    ndarr = image.permute(1, 2, 0).cpu().numpy()
    img_to_draw = Image.fromarray(ndarr)
    draw = ImageDraw.Draw(img_to_draw)
    # Convert to plain Python lists once, outside the drawing loops.
    img_kpts = keypoints.to(torch.int64).tolist()
    img_vis = visibility.cpu().bool().tolist()

    for kpt_inst, vis_inst in zip(img_kpts, img_vis):
        for kpt_coord, kp_vis in zip(kpt_inst, vis_inst):
            if not kp_vis:
                continue
            # Bounding square of the circle centered on the keypoint.
            x1 = kpt_coord[0] - radius
            x2 = kpt_coord[0] + radius
            y1 = kpt_coord[1] - radius
            y2 = kpt_coord[1] + radius
            draw.ellipse([x1, y1, x2, y2], fill=colors, outline=None, width=0)

        if connectivity:
            for connection in connectivity:
                # Skip a connection if either endpoint is invisible for this instance.
                if (not vis_inst[connection[0]]) or (not vis_inst[connection[1]]):
                    continue
                start_pt_x = kpt_inst[connection[0]][0]
                start_pt_y = kpt_inst[connection[0]][1]

                end_pt_x = kpt_inst[connection[1]][0]
                end_pt_y = kpt_inst[connection[1]][1]

                draw.line(
                    ((start_pt_x, start_pt_y), (end_pt_x, end_pt_y)),
                    width=width,
                )

    out = torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1)
    if original_dtype.is_floating_point:
        # Restore the caller's float dtype/range.
        out = to_dtype(out, dtype=original_dtype, scale=True)
    return out
+ """ + + if flow.dtype != torch.float: + raise ValueError(f"Flow should be of dtype torch.float, got {flow.dtype}.") + + orig_shape = flow.shape + if flow.ndim == 3: + flow = flow[None] # Add batch dim + + if flow.ndim != 4 or flow.shape[1] != 2: + raise ValueError(f"Input flow should have shape (2, H, W) or (N, 2, H, W), got {orig_shape}.") + + max_norm = torch.sum(flow**2, dim=1).sqrt().max() + epsilon = torch.finfo((flow).dtype).eps + normalized_flow = flow / (max_norm + epsilon) + img = _normalized_flow_to_image(normalized_flow) + + if len(orig_shape) == 3: + img = img[0] # Remove batch dim + return img + + +@torch.no_grad() +def _normalized_flow_to_image(normalized_flow: torch.Tensor) -> torch.Tensor: + + """ + Converts a batch of normalized flow to an RGB image. + + Args: + normalized_flow (torch.Tensor): Normalized flow tensor of shape (N, 2, H, W) + Returns: + img (Tensor(N, 3, H, W)): Flow visualization image of dtype uint8. + """ + + N, _, H, W = normalized_flow.shape + device = normalized_flow.device + flow_image = torch.zeros((N, 3, H, W), dtype=torch.uint8, device=device) + colorwheel = _make_colorwheel().to(device) # shape [55x3] + num_cols = colorwheel.shape[0] + norm = torch.sum(normalized_flow**2, dim=1).sqrt() + a = torch.atan2(-normalized_flow[:, 1, :, :], -normalized_flow[:, 0, :, :]) / torch.pi + fk = (a + 1) / 2 * (num_cols - 1) + k0 = torch.floor(fk).to(torch.long) + k1 = k0 + 1 + k1[k1 == num_cols] = 0 + f = fk - k0 + + for c in range(colorwheel.shape[1]): + tmp = colorwheel[:, c] + col0 = tmp[k0] / 255.0 + col1 = tmp[k1] / 255.0 + col = (1 - f) * col0 + f * col1 + col = 1 - norm * (1 - col) + flow_image[:, c, :, :] = torch.floor(255 * col) + return flow_image + + +def _make_colorwheel() -> torch.Tensor: + """ + Generates a color wheel for optical flow visualization as presented in: + Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007) + URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf. 
+ + Returns: + colorwheel (Tensor[55, 3]): Colorwheel Tensor. + """ + + RY = 15 + YG = 6 + GC = 4 + CB = 11 + BM = 13 + MR = 6 + + ncols = RY + YG + GC + CB + BM + MR + colorwheel = torch.zeros((ncols, 3)) + col = 0 + + # RY + colorwheel[0:RY, 0] = 255 + colorwheel[0:RY, 1] = torch.floor(255 * torch.arange(0, RY) / RY) + col = col + RY + # YG + colorwheel[col : col + YG, 0] = 255 - torch.floor(255 * torch.arange(0, YG) / YG) + colorwheel[col : col + YG, 1] = 255 + col = col + YG + # GC + colorwheel[col : col + GC, 1] = 255 + colorwheel[col : col + GC, 2] = torch.floor(255 * torch.arange(0, GC) / GC) + col = col + GC + # CB + colorwheel[col : col + CB, 1] = 255 - torch.floor(255 * torch.arange(CB) / CB) + colorwheel[col : col + CB, 2] = 255 + col = col + CB + # BM + colorwheel[col : col + BM, 2] = 255 + colorwheel[col : col + BM, 0] = torch.floor(255 * torch.arange(0, BM) / BM) + col = col + BM + # MR + colorwheel[col : col + MR, 2] = 255 - torch.floor(255 * torch.arange(MR) / MR) + colorwheel[col : col + MR, 0] = 255 + return colorwheel + + +def _generate_color_palette(num_objects: int): + palette = torch.tensor([2**25 - 1, 2**15 - 1, 2**21 - 1]) + return [tuple((i * palette) % 255) for i in range(num_objects)] + + +def _parse_colors( + colors: Union[None, str, Tuple[int, int, int], List[Union[str, Tuple[int, int, int]]]], + *, + num_objects: int, + dtype: torch.dtype = torch.uint8, +) -> List[Tuple[int, int, int]]: + """ + Parses a specification of colors for a set of objects. + + Args: + colors: A specification of colors for the objects. This can be one of the following: + - None: to generate a color palette automatically. + - A list of colors: where each color is either a string (specifying a named color) or an RGB tuple. + - A string or an RGB tuple: to use the same color for all objects. + + If `colors` is a tuple, it should be a 3-tuple specifying the RGB values of the color. 
+ If `colors` is a list, it should have at least as many elements as the number of objects to color. + + num_objects (int): The number of objects to color. + + Returns: + A list of 3-tuples, specifying the RGB values of the colors. + + Raises: + ValueError: If the number of colors in the list is less than the number of objects to color. + If `colors` is not a list, tuple, string or None. + """ + if colors is None: + colors = _generate_color_palette(num_objects) + elif isinstance(colors, list): + if len(colors) < num_objects: + raise ValueError( + f"Number of colors must be equal or larger than the number of objects, but got {len(colors)} < {num_objects}." + ) + elif not isinstance(colors, (tuple, str)): + raise ValueError(f"`colors` must be a tuple or a string, or a list thereof, but got {colors}.") + elif isinstance(colors, tuple) and len(colors) != 3: + raise ValueError(f"If passed as tuple, colors should be an RGB triplet, but got {colors}.") + else: # colors specifies a single color for all objects + colors = [colors] * num_objects + + colors = [ImageColor.getrgb(color) if isinstance(color, str) else color for color in colors] + if dtype.is_floating_point: # [0, 255] -> [0, 1] + colors = [tuple(v / 255 for v in color) for color in colors] # type: ignore[union-attr] + return colors # type: ignore[return-value] + + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. 
+ For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + + +def _make_ntuple(x: Any, n: int) -> Tuple[Any, ...]: + """ + Make n-tuple from input x. If x is an iterable, then we just convert it to tuple. + Otherwise, we will make a tuple of length n, all with value of x. + reference: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/utils.py#L8 + + Args: + x (Any): input value + n (int): length of the resulting tuple + """ + if isinstance(x, collections.abc.Iterable): + return tuple(x) + return tuple(repeat(x, n)) diff --git a/.venv/lib/python3.11/site-packages/torchvision/version.py b/.venv/lib/python3.11/site-packages/torchvision/version.py new file mode 100644 index 0000000000000000000000000000000000000000..fa27c5f251252f22e784edf298a3a9756960607f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchvision/version.py @@ -0,0 +1,5 @@ +__version__ = '0.20.1+cu124' +git_version = '3ac97aa9120137381ed1060f37237e44485ac2aa' +from torchvision.extension import _check_cuda_version +if _check_cuda_version() > 0: + cuda = _check_cuda_version()