koichi12 committed on Feb 12, 2025

Commit

da0ba90

verified ·

1 Parent(s): da4944c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
.venv/lib/python3.11/site-packages/apiclient/__init__.py +27 -0
.venv/lib/python3.11/site-packages/apiclient/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/certifi/__init__.py +4 -0
.venv/lib/python3.11/site-packages/certifi/__main__.py +12 -0
.venv/lib/python3.11/site-packages/certifi/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/certifi/__pycache__/__main__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/certifi/__pycache__/core.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/certifi/cacert.pem +0 -0
.venv/lib/python3.11/site-packages/certifi/core.py +114 -0
.venv/lib/python3.11/site-packages/certifi/py.typed +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_auth.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/libcusolverMg.so.11 +3 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/INSTALLER +1 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/LICENSE +201 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/METADATA +51 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/NOTICE +5 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/RECORD +58 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/WHEEL +5 -0
.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/top_level.txt +1 -0
.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so +0 -0
.venv/lib/python3.11/site-packages/torch/_VF.py +31 -0
.venv/lib/python3.11/site-packages/torch/_VF.pyi +0 -0
.venv/lib/python3.11/site-packages/torch/__config__.py +23 -0
.venv/lib/python3.11/site-packages/torch/__future__.py +75 -0
.venv/lib/python3.11/site-packages/torch/__init__.py +2665 -0
.venv/lib/python3.11/site-packages/torch/_appdirs.py +667 -0
.venv/lib/python3.11/site-packages/torch/_classes.py +56 -0
.venv/lib/python3.11/site-packages/torch/_compile.py +38 -0
.venv/lib/python3.11/site-packages/torch/_custom_ops.py +324 -0
.venv/lib/python3.11/site-packages/torch/_deploy.py +104 -0
.venv/lib/python3.11/site-packages/torch/_guards.py +925 -0
.venv/lib/python3.11/site-packages/torch/_jit_internal.py +1547 -0
.venv/lib/python3.11/site-packages/torch/_linalg_utils.py +150 -0
.venv/lib/python3.11/site-packages/torch/_lobpcg.py +1157 -0
.venv/lib/python3.11/site-packages/torch/_lowrank.py +294 -0
.venv/lib/python3.11/site-packages/torch/_meta_registrations.py +0 -0
.venv/lib/python3.11/site-packages/torch/_namedtensor_internals.py +159 -0
.venv/lib/python3.11/site-packages/torch/_ops.py +1355 -0
.venv/lib/python3.11/site-packages/torch/_python_dispatcher.py +182 -0

.gitattributes CHANGED Viewed

@@ -413,3 +413,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/
 .venv/lib/python3.11/site-packages/nvidia/cuda_nvrtc/lib/libnvrtc-builtins.so.12.4 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/cv2/cv2.abi3.so filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_engines_runtime_compiled.so.9 filter=lfs diff=lfs merge=lfs -text

 .venv/lib/python3.11/site-packages/nvidia/cuda_nvrtc/lib/libnvrtc-builtins.so.12.4 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/cv2/cv2.abi3.so filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_engines_runtime_compiled.so.9 filter=lfs diff=lfs merge=lfs -text
+.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/libcusolverMg.so.11 filter=lfs diff=lfs merge=lfs -text

.venv/lib/python3.11/site-packages/apiclient/__init__.py ADDED Viewed

	@@ -0,0 +1,27 @@

+"""Retain apiclient as an alias for googleapiclient."""
+from googleapiclient import channel, discovery, errors, http, mimeparse, model
+try:
+    from googleapiclient import sample_tools
+except ImportError:
+    # Silently ignore, because the vast majority of consumers won't use it and
+    # it has deep dependence on oauth2client, an optional dependency.
+    sample_tools = None
+from googleapiclient import schema
+_SUBMODULES = {
+    "channel": channel,
+    "discovery": discovery,
+    "errors": errors,
+    "http": http,
+    "mimeparse": mimeparse,
+    "model": model,
+    "sample_tools": sample_tools,
+    "schema": schema,
+}
+import sys
+for module_name, module in _SUBMODULES.items():
+    sys.modules["apiclient.%s" % module_name] = module

.venv/lib/python3.11/site-packages/apiclient/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (954 Bytes). View file

.venv/lib/python3.11/site-packages/certifi/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .core import contents, where
+__all__ = ["contents", "where"]
+__version__ = "2024.12.14"

.venv/lib/python3.11/site-packages/certifi/__main__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import argparse
+from certifi import contents, where
+parser = argparse.ArgumentParser()
+parser.add_argument("-c", "--contents", action="store_true")
+args = parser.parse_args()
+if args.contents:
+    print(contents())
+else:
+    print(where())

.venv/lib/python3.11/site-packages/certifi/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (322 Bytes). View file

.venv/lib/python3.11/site-packages/certifi/__pycache__/__main__.cpython-311.pyc ADDED Viewed

Binary file (711 Bytes). View file

.venv/lib/python3.11/site-packages/certifi/__pycache__/core.cpython-311.pyc ADDED Viewed

Binary file (3.75 kB). View file

.venv/lib/python3.11/site-packages/certifi/cacert.pem ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/certifi/core.py ADDED Viewed

	@@ -0,0 +1,114 @@

+"""
+certifi.py
+~~~~~~~~~~
+This module returns the installation location of cacert.pem or its contents.
+"""
+import sys
+import atexit
+def exit_cacert_ctx() -> None:
+    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]
+if sys.version_info >= (3, 11):
+    from importlib.resources import as_file, files
+    _CACERT_CTX = None
+    _CACERT_PATH = None
+    def where() -> str:
+        # This is slightly terrible, but we want to delay extracting the file
+        # in cases where we're inside of a zipimport situation until someone
+        # actually calls where(), but we don't want to re-extract the file
+        # on every call of where(), so we'll do it once then store it in a
+        # global variable.
+        global _CACERT_CTX
+        global _CACERT_PATH
+        if _CACERT_PATH is None:
+            # This is slightly janky, the importlib.resources API wants you to
+            # manage the cleanup of this file, so it doesn't actually return a
+            # path, it returns a context manager that will give you the path
+            # when you enter it and will do any cleanup when you leave it. In
+            # the common case of not needing a temporary file, it will just
+            # return the file system location and the __exit__() is a no-op.
+            #
+            # We also have to hold onto the actual context manager, because
+            # it will do the cleanup whenever it gets garbage collected, so
+            # we will also store that at the global level as well.
+            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
+            _CACERT_PATH = str(_CACERT_CTX.__enter__())
+            atexit.register(exit_cacert_ctx)
+        return _CACERT_PATH
+    def contents() -> str:
+        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
+elif sys.version_info >= (3, 7):
+    from importlib.resources import path as get_path, read_text
+    _CACERT_CTX = None
+    _CACERT_PATH = None
+    def where() -> str:
+        # This is slightly terrible, but we want to delay extracting the
+        # file in cases where we're inside of a zipimport situation until
+        # someone actually calls where(), but we don't want to re-extract
+        # the file on every call of where(), so we'll do it once then store
+        # it in a global variable.
+        global _CACERT_CTX
+        global _CACERT_PATH
+        if _CACERT_PATH is None:
+            # This is slightly janky, the importlib.resources API wants you
+            # to manage the cleanup of this file, so it doesn't actually
+            # return a path, it returns a context manager that will give
+            # you the path when you enter it and will do any cleanup when
+            # you leave it. In the common case of not needing a temporary
+            # file, it will just return the file system location and the
+            # __exit__() is a no-op.
+            #
+            # We also have to hold onto the actual context manager, because
+            # it will do the cleanup whenever it gets garbage collected, so
+            # we will also store that at the global level as well.
+            _CACERT_CTX = get_path("certifi", "cacert.pem")
+            _CACERT_PATH = str(_CACERT_CTX.__enter__())
+            atexit.register(exit_cacert_ctx)
+        return _CACERT_PATH
+    def contents() -> str:
+        return read_text("certifi", "cacert.pem", encoding="ascii")
+else:
+    import os
+    import types
+    from typing import Union
+    Package = Union[types.ModuleType, str]
+    Resource = Union[str, "os.PathLike"]
+    # This fallback will work for Python versions prior to 3.7 that lack the
+    # importlib.resources module but relies on the existing `where` function
+    # so won't address issues with environments like PyOxidizer that don't set
+    # __file__ on modules.
+    def read_text(
+        package: Package,
+        resource: Resource,
+        encoding: str = 'utf-8',
+        errors: str = 'strict'
+    ) -> str:
+        with open(where(), encoding=encoding) as data:
+            return data.read()
+    # If we don't have importlib.resources, then we will just do the old logic
+    # of assuming we're on the filesystem and munge the path directly.
+    def where() -> str:
+        f = os.path.dirname(__file__)
+        return os.path.join(f, "cacert.pem")
+    def contents() -> str:
+        return read_text("certifi", "cacert.pem", encoding="ascii")

.venv/lib/python3.11/site-packages/certifi/py.typed ADDED Viewed

File without changes

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (4.86 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_auth.cpython-311.pyc ADDED Viewed

Binary file (10.1 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc ADDED Viewed

Binary file (5.77 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc ADDED Viewed

Binary file (40.5 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc ADDED Viewed

Binary file (2.35 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc ADDED Viewed

Binary file (7.45 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc ADDED Viewed

Binary file (2.41 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc ADDED Viewed

Binary file (2.35 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc ADDED Viewed

Binary file (623 Bytes). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc ADDED Viewed

Binary file (6.53 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc ADDED Viewed

Binary file (11.7 kB). View file

.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/libcusolverMg.so.11 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47662749a295f771b92abe8d99dcd5f151953d56069a19f43977b97868ec21eb
+size 82303400

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/INSTALLER ADDED Viewed

	@@ -0,0 +1 @@


+pip

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/METADATA ADDED Viewed

	@@ -0,0 +1,51 @@

+Metadata-Version: 2.1
+Name: prometheus_client
+Version: 0.21.1
+Summary: Python client for the Prometheus monitoring system.
+Home-page: https://github.com/prometheus/client_python
+Author: Brian Brazil
+Author-email: brian.brazil@robustperception.io
+License: Apache Software License 2.0
+Keywords: prometheus monitoring instrumentation client
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: Intended Audience :: System Administrators
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: System :: Monitoring
+Classifier: License :: OSI Approved :: Apache Software License
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+License-File: NOTICE
+Provides-Extra: twisted
+Requires-Dist: twisted; extra == "twisted"
+# Prometheus Python Client
+The official Python client for [Prometheus](https://prometheus.io).
+## Installation
+```
+pip install prometheus-client
+```
+This package can be found on [PyPI](https://pypi.python.org/pypi/prometheus_client).
+## Documentation
+Documentation is available on https://prometheus.github.io/client_python
+## Links
+* [Releases](https://github.com/prometheus/client_python/releases): The releases page shows the history of the project and acts as a changelog.
+* [PyPI](https://pypi.python.org/pypi/prometheus_client)

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/NOTICE ADDED Viewed

	@@ -0,0 +1,5 @@

+Prometheus instrumentation library for Python applications
+Copyright 2015 The Prometheus Authors
+This product bundles decorator 4.0.10 which is available under a "2-clause BSD"
+license. For details, see prometheus_client/decorator.py.

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/RECORD ADDED Viewed

	@@ -0,0 +1,58 @@

+prometheus_client-0.21.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+prometheus_client-0.21.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+prometheus_client-0.21.1.dist-info/METADATA,sha256=r74KhsmW6__tSpz4xH6BX7qsbJfFWYfj24x1elsVtr8,1842
+prometheus_client-0.21.1.dist-info/NOTICE,sha256=TvoYdK6qYPNl9Xl-YX8f-TPhXlCOr3UemEjtRBPXp64,236
+prometheus_client-0.21.1.dist-info/RECORD,,
+prometheus_client-0.21.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+prometheus_client-0.21.1.dist-info/top_level.txt,sha256=AxLEvHEMhTW-Kvb9Ly1DPI3aapigQ2aeg8TXMt9WMRo,18
+prometheus_client/__init__.py,sha256=D-ptlQkWPXqZIJPi5TR0QNMdWr_Ejv-gMq6WAFik_9o,1815
+prometheus_client/__pycache__/__init__.cpython-311.pyc,,
+prometheus_client/__pycache__/asgi.cpython-311.pyc,,
+prometheus_client/__pycache__/context_managers.cpython-311.pyc,,
+prometheus_client/__pycache__/core.cpython-311.pyc,,
+prometheus_client/__pycache__/decorator.cpython-311.pyc,,
+prometheus_client/__pycache__/exposition.cpython-311.pyc,,
+prometheus_client/__pycache__/gc_collector.cpython-311.pyc,,
+prometheus_client/__pycache__/metrics.cpython-311.pyc,,
+prometheus_client/__pycache__/metrics_core.cpython-311.pyc,,
+prometheus_client/__pycache__/mmap_dict.cpython-311.pyc,,
+prometheus_client/__pycache__/multiprocess.cpython-311.pyc,,
+prometheus_client/__pycache__/parser.cpython-311.pyc,,
+prometheus_client/__pycache__/platform_collector.cpython-311.pyc,,
+prometheus_client/__pycache__/process_collector.cpython-311.pyc,,
+prometheus_client/__pycache__/registry.cpython-311.pyc,,
+prometheus_client/__pycache__/samples.cpython-311.pyc,,
+prometheus_client/__pycache__/utils.cpython-311.pyc,,
+prometheus_client/__pycache__/values.cpython-311.pyc,,
+prometheus_client/asgi.py,sha256=ivn-eV7ZU0BEa4E9oWBFbBRUklHPw9f5lcdGsyFuCLo,1606
+prometheus_client/bridge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+prometheus_client/bridge/__pycache__/__init__.cpython-311.pyc,,
+prometheus_client/bridge/__pycache__/graphite.cpython-311.pyc,,
+prometheus_client/bridge/graphite.py,sha256=m5-7IyVyGL8C6S9yLxeupS1pfj8KFNPNlazddamQT8s,2897
+prometheus_client/context_managers.py,sha256=E7uksn4D7yBoZWDgjI1VRpR3l2tKivs9DHZ5UAcmPwE,2343
+prometheus_client/core.py,sha256=yyVvSxa8WQnBvAr4JhO3HqdTqClwhbzmVGvwRvWQMIo,860
+prometheus_client/decorator.py,sha256=7MdUokWmzQ17foet2R5QcMubdZ1WDPGYo0_HqLxAw2k,15802
+prometheus_client/exposition.py,sha256=nmushN6NIGo-nOBeaCXfg5bCeyvesVM_DXUWmRjFwr4,26176
+prometheus_client/gc_collector.py,sha256=tBhXXktF9g9h7gvO-DmI2gxPol2_gXI1M6e9ZMazNfY,1514
+prometheus_client/metrics.py,sha256=ypy4Vv0duzCgo4ZXHBNK45uU9hbe7iK-Fohv7EJ_I5A,28109
+prometheus_client/metrics_core.py,sha256=Yz-yqS3pxNdpIRMShQv_IHaKlVS_Q53TaYcP9U8LDlE,15548
+prometheus_client/mmap_dict.py,sha256=-t49kywZHFHk2D9IWtunqKFtr5eEgiN-RjFWg16JE-Q,5393
+prometheus_client/multiprocess.py,sha256=VIvAR0vmjL0lknnTijKt9HS1DNz9rZrS09HqIIcaZLs,7539
+prometheus_client/openmetrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+prometheus_client/openmetrics/__pycache__/__init__.cpython-311.pyc,,
+prometheus_client/openmetrics/__pycache__/exposition.cpython-311.pyc,,
+prometheus_client/openmetrics/__pycache__/parser.cpython-311.pyc,,
+prometheus_client/openmetrics/exposition.py,sha256=Ef3GeveuojMzOrl-T7cG6Ml2TRN1xIYjpe_puReFrlo,2993
+prometheus_client/openmetrics/parser.py,sha256=c6vQccyW93MXzc22QGdceETg0m_KMeMyEbKrfObG0R8,22125
+prometheus_client/parser.py,sha256=zuVhB8clFPvQ9wOEj1XikN7NoJe8J3pZcQkNgEUkuXg,7434
+prometheus_client/platform_collector.py,sha256=t_GD2oCLN3Pql4TltbNqTap8a4HOtbvBm0OU5_gPn38,1879
+prometheus_client/process_collector.py,sha256=B8y36L1iq0c3KFlvdNj1F5JEQLTec116h6y3m9Jhk90,3864
+prometheus_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+prometheus_client/registry.py,sha256=3R-yxiPitVs36cnIRnotqSJmOPwAQsLz-tl6kw3rcd4,6196
+prometheus_client/samples.py,sha256=smIiOIsAwGXHgM_7xg9Zo5yTEM2gavYvVtgGTjdWMcA,1687
+prometheus_client/twisted/__init__.py,sha256=0RxJjYSOC5p6o2cu6JbfUzc8ReHYQGNv9pKP-U4u7OE,72
+prometheus_client/twisted/__pycache__/__init__.cpython-311.pyc,,
+prometheus_client/twisted/__pycache__/_exposition.cpython-311.pyc,,
+prometheus_client/twisted/_exposition.py,sha256=2TL2BH5sW0i6H7dHkot9aBH9Ld-I60ax55DuaIWnElo,250
+prometheus_client/utils.py,sha256=zKJZaW_hyZgQSmkaD-rgT5l-YsT3--le0BRQ7v_x8eE,594
+prometheus_client/values.py,sha256=hzThQQd0x4mIPR3ddezQpjUoDVdSBnwem4Z48woxpa8,5002

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/WHEEL ADDED Viewed

	@@ -0,0 +1,5 @@

+Wheel-Version: 1.0
+Generator: setuptools (75.6.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

.venv/lib/python3.11/site-packages/prometheus_client-0.21.1.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ prometheus_client

.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so ADDED Viewed

Binary file (37.9 kB). View file

.venv/lib/python3.11/site-packages/torch/_VF.py ADDED Viewed

	@@ -0,0 +1,31 @@

+"""
+This makes the functions in torch._C._VariableFunctions available as
+    torch._VF.<funcname>
+without mypy being able to find them.
+A subset of those functions are mapped to ATen functions in
+torch/jit/_builtins.py
+See https://github.com/pytorch/pytorch/issues/21478 for the reason for
+introducing torch._VF
+"""
+import sys
+import types
+import torch
+class VFModule(types.ModuleType):
+    """Module object that forwards unknown attribute lookups to ``torch._C._VariableFunctions``."""
+    # Target of the forwarding; assigned in __init__.
+    vf: types.ModuleType
+    def __init__(self, name: str):
+        super().__init__(name)
+        self.vf = torch._C._VariableFunctions
+    def __getattr__(self, name: str) -> object:
+        # Python only calls __getattr__ after normal attribute lookup fails,
+        # so attributes set on this module object itself take precedence.
+        return getattr(self.vf, name)
+# Swap this module's sys.modules entry for a VFModule instance so that
+# ``torch._VF.<funcname>`` resolves through VFModule.__getattr__.
+sys.modules[__name__] = VFModule(__name__)

.venv/lib/python3.11/site-packages/torch/_VF.pyi ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/torch/__config__.py ADDED Viewed

	@@ -0,0 +1,23 @@

+# mypy: allow-untyped-defs
+import torch
+def show():
+    """
+    Return a human-readable string with descriptions of the
+    configuration of PyTorch.
+    """
+    # Thin wrapper: the value comes straight from the C extension binding.
+    return torch._C._show_config()
+# TODO: In principle, we could provide more structured version/config
+# information here. For now only CXX_FLAGS is exposed, as Timer
+# uses them.
+def _cxx_flags():
+    """Returns the CXX_FLAGS used when building PyTorch."""
+    # Thin wrapper: the value comes straight from the C extension binding.
+    return torch._C._cxx_flags()
+def parallel_info():
+    r"""Returns detailed string with parallelization settings"""
+    # Thin wrapper: the value comes straight from the C extension binding.
+    return torch._C._parallel_info()

.venv/lib/python3.11/site-packages/torch/__future__.py ADDED Viewed

	@@ -0,0 +1,75 @@

+# Module-level flags backing the get_*/set_* helpers defined in this module.
+# Both start out disabled.
+_overwrite_module_params_on_conversion: bool = False
+_swap_module_params_on_conversion: bool = False
+def set_overwrite_module_params_on_conversion(value: bool) -> None:
+    """
+    Sets whether to assign new tensors to the parameters instead of changing the
+    existing parameters in-place when converting an ``nn.Module``.
+    When enabled, the following methods will assign new parameters to the module:
+    #. ``module.{device}()`` (e.g. :meth:`nn.Module.cuda()`) for moving a module between devices
+    #. ``module.{dtype}()`` (e.g. :meth:`nn.Module.float()`) for converting a module to a different dtype
+    #. :meth:`nn.Module.to`
+    #. :meth:`nn.Module.to_empty`
+    Args:
+        value (bool): Whether to assign new tensors or not.
+    """
+    # Rebind the module-level flag; ``value`` is stored as given, without
+    # validation or coercion.
+    global _overwrite_module_params_on_conversion
+    _overwrite_module_params_on_conversion = value
+def get_overwrite_module_params_on_conversion() -> bool:
+    """
+    Returns whether to assign new tensors to the parameters instead of changing the
+    existing parameters in-place when converting an :class:`torch.nn.Module`. Defaults to ``False``.
+    See :func:`~torch.__future__.set_overwrite_module_params_on_conversion` for more information.
+    """
+    # Plain read of the module-level flag.
+    return _overwrite_module_params_on_conversion
+def set_swap_module_params_on_conversion(value: bool) -> None:
+    """
+    Sets whether to use :func:`~torch.utils.swap_tensors` instead of setting ``.data`` to
+    change the existing parameters in-place when converting an ``nn.Module`` and instead
+    of ``param.copy_(state_dict[key])`` when loading a state dict into an ``nn.Module``.
+    .. note::
+        This function takes precedence over :func:`~torch.__future__.get_overwrite_module_params_on_conversion`
+    When enabled, the following methods will swap the existing parameters in-place:
+    #. ``module.{device}()`` (e.g. :meth:`nn.Module.cuda()`) for moving a module between devices
+    #. ``module.{dtype}()`` (e.g. :meth:`nn.Module.float()`) for converting a module to a different dtype
+    #. :meth:`nn.Module.to`
+    #. :meth:`nn.Module.to_empty`
+    #. :meth:`nn.Module.load_state_dict`
+    The semantics for :meth:`~nn.Module.load_state_dict` when this is set are as follows:
+    #. For each parameter/buffer, its corresponding ``state_dict['key']`` is transformed via
+       :meth:`~torch.Tensor.module_load` (i.e. ``res = param.module_load(state_dict['key'])``)
+    #. If necessary, ``res`` will be wrapped in an :class:`~nn.Parameter`
+    #. The parameter/buffer in the module will be swapped via :func:`~torch.utils.swap_tensors`
+       with ``res``
+    Args:
+        value (bool): Whether to use :func:`~torch.utils.swap_tensors` or not.
+    """
+    # Rebind the module-level flag; ``value`` is stored as given, without
+    # validation or coercion.
+    global _swap_module_params_on_conversion
+    _swap_module_params_on_conversion = value
+def get_swap_module_params_on_conversion() -> bool:
+    """
+    Returns whether to use :func:`~torch.utils.swap_tensors` instead of setting ``.data`` to
+    change the existing parameters in-place when converting an ``nn.Module``. Defaults to ``False``.
+    See :func:`~torch.__future__.set_swap_module_params_on_conversion` for more information.
+    """
+    # Plain read of the module-level flag.
+    return _swap_module_params_on_conversion

.venv/lib/python3.11/site-packages/torch/__init__.py ADDED Viewed

	@@ -0,0 +1,2665 @@

+"""
+The torch package contains data structures for multi-dimensional
+tensors and defines mathematical operations over these tensors.
+Additionally, it provides many utilities for efficient serialization of
+Tensors and arbitrary types, and other useful utilities.
+It has a CUDA counterpart, that enables you to run your tensor computations
+on an NVIDIA GPU with compute capability >= 3.0.
+"""
+# mypy: allow-untyped-defs
+import builtins
+import ctypes
+import glob
+import importlib
+import inspect
+import math
+import os
+import platform
+import sys
+import textwrap
+import threading
+from typing import (
+    Any as _Any,
+    Callable as _Callable,
+    Dict as _Dict,
+    Optional as _Optional,
+    overload as _overload,
+    Set as _Set,
+    Tuple as _Tuple,
+    Type as _Type,
+    TYPE_CHECKING,
+    TypeVar as _TypeVar,
+    Union as _Union,
+)
+from typing_extensions import ParamSpec as _ParamSpec, TypeGuard as _TypeGuard
+if TYPE_CHECKING:
+    from .types import IntLikeType
+# multipy/deploy is setting this import before importing torch, this is the most
+# reliable way we have to detect if we're running within deploy.
+# https://github.com/pytorch/multipy/blob/d60f34ad38c371e441fe7ffdb77a3c3dda5a5d19/multipy/runtime/interpreter/interpreter_impl.cpp#L134-L137
+def _running_with_deploy() -> builtins.bool:
+    """
+    Return True when ``sys.modules["torch._meta_registrations"]`` is the
+    builtin ``object`` type itself, i.e. the sentinel described in the comment
+    above this function; return False when the entry is missing or is anything else.
+    """
+    return sys.modules.get("torch._meta_registrations", None) is object
+from torch._utils import (
+    _functionalize_sync as _sync,
+    _import_dotted_name,
+    classproperty,
+)
+from torch._utils_internal import (
+    get_file_path,
+    prepare_multiprocessing_environment,
+    USE_GLOBAL_DEPS,
+    USE_RTLD_GLOBAL_WITH_LIBTORCH,
+)
+# TODO(torch_deploy) figure out how to freeze version.py in fbcode build
+# Under deploy a hard-coded version string is used; otherwise the version is
+# re-exported from torch.torch_version.
+if _running_with_deploy():
+    __version__ = "torch-deploy-1.8"
+else:
+    from torch.torch_version import __version__ as __version__
+__all__ = [
+    "BoolStorage",
+    "BoolTensor",
+    "ByteStorage",
+    "ByteTensor",
+    "CharStorage",
+    "CharTensor",
+    "DoubleStorage",
+    "DoubleTensor",
+    "FloatStorage",
+    "FloatTensor",
+    "GradScaler",
+    "IntStorage",
+    "IntTensor",
+    "LongStorage",
+    "LongTensor",
+    "ShortStorage",
+    "ShortTensor",
+    "SymBool",
+    "SymFloat",
+    "SymInt",
+    "Tensor",
+    "TypedStorage",
+    "UntypedStorage",
+    "are_deterministic_algorithms_enabled",
+    "autocast",
+    "chunk",
+    "compile",
+    "cond",
+    "enable_grad",
+    "export",
+    "get_default_device",
+    "get_deterministic_debug_mode",
+    "get_device_module",
+    "get_float32_matmul_precision",
+    "get_rng_state",
+    "inference_mode",
+    "initial_seed",
+    "is_deterministic_algorithms_warn_only_enabled",
+    "is_storage",
+    "is_tensor",
+    "is_warn_always_enabled",
+    "load",
+    "lobpcg",
+    "manual_seed",
+    "matmul",
+    "no_grad",
+    "rand",
+    "randn",
+    "save",
+    "seed",
+    "set_default_device",
+    "set_default_tensor_type",
+    "set_deterministic_debug_mode",
+    "set_float32_matmul_precision",
+    "set_printoptions",
+    "set_rng_state",
+    "set_warn_always",
+    "split",
+    "stack",
+    "sym_float",
+    "sym_int",
+    "sym_ite",
+    "sym_max",
+    "sym_min",
+    "sym_not",
+    "typename",
+    "unravel_index",
+    "use_deterministic_algorithms",
+    "vmap",
+]
+# Please keep this list sorted
+# The check below runs at import time and compares against plain ``sorted``
+# order (code-point order, so uppercase names come before lowercase ones).
+# Being an ``assert``, it is skipped when Python runs with -O.
+assert __all__ == sorted(__all__)
+################################################################################
+# Load the extension module
+################################################################################
+if sys.platform == "win32":
+    def _load_dll_libraries() -> None:
+        """
+        Register DLL search directories and load every ``*.dll`` found in the
+        ``lib`` directory next to this file.
+        Raises:
+            OSError: built via ``ctypes.WinError`` when a DLL fails to load for
+                a reason other than error code 126 on the first attempt, or
+                for any reason on the fallback ``LoadLibraryW`` attempt.
+        """
+        import sysconfig
+        from torch.version import cuda as cuda_version
+        pfiles_path = os.getenv("ProgramFiles", r"C:\Program Files")
+        py_dll_path = os.path.join(sys.exec_prefix, "Library", "bin")
+        th_dll_path = os.path.join(os.path.dirname(__file__), "lib")
+        # NOTE(review): sysconfig.get_config_var returns None for an unset
+        # variable, in which case os.path.join would raise TypeError here --
+        # confirm "userbase" is always defined on supported interpreters.
+        usebase_path = os.path.join(
+            sysconfig.get_config_var("userbase"), "Library", "bin"
+        )
+        # When users create a virtualenv that inherits the base environment,
+        # we will need to add the corresponding library directory into
+        # DLL search directories. Otherwise, it will rely on `PATH` which
+        # is dependent on user settings.
+        if sys.exec_prefix != sys.base_exec_prefix:
+            base_py_dll_path = os.path.join(sys.base_exec_prefix, "Library", "bin")
+        else:
+            base_py_dll_path = ""
+        # Keep only candidate directories that exist; "" never exists, so the
+        # placeholder values above and below are filtered out here.
+        dll_paths = [
+            p
+            for p in (th_dll_path, py_dll_path, base_py_dll_path, usebase_path)
+            if os.path.exists(p)
+        ]
+        # Fall back to NVTOOLSEXT_PATH (or the default install location) only
+        # when nvToolsExt64_1.dll is absent from every directory found so far.
+        if not builtins.any(
+            os.path.exists(os.path.join(p, "nvToolsExt64_1.dll")) for p in dll_paths
+        ):
+            nvtoolsext_dll_path = os.path.join(
+                os.getenv(
+                    "NVTOOLSEXT_PATH",
+                    os.path.join(pfiles_path, "NVIDIA Corporation", "NvToolsExt"),
+                ),
+                "bin",
+                "x64",
+            )
+        else:
+            nvtoolsext_dll_path = ""
+        # Same idea for the CUDA runtime: only consult CUDA_PATH_V<major>_<minor>
+        # (or the default toolkit location) for CUDA builds where no
+        # cudart64*.dll is present in the directories found so far.
+        if cuda_version and builtins.all(
+            not glob.glob(os.path.join(p, "cudart64*.dll")) for p in dll_paths
+        ):
+            cuda_version_1 = cuda_version.replace(".", "_")
+            cuda_path_var = "CUDA_PATH_V" + cuda_version_1
+            default_path = os.path.join(
+                pfiles_path, "NVIDIA GPU Computing Toolkit", "CUDA", f"v{cuda_version}"
+            )
+            cuda_path = os.path.join(os.getenv(cuda_path_var, default_path), "bin")
+        else:
+            cuda_path = ""
+        dll_paths.extend(
+            p for p in (nvtoolsext_dll_path, cuda_path) if os.path.exists(p)
+        )
+        kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
+        with_load_library_flags = hasattr(kernel32, "AddDllDirectory")
+        # 0x0001 is presumably SEM_FAILCRITICALERRORS -- confirm against the
+        # SetErrorMode documentation. The previous mode is restored at the end.
+        prev_error_mode = kernel32.SetErrorMode(0x0001)
+        # Declare pointer-sized return types so a failed load comes back as
+        # None (ctypes maps a NULL c_void_p result to None).
+        kernel32.LoadLibraryW.restype = ctypes.c_void_p
+        if with_load_library_flags:
+            kernel32.LoadLibraryExW.restype = ctypes.c_void_p
+        for dll_path in dll_paths:
+            os.add_dll_directory(dll_path)
+        # Probe for the MSVC runtime; a failure is reported but not fatal.
+        try:
+            ctypes.CDLL("vcruntime140.dll")
+            ctypes.CDLL("msvcp140.dll")
+            ctypes.CDLL("vcruntime140_1.dll")
+        except OSError:
+            print(
+                textwrap.dedent(
+                    """
+                    Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
+                    It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe
+                    """
+                ).strip()
+            )
+        dlls = glob.glob(os.path.join(th_dll_path, "*.dll"))
+        # PATH is patched at most once, and only if some DLL needs the fallback.
+        path_patched = False
+        for dll in dlls:
+            is_loaded = False
+            if with_load_library_flags:
+                # 0x00001100 presumably combines LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
+                # and LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR -- confirm against the
+                # LoadLibraryExW documentation.
+                res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
+                last_error = ctypes.get_last_error()
+                # Error 126 (presumably ERROR_MOD_NOT_FOUND -- confirm) is
+                # tolerated here and retried through the PATH-based fallback
+                # below; any other failure is raised immediately.
+                if res is None and last_error != 126:
+                    err = ctypes.WinError(last_error)
+                    err.strerror += (
+                        f' Error loading "{dll}" or one of its dependencies.'
+                    )
+                    raise err
+                elif res is not None:
+                    is_loaded = True
+            if not is_loaded:
+                if not path_patched:
+                    # Prepend the collected directories to PATH for LoadLibraryW.
+                    os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]])
+                    path_patched = True
+                res = kernel32.LoadLibraryW(dll)
+                if res is None:
+                    err = ctypes.WinError(ctypes.get_last_error())
+                    err.strerror += (
+                        f' Error loading "{dll}" or one of its dependencies.'
+                    )
+                    raise err
+        # NOTE(review): this restore is only reached on the success path; the
+        # two ``raise err`` sites above leave the process error mode changed.
+        kernel32.SetErrorMode(prev_error_mode)
+    # Run once at import time, then drop the helper from the module namespace.
+    _load_dll_libraries()
+    del _load_dll_libraries
+def _preload_cuda_deps(lib_folder: str, lib_name: str) -> None:
+    """
+    Preloads cuda deps if they could not be found otherwise.
+    Searches each ``sys.path`` entry for ``nvidia/<lib_folder>/lib/<lib_name>``
+    and loads the first match with ``ctypes.CDLL``.
+    Args:
+        lib_folder: directory name under ``nvidia`` to look in.
+        lib_name: file name pattern; it is passed to ``glob.glob``, so glob
+            wildcards are honoured.
+    Raises:
+        ValueError: if no ``sys.path`` entry yields a match.
+    """
+    # Should only be called on Linux if default path resolution has failed
+    assert platform.system() == "Linux", "Should only be called on Linux"
+    lib_path = None
+    for path in sys.path:
+        nvidia_path = os.path.join(path, "nvidia")
+        if not os.path.exists(nvidia_path):
+            continue
+        candidate_lib_paths = glob.glob(
+            os.path.join(nvidia_path, lib_folder, "lib", lib_name)
+        )
+        # The loop breaks as soon as lib_path is set, so ``not lib_path`` is
+        # always true when this line is reached; the first glob hit wins.
+        if candidate_lib_paths and not lib_path:
+            lib_path = candidate_lib_paths[0]
+        if lib_path:
+            break
+    if not lib_path:
+        raise ValueError(f"{lib_name} not found in the system path {sys.path}")
+    ctypes.CDLL(lib_path)
+# See Note [Global dependencies]
+def _load_global_deps() -> None:
+    """
+    Load ``lib/libtorch_global_deps`` (next to this file) with ``RTLD_GLOBAL``.
+    Does nothing under deploy or on Windows. If the initial load fails with an
+    ``OSError`` whose message mentions one of the known CUDA libraries, every
+    library in the table below is preloaded via ``_preload_cuda_deps`` and the
+    load is retried once; any other ``OSError`` is re-raised.
+    """
+    if _running_with_deploy() or platform.system() == "Windows":
+        return
+    # Determine the file extension based on the platform
+    lib_ext = ".dylib" if platform.system() == "Darwin" else ".so"
+    lib_name = f"libtorch_global_deps{lib_ext}"
+    here = os.path.abspath(__file__)
+    global_deps_lib_path = os.path.join(os.path.dirname(here), "lib", lib_name)
+    try:
+        ctypes.CDLL(global_deps_lib_path, mode=ctypes.RTLD_GLOBAL)
+    except OSError as err:
+        # Can only happen for wheel with cuda libs as PYPI deps
+        # As PyTorch is not purelib, but nvidia-*-cu12 is
+        # Maps the folder name under ``nvidia/`` to a glob pattern for the
+        # shared object inside that folder's ``lib`` directory.
+        cuda_libs: _Dict[str, str] = {
+            "cublas": "libcublas.so.*[0-9]",
+            "cudnn": "libcudnn.so.*[0-9]",
+            "cuda_nvrtc": "libnvrtc.so.*[0-9]",
+            "cuda_runtime": "libcudart.so.*[0-9]",
+            "cuda_cupti": "libcupti.so.*[0-9]",
+            "cufft": "libcufft.so.*[0-9]",
+            "curand": "libcurand.so.*[0-9]",
+            "nvjitlink": "libnvJitLink.so.*[0-9]",
+            "cusparse": "libcusparse.so.*[0-9]",
+            "cusolver": "libcusolver.so.*[0-9]",
+            "nccl": "libnccl.so.*[0-9]",
+            "nvtx": "libnvToolsExt.so.*[0-9]",
+        }
+        # Match on the library stem (text before the first "."), e.g.
+        # "libcublas", as a substring of the error's first argument.
+        is_cuda_lib_err = [
+            lib for lib in cuda_libs.values() if lib.split(".")[0] in err.args[0]
+        ]
+        if not is_cuda_lib_err:
+            raise err
+        # Note: this loop rebinds the local ``lib_name``; that is harmless
+        # because ``global_deps_lib_path`` was already computed above.
+        for lib_folder, lib_name in cuda_libs.items():
+            _preload_cuda_deps(lib_folder, lib_name)
+        # Retry once; a second failure propagates to the caller.
+        ctypes.CDLL(global_deps_lib_path, mode=ctypes.RTLD_GLOBAL)
+# Import the C extension. The RTLD_GLOBAL path is taken when either the build
+# constant or the TORCH_USE_RTLD_GLOBAL environment variable is set (any
+# non-empty value) and we are under deploy or not on Windows.
+if (USE_RTLD_GLOBAL_WITH_LIBTORCH or os.getenv("TORCH_USE_RTLD_GLOBAL")) and (
+    _running_with_deploy() or platform.system() != "Windows"
+):
+    # Do it the hard way.  You might want to load libtorch with RTLD_GLOBAL in a
+    # few circumstances:
+    #
+    #   1. You're in a build environment (e.g., fbcode) where
+    #      libtorch_global_deps is not available, but you still need
+    #      to get mkl to link in with RTLD_GLOBAL or it will just
+    #      not work.
+    #
+    #   2. You're trying to run PyTorch under UBSAN and you need
+    #      to ensure that only one copy of libtorch is loaded, so
+    #      vptr checks work properly
+    #
+    # If you're using this setting, you must verify that all the libraries
+    # you load consistently use the same libstdc++, or you may have
+    # mysterious segfaults.
+    #
+    # Temporarily switch the interpreter's dlopen flags for the import, then
+    # put the previous flags back.
+    # NOTE(review): the flags are not restored if the import raises.
+    old_flags = sys.getdlopenflags()
+    sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_LAZY)
+    from torch._C import *  # noqa: F403
+    sys.setdlopenflags(old_flags)
+    del old_flags
+else:
+    # Easy way.  You want this most of the time, because it will prevent
+    # C++ symbols from libtorch clobbering C++ symbols from other
+    # libraries, leading to mysterious segfaults.
+    #
+    # If building in an environment where libtorch_global_deps isn't available
+    # like parts of fbsource, but where RTLD_GLOBAL causes segfaults, you will
+    # want USE_RTLD_GLOBAL_WITH_LIBTORCH = False and USE_GLOBAL_DEPS = False
+    #
+    # See Note [Global dependencies]
+    if USE_GLOBAL_DEPS:
+        _load_global_deps()
+    from torch._C import *  # noqa: F403
+class SymInt:
+    """
+    Like an int (including magic methods), but redirects all operations on the
+    wrapped node. This is used in particular to symbolically record operations
+    in the symbolic shape workflow.
+    """
+    def __init__(self, node):
+        # This field MUST be named node; C++ binding code assumes that this
+        # class has a field named node that stores SymNode
+        self.node = node
+    def __bool__(self):
+        # Truthiness is defined as "not equal to zero", routed through __ne__/__eq__.
+        return builtins.bool(self != 0)
+    def __int__(self):
+        return self.node.int_()
+    def __index__(self):
+        return self.node.int_()
+    # Magic methods installed by torch.fx.experimental.sym_node
+    def __round__(self, ndigits=None):
+        # ``ndigits`` is ignored; the same object is returned unchanged.
+        return self
+    # The four division methods below share one shape: a float/SymFloat
+    # operand promotes to the SymFloat path, an int/SymInt operand goes to the
+    # dedicated integer hook, and anything else returns NotImplemented so
+    # Python can try the other operand.
+    def __truediv__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(self).__float_truediv__(other)
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        return self.__int_truediv__(other)
+    def __rtruediv__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(self).__rfloat_truediv__(other)
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        return self.__rint_truediv__(other)
+    def __floordiv__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(math.floor(sym_float(self) / other))
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        return self.__int_floordiv__(other)
+    def __rfloordiv__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(math.floor(other / sym_float(self)))
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        return self.__rint_floordiv__(other)
+    # nb: complex is impossible to handle correctly lol, with
+    # negative base and integral float need to diverge semantics and
+    # just always return complex.  Neener neener pretend this problem
+    # doesn't exist
+    def __pow__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(self).__pow__(other)
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        # Guards!  This guard is necessary because we need to know it to
+        # determine the output type of this operation
+        if other >= 0:
+            return self.__pow_by_natural__(other)
+        else:
+            # Mercifully, when the exponent is negative, Python just promotes
+            # to doubles and does a float pow:
+            #
+            #   if (Py_SIZE(b) < 0 && c == NULL) {
+            #       /* if exponent is negative and there's no modulus:
+            #              return a float.  This works because we know
+            #              that this calls float_pow() which converts its
+            #              arguments to double. */
+            #       Py_DECREF(a);
+            #       Py_DECREF(b);
+            #       return PyFloat_Type.tp_as_number->nb_power(v, w, x);
+            #   }
+            return sym_float(self).__pow__(sym_float(other))
+    def __rpow__(self, other):
+        if isinstance(other, (builtins.float, SymFloat)):
+            return sym_float(self).__rpow__(other)
+        if not isinstance(other, (builtins.int, SymInt)):
+            return NotImplemented
+        if self >= 0:  # self is exponent
+            return self.__rpow_by_natural__(other)
+        else:
+            return sym_float(self).__rpow__(sym_float(other))
+    # Every method below that raises TypeError("type stub not overridden") is
+    # a placeholder kept for its signature; calling one that has not been
+    # replaced at runtime fails with that TypeError.
+    def __eq__(self, other: object) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __lt__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __gt__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __le__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __ge__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __add__(self, other) -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __mod__(self, other: "IntLikeType") -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __mul__(self, other) -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __pow_by_natural__(self, other) -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __rpow_by_natural__(self, other) -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __int_truediv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __rint_truediv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    # NOTE(review): the two floordiv hooks are annotated as returning
+    # SymFloat, but they are only reached for int/SymInt operands, where
+    # Python's ``//`` is integral -- confirm whether "SymInt" was intended.
+    def __int_floordiv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __rint_floordiv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __sym_max__(self, other):
+        raise TypeError("type stub not overridden")
+    def __sym_min__(self, other):
+        raise TypeError("type stub not overridden")
+    def __sym_float__(self):
+        raise TypeError("type stub not overridden")
+    def __neg__(self):
+        raise TypeError("type stub not overridden")
+    def __sub__(self, other: "IntLikeType") -> "SymInt":
+        raise TypeError("type stub not overridden")
+    def __repr__(self):
+        return self.node._graph_repr()
+    def _sympy_(self):
+        return self.node.expr
+    def __hash__(self) -> builtins.int:
+        # Only nested ints are hashable; everything else raises TypeError.
+        if self.node.is_nested_int():
+            return hash(self.node.nested_int())
+        else:
+            # We could support constant SymInts as well, but not doing it for now
+            raise TypeError("unhashable type: non-nested SymInt")
+            # TODO: Force specialization
+            # This can't be done because the TypeError here is load bearing
+            # for einops
+            # https://github.com/arogozhnikov/einops/blob/6181e1e95dc58c00a3143c1726da1c6ee0463164/einops/einops.py#L237
+            # return hash(builtins.int(self))
+    def as_integer_ratio(self) -> _Tuple["SymInt", builtins.int]:
+        """Represent this int as an exact integer ratio"""
+        return self, 1
+    def bit_length(self) -> builtins.int:
+        # TODO: A more relaxed guard is possible here, where you guard to
+        # allow all integer quantities which would result in the same bit
+        # length.  We can also just make a dedicated Sympy function for
+        # computing this quantity and represent it symbolically.
+        # Converts to a concrete int first (via __int__), then defers to int.bit_length.
+        return builtins.int(self).bit_length()
+    def conjugate(self) -> "SymInt":
+        return self
+class SymFloat:
+    """
+    Like a float (including magic methods), but redirects all operations on the
+    wrapped node. This is used in particular to symbolically record operations
+    in the symbolic shape workflow.
+    """
+    def __init__(self, node):
+        # This field MUST be named node; C++ binding code assumes that this
+        # class has a field named node that stores SymNode
+        self.node = node
+    # The division methods accept int, float, SymInt and SymFloat operands,
+    # convert the operand with sym_float, and return NotImplemented for
+    # anything else so Python can try the other operand.
+    def __truediv__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        return self.__float_truediv__(sym_float(other))
+    def __rtruediv__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        return self.__rfloat_truediv__(sym_float(other))
+    def __floordiv__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        return sym_float(math.floor(self / sym_float(other)))
+    def __rfloordiv__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        return sym_float(math.floor(sym_float(other) / self))
+    def __bool__(self):
+        return self.node.bool_()
+    def __float__(self):
+        # NOTE(review): the ("", 0) arguments look like placeholder
+        # file/line information -- confirm against SymNode.guard_float.
+        return self.node.guard_float("", 0)
+    # Symbolic power does NOT work with negative base, this is to avoid
+    # potential complex outputs
+    def __pow__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        # Here ``self`` is the base.
+        torch._check(self >= 0)
+        return self.__float_pow__(other)
+    def __rpow__(self, other):
+        if not isinstance(other, (builtins.int, builtins.float, SymInt, SymFloat)):
+            return NotImplemented
+        # In the reflected form ``other`` is the base, so it is the one checked.
+        torch._check(other >= 0)
+        return self.__rfloat_pow__(other)
+    # Magic methods installed by torch.fx.experimental.sym_node
+    # Every method below that raises TypeError("type stub not overridden") is
+    # a placeholder kept for its signature; calling one that has not been
+    # replaced at runtime fails with that TypeError.
+    def __eq__(self, other: object) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __lt__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __gt__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __le__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __ge__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __float_pow__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __rfloat_pow__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __float_truediv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __rfloat_truediv__(self, other) -> "SymFloat":
+        raise TypeError("type stub not overridden")
+    def __trunc__(self):
+        raise TypeError("type stub not overridden")
+    def __sym_max__(self, other):
+        raise TypeError("type stub not overridden")
+    def __sym_min__(self, other):
+        raise TypeError("type stub not overridden")
+    def __sym_int__(self):
+        raise TypeError("type stub not overridden")
+    def is_integer(self):
+        """Return True if the float is an integer."""
+        raise TypeError("type stub not overridden")
+    def as_integer_ratio(self) -> _Tuple[builtins.int, builtins.int]:
+        """Represent this float as an exact integer ratio"""
+        # Converts to a concrete float first (via __float__).
+        return builtins.float(self).as_integer_ratio()
+    def __repr__(self):
+        return self.node._graph_repr()
+    def _sympy_(self):
+        return self.node.expr
+    def __hash__(self):
+        # Hashes the concrete float value obtained through __float__.
+        return hash(builtins.float(self))
+class SymBool:
+    """
+    Like a bool (including magic methods), but redirects all operations on the
+    wrapped node. This is used in particular to symbolically record operations
+    in the symbolic shape workflow.
+    Unlike regular bools, regular boolean operators will force extra guards instead
+    of symbolically evaluating.  Use the bitwise operators instead to handle this.
+    """
+    def __init__(self, node):
+        # This field MUST be named node; C++ binding code assumes that this
+        # class has a field named node that stores SymNode
+        self.node = node
+    def __bool__(self):
+        return self.node.bool_()
+    def __int__(self):
+        return builtins.int(self.node.bool_())
+    # Magic methods installed by torch.fx.experimental.sym_node
+    # Every method below that raises TypeError("type stub not overridden") is
+    # a placeholder kept for its signature; calling one that has not been
+    # replaced at runtime fails with that TypeError.
+    def __and__(self, other) -> "SymBool":
+        raise TypeError("type stub not overridden")
+    def __or__(self, other) -> "SymBool":
+        raise TypeError("type stub not overridden")
+    # We very carefully define __sym_not__, and not a number of other
+    # plausible alternatives:
+    #
+    #   - We do not override __not__ because this is not a real magic
+    #     method; you cannot override the meaning of the not builtin in
+    #     Python.  We use the name 'sym_not' to clarify that in user code you
+    #     cannot use the builtin not or operator.not_ or operator.__not__ and
+    #     hit this magic method; you must use our custom sym_not operator.
+    #
+    #   - We do not override the __invert__ method because SymBool is
+    #     meant to be usable in situations where bool is expected.  However,
+    #     bitwise negation ~a does the wrong thing with booleans (because
+    #     bool is a subclass of int, so ~1 = -2 which is not falseish.)
+    #     This would be a giant footgun, so we get around it by defining
+    #     our own operator.  Note that bitwise and/or do the right thing,
+    #     so we reuse the conventional operators there for readability.
+    #
+    def __sym_not__(self) -> "SymBool":
+        raise TypeError("type stub not overridden")
+    def __sym_ite__(self, then_val, else_val):
+        raise TypeError("type stub not overridden")
+    def __eq__(self, other) -> builtins.bool:
+        raise TypeError("type stub not overridden")
+    def __repr__(self):
+        return self.node._graph_repr()
+    def _sympy_(self):
+        return self.node.expr
+    def __hash__(self):
+        # Both branches end up hashing a concrete bool; the constant case
+        # reads it straight from the node, the other goes through __bool__.
+        if self.node.is_constant():
+            return hash(self.node.bool_())
+        else:
+            # Force specialization
+            return hash(builtins.bool(self))
+def sym_not(a):
+    r"""SymInt-aware utility for logical negation.
+    Args:
+        a (SymBool or bool): Object to negate
+    Returns:
+        The result of the first applicable strategy, tried in this order:
+        ``__torch_function__`` dispatch, ``a.__sym_not__()``, ``~a`` for
+        ``sympy.Basic`` instances, and finally the builtin ``not a``.
+    """
+    # sympy is imported inside the function rather than at module import time.
+    import sympy
+    # NOTE(review): ``overrides`` is not bound in the visible part of this
+    # file; presumably it is imported further down the module -- verify.
+    if overrides.has_torch_function_unary(a):
+        return overrides.handle_torch_function(sym_not, (a,), a)
+    if hasattr(a, "__sym_not__"):
+        return a.__sym_not__()
+    if isinstance(a, sympy.Basic):
+        return ~a  # type: ignore[operator]
+    return not a
+def sym_float(a):
+    r"""SymInt-aware utility for float casting.
+    Args:
+        a (SymInt, SymFloat, or object): Object to cast
+    Returns:
+        ``a`` itself when it is already a ``SymFloat``; otherwise the result
+        of ``a.__sym_float__()`` when that hook exists, else ``float(a)``.
+    """
+    # __torch_function__ dispatch is checked before any of the casts below.
+    if overrides.has_torch_function_unary(a):
+        return overrides.handle_torch_function(sym_float, (a,), a)
+    if isinstance(a, SymFloat):
+        return a
+    elif hasattr(a, "__sym_float__"):
+        return a.__sym_float__()
+    return builtins.float(a)  # type: ignore[operator]
+def sym_int(a):
+    r"""SymInt-aware utility for int casting.
+    Args:
+        a (SymInt, SymFloat, or object): Object to cast
+    """
+    if overrides.has_torch_function_unary(a):
+        return overrides.handle_torch_function(sym_int, (a,), a)
+    if isinstance(a, SymInt):
+        return a
+    elif isinstance(a, SymFloat):
+        return math.trunc(a)
+    return builtins.int(a)  # type: ignore[operator]
+def sym_max(a, b):
+    """
+    SymInt-aware utility for max which avoids branching on a < b.
+    Unlike builtins.max(), this only works for int/float, and it always
+    promotes to float if any argument is float (unlike builtins.max, which
+    will faithfully preserve the type of the input argument).
+    """
+    if overrides.has_torch_function((a, b)):
+        return overrides.handle_torch_function(sym_max, (a, b), a, b)
+    if isinstance(a, (SymInt, SymFloat)):
+        return a.__sym_max__(b)
+    elif isinstance(b, (SymInt, SymFloat)):
+        # Due to promotion semantics, this is operator is commutative:
+        # max(1, 1.0) === max(1.0, 1) === 1.0
+        return b.__sym_max__(a)
+    # TODO: Probably can make bool work too, just lazy
+    all_types, float_types = __all_and_float_types()
+    assert isinstance(a, all_types), type(a)
+    assert isinstance(b, all_types), type(b)
+    if isinstance(a, float_types) or isinstance(b, float_types):
+        return builtins.float(builtins.max(a, b))
+    else:
+        return builtins.max(a, b)
+def __all_and_float_types() -> _Tuple[_Tuple[_Type, ...], _Tuple[_Type, ...]]:
+    try:
+        import numpy as np
+        all_types: _Tuple[_Type, ...] = (
+            np.integer,
+            np.floating,
+            builtins.int,
+            builtins.float,
+        )
+        float_types: _Tuple[_Type, ...] = (np.floating, builtins.float)
+    except ModuleNotFoundError:
+        all_types = (builtins.int, builtins.float)
+        float_types = (builtins.float,)
+    return all_types, float_types
+def sym_min(a, b):
+    """SymInt-aware utility for min()."""
+    if overrides.has_torch_function((a, b)):
+        return overrides.handle_torch_function(sym_min, (a, b), a, b)
+    if isinstance(a, (SymInt, SymFloat)):
+        return a.__sym_min__(b)
+    elif isinstance(b, (SymInt, SymFloat)):
+        return b.__sym_min__(a)
+    all_types, float_types = __all_and_float_types()
+    assert isinstance(a, all_types), type(a)
+    assert isinstance(b, all_types), type(b)
+    if isinstance(a, float_types) or isinstance(b, float_types):
+        return builtins.float(builtins.min(a, b))
+    else:
+        return builtins.min(a, b)
+# Drop in replacement for math.sqrt, math.sin, math.cos etc
+def _get_sym_math_fn(name):
+    # Factory: builds the unary function that stands in for ``math.<name>``.
+    def fn(a):
+        # Dispatch order: __torch_function__ overrides, then the argument's
+        # own ``__sym_<name>__`` hook, then the plain ``math`` function.
+        if overrides.has_torch_function_unary(a):
+            return overrides.handle_torch_function(fn, (a,), a)
+        if hasattr(a, f"__sym_{name}__"):
+            return getattr(a, f"__sym_{name}__")()
+        return getattr(math, name)(a)
+    return fn
+# Loop temporaries are bound up front and deleted again after the loop so they
+# do not linger in the module namespace.
+__fn, __name, __sym_name = None, "", ""
+for __name in (
+    "sqrt",
+    "cos",
+    "cosh",
+    "sin",
+    "sinh",
+    "tan",
+    "tanh",
+    "asin",
+    "acos",
+    "atan",
+):
+    # Publish each generated function as module attribute ``_sym_<name>`` and
+    # make its __name__/__qualname__ match that attribute name.
+    __sym_name = f"_sym_{__name}"
+    __fn = _get_sym_math_fn(__name)
+    __fn.__qualname__ = __fn.__name__ = __sym_name
+    globals()[__sym_name] = __fn
+# The factory itself is removed as well; only the generated functions remain.
+del __fn, __name, __sym_name, _get_sym_math_fn
+# Adding temporary shortcut
+sym_sqrt = globals()["_sym_sqrt"]
+__all__.append("sym_sqrt")
+def sym_ite(b, t, f):
+    if overrides.has_torch_function((b, t, f)):
+        return overrides.handle_torch_function(sym_ite, (b, t, f), b, t, f)
+    assert isinstance(b, (SymBool, builtins.bool)) and type(t) == type(f)
+    if isinstance(b, SymBool):
+        return b.__sym_ite__(t, f)
+    return t if b else f
+# Check to see if we can load C extensions, and if not provide some guidance
+# on what the problem might be.
+try:
+    # _initExtension is chosen (arbitrarily) as a sentinel.
+    from torch._C import _initExtension
+except ImportError:
+    import torch._C as _C_for_compiled_check
+    # The __file__ check only works for Python 3.7 and above.
+    # A ``__file__`` of None is taken to mean that ``torch._C`` resolved to the
+    # source-tree folder rather than the compiled extension (see the message).
+    if _C_for_compiled_check.__file__ is None:
+        # ``from None`` below drops the original ImportError from the traceback
+        # so only the guidance message is shown.
+        raise ImportError(
+            textwrap.dedent(
+                """
+                Failed to load PyTorch C extensions:
+                    It appears that PyTorch has loaded the `torch/_C` folder
+                    of the PyTorch repository rather than the C extensions which
+                    are expected in the `torch._C` namespace. This can occur when
+                    using the `install` workflow. e.g.
+                        $ python setup.py install && python -c "import torch"
+                    This error can generally be solved using the `develop` workflow
+                        $ python setup.py develop && python -c "import torch"  # This should succeed
+                    or by running Python from a different directory.
+                """
+            ).strip()
+        ) from None
+    raise  # If __file__ is not None the cause is unknown, so just re-raise.
+# The torch._C submodule is already loaded via `from torch._C import *` above
+# Make an explicit reference to the _C submodule to appease linters
+from torch import _C as _C
+# Re-export pass over the extension module. NOTE: ``__name`` is the loop
+# variable (an attribute name of ``_C``) while ``__name__`` is this module's
+# own name; the two are easy to confuse below.
+__name, __obj = "", None
+for __name in dir(_C):
+    # Public names that do not end in "Base" are added to ``__all__``.
+    if __name[0] != "_" and not __name.endswith("Base"):
+        __all__.append(__name)
+        __obj = getattr(_C, __name)
+        # Callables and classes are relabelled so that they report this module
+        # as their ``__module__``, except for the names listed below.
+        if callable(__obj) or inspect.isclass(__obj):
+            if __obj.__module__ != __name__:  # "torch"
+                # TODO: fix their module from C++ side
+                if __name not in {
+                    "DisableTorchFunctionSubclass",
+                    "DisableTorchFunction",
+                    "Generator",
+                }:
+                    __obj.__module__ = __name__  # "torch"
+    elif __name == "TensorBase":
+        # issue 109438 / pr 109940. Prevent TensorBase from being copied into torch.
+        delattr(sys.modules[__name__], __name)
+# Drop the loop temporaries from the module namespace.
+del __name, __obj
+if not TYPE_CHECKING:
+    # issue 38137 and python issue 43367. Submodules of a C extension are
+    # non-standard, and attributes of those submodules cannot be pickled since
+    # pickle expects to be able to import them as "from _C.sub import attr"
+    # which fails with "_C is not a package"
+    def _import_extension_to_sys_modules(module, memo=None):
+        """Register ``module``'s nested submodules in ``sys.modules``.
+        Every attribute of ``module`` that is itself a module and whose
+        ``__name__`` starts with ``module.__name__`` is added to
+        ``sys.modules`` under its own name (existing entries are kept), and is
+        then walked recursively.
+        Args:
+            module: the module whose attributes are scanned.
+            memo: set of modules already visited; guards against visiting the
+                same module twice. Created on the first call when ``None``.
+        """
+        if memo is None:
+            memo = set()
+        if module in memo:
+            return
+        memo.add(module)
+        module_name = module.__name__
+        for name in dir(module):
+            member = getattr(module, name)
+            member_name = getattr(member, "__name__", "")
+            if inspect.ismodule(member) and member_name.startswith(module_name):
+                # setdefault: never replace an entry that is already registered.
+                sys.modules.setdefault(member_name, member)
+                # Recurse for submodules (e.g., `_C._dynamo.eval_frame`)
+                _import_extension_to_sys_modules(member, memo)
+    _import_extension_to_sys_modules(_C)
+    # One-shot helper: remove it once the registration has run.
+    del _import_extension_to_sys_modules
+################################################################################
+# Define basic utilities
+################################################################################
+def typename(obj: _Any, /) -> str:
+    """
+    String representation of the type of an object.
+    This function returns a fully qualified string representation of an object's type.
+    Args:
+        obj (object): The object whose type to represent
+    Returns:
+        str: the type of the object `o`
+    Example:
+        >>> x = torch.tensor([1, 2, 3])
+        >>> torch.typename(x)
+        'torch.LongTensor'
+        >>> torch.typename(torch.nn.Parameter)
+        'torch.nn.parameter.Parameter'
+    """
+    if isinstance(obj, torch.Tensor):
+        return obj.type()
+    module = getattr(obj, "__module__", "") or ""
+    qualname = ""
+    if hasattr(obj, "__qualname__"):
+        qualname = obj.__qualname__
+    elif hasattr(obj, "__name__"):
+        qualname = obj.__name__
+    else:
+        module = obj.__class__.__module__ or ""
+        qualname = obj.__class__.__qualname__
+    if module in {"", "builtins"}:
+        return qualname
+    return f"{module}.{qualname}"
+def is_tensor(obj: _Any, /) -> _TypeGuard["torch.Tensor"]:
+    r"""Returns True if `obj` is a PyTorch tensor.
+    Note that this function is simply doing ``isinstance(obj, Tensor)``.
+    Using that ``isinstance`` check is better for typechecking with mypy,
+    and more explicit - so it's recommended to use that instead of
+    ``is_tensor``.
+    Args:
+        obj (object): Object to test
+    Returns:
+        bool: ``True`` when ``obj`` is an instance of ``torch.Tensor``
+        (instances of subclasses included, as with any ``isinstance`` check).
+    Example::
+        >>> x = torch.tensor([1, 2, 3])
+        >>> torch.is_tensor(x)
+        True
+    """
+    return isinstance(obj, torch.Tensor)
+def is_storage(obj: _Any, /) -> _TypeGuard[_Union["TypedStorage", "UntypedStorage"]]:
+    r"""Returns True if `obj` is a PyTorch storage object.
+    Args:
+        obj (Object): Object to test
+    Returns:
+        bool: ``True`` when the exact type of ``obj`` is one of the entries of
+        ``_storage_classes``.
+    """
+    # Exact-type membership test, not ``isinstance``: an instance of a subclass
+    # only matches if that subclass is itself listed in ``_storage_classes``.
+    return type(obj) in _storage_classes
+_GLOBAL_DEVICE_CONTEXT = threading.local()
+def get_default_device() -> "torch.device":
+    r"""Gets the default ``torch.Tensor`` to be allocated on ``device``"""
+    global _GLOBAL_DEVICE_CONTEXT
+    if hasattr(_GLOBAL_DEVICE_CONTEXT, "device_context"):
+        device = _GLOBAL_DEVICE_CONTEXT.device_context.device
+        if device.index is not None:
+            return device
+        else:
+            # TODO: Call like get_device_index() method corresponding to
+            # each device type
+            return torch.tensor([]).device
+    else:
+        return torch.device("cpu")
+def set_default_device(
+    device: _Optional[_Union["torch.device", str, builtins.int]],
+) -> None:
+    """Sets the default ``torch.Tensor`` to be allocated on ``device``.  This
+    does not affect factory function calls which are called with an explicit
+    ``device`` argument.  Factory calls will be performed as if they
+    were passed ``device`` as an argument.
+    To only temporarily change the default device instead of setting it
+    globally, use ``with torch.device(device):`` instead.
+    The default device is initially ``cpu``.  If you set the default tensor
+    device to another device (e.g., ``cuda``) without a device index, tensors
+    will be allocated on whatever the current device for the device type,
+    even after :func:`torch.cuda.set_device` is called.
+    .. warning::
+        This function imposes a slight performance cost on every Python
+        call to the torch API (not just factory functions).  If this
+        is causing problems for you, please comment on
+        https://github.com/pytorch/pytorch/issues/92701
+    .. note::
+        This doesn't affect functions that create tensors that share the same memory as the input, like:
+        :func:`torch.from_numpy` and :func:`torch.frombuffer`
+    Args:
+        device (device, string, int, or None): the device to set as default.
+            If ``None``, the previously installed default-device context (if
+            any) is exited and no new one is installed.
+    Example::
+        >>> # xdoctest: +SKIP("requires cuda, changes global state")
+        >>> torch.get_default_device()
+        device(type='cpu')
+        >>> torch.set_default_device('cuda')  # current device is 0
+        >>> torch.get_default_device()
+        device(type='cuda', index=0)
+        >>> torch.set_default_device('cuda')
+        >>> torch.cuda.set_device('cuda:1')  # current device is 1
+        >>> torch.get_default_device()
+        device(type='cuda', index=1)
+        >>> torch.set_default_device('cuda:1')
+        >>> torch.get_default_device()
+        device(type='cuda', index=1)
+    """
+    global _GLOBAL_DEVICE_CONTEXT
+    # Leave the context installed by an earlier call (if any) before replacing it.
+    if hasattr(_GLOBAL_DEVICE_CONTEXT, "device_context"):
+        device_context = _GLOBAL_DEVICE_CONTEXT.device_context
+        if device_context is not None:
+            device_context.__exit__(None, None, None)
+    if device is None:
+        device_context = None
+    else:
+        from torch.utils._device import DeviceContext
+        # Entered by hand and deliberately left open: it stays active until a
+        # later call to this function exits it.
+        device_context = DeviceContext(device)
+        device_context.__enter__()
+    # Remember the context (or None) so the next call can tear it down.
+    _GLOBAL_DEVICE_CONTEXT.device_context = device_context
+def set_default_tensor_type(t: _Union[_Type["torch.Tensor"], str], /) -> None:
+    r"""
+    .. warning::
+        This function is deprecated as of PyTorch 2.1, please use :func:`torch.set_default_dtype()` and
+        :func:`torch.set_default_device()` as alternatives.
+    Sets the default ``torch.Tensor`` type to floating point tensor type
+    ``t``. This type will also be used as default floating point type for
+    type inference in :func:`torch.tensor`.
+    The default floating point tensor type is initially ``torch.FloatTensor``.
+    Args:
+        t (type or string): the floating point tensor type or its name
+    Example::
+        >>> # xdoctest: +SKIP("Other tests may have changed the default type. Can we reset it?")
+        >>> torch.tensor([1.2, 3]).dtype    # initial default for floating point is torch.float32
+        torch.float32
+        >>> torch.set_default_tensor_type(torch.DoubleTensor)
+        >>> torch.tensor([1.2, 3]).dtype    # a new floating point tensor
+        torch.float64
+    """
+    # A type given by name is resolved to the type object before it is handed
+    # to the C extension.
+    if isinstance(t, str):
+        t = _import_dotted_name(t)
+    _C._set_default_tensor_type(t)
+def set_default_dtype(d: "torch.dtype", /) -> None:
+    r"""
+    Sets the default floating point dtype to :attr:`d`. Supports floating point dtype
+    as inputs. Other dtypes will cause torch to raise an exception.
+    When PyTorch is initialized its default floating point dtype is torch.float32,
+    and the intent of set_default_dtype(torch.float64) is to facilitate NumPy-like
+    type inference. The default floating point dtype is used to:
+    1. Implicitly determine the default complex dtype. When the default floating type is float16,
+       the default complex dtype is complex32. For float32, the default complex dtype is complex64.
+       For float64, it is complex128. For bfloat16, an exception will be raised because
+       there is no corresponding complex type for bfloat16.
+    2. Infer the dtype for tensors constructed using Python floats or complex Python
+       numbers. See examples below.
+    3. Determine the result of type promotion between bool and integer tensors and
+       Python floats and complex Python numbers.
+    Args:
+        d (:class:`torch.dtype`): the floating point dtype to make the default.
+    Example:
+        >>> # xdoctest: +SKIP("Other tests may have changed the default type. Can we reset it?")
+        >>> # initial default for floating point is torch.float32
+        >>> # Python floats are interpreted as float32
+        >>> torch.tensor([1.2, 3]).dtype
+        torch.float32
+        >>> # initial default for complex is torch.complex64
+        >>> # Complex Python numbers are interpreted as complex64
+        >>> torch.tensor([1.2, 3j]).dtype
+        torch.complex64
+        >>> torch.set_default_dtype(torch.float64)
+        >>> # Python floats are now interpreted as float64
+        >>> torch.tensor([1.2, 3]).dtype  # a new floating point tensor
+        torch.float64
+        >>> # Complex Python numbers are now interpreted as complex128
+        >>> torch.tensor([1.2, 3j]).dtype  # a new complex tensor
+        torch.complex128
+        >>> torch.set_default_dtype(torch.float16)
+        >>> # Python floats are now interpreted as float16
+        >>> torch.tensor([1.2, 3]).dtype  # a new floating point tensor
+        torch.float16
+        >>> # Complex Python numbers are now interpreted as complex32
+        >>> torch.tensor([1.2, 3j]).dtype  # a new complex tensor
+        torch.complex32
+    """
+    _C._set_default_dtype(d)
+def use_deterministic_algorithms(
+    mode: builtins.bool,
+    *,
+    warn_only: builtins.bool = False,
+) -> None:
+    r"""Sets whether PyTorch operations must use "deterministic"
+    algorithms. That is, algorithms which, given the same input, and when
+    run on the same software and hardware, always produce the same output.
+    When enabled, operations will use deterministic algorithms when available,
+    and if only nondeterministic algorithms are available they will throw a
+    :class:`RuntimeError` when called.
+    .. note:: This setting alone is not always enough to make an application
+        reproducible. Refer to :ref:`reproducibility` for more information.
+    .. note:: :func:`torch.set_deterministic_debug_mode` offers an alternative
+        interface for this feature.
+    The following normally-nondeterministic operations will act
+    deterministically when ``mode=True``:
+        * :class:`torch.nn.Conv1d` when called on CUDA tensor
+        * :class:`torch.nn.Conv2d` when called on CUDA tensor
+        * :class:`torch.nn.Conv3d` when called on CUDA tensor
+        * :class:`torch.nn.ConvTranspose1d` when called on CUDA tensor
+        * :class:`torch.nn.ConvTranspose2d` when called on CUDA tensor
+        * :class:`torch.nn.ConvTranspose3d` when called on CUDA tensor
+        * :class:`torch.nn.ReplicationPad2d` when attempting to differentiate a CUDA tensor
+        * :func:`torch.bmm` when called on sparse-dense CUDA tensors
+        * :func:`torch.Tensor.__getitem__` when attempting to differentiate a CPU tensor
+          and the index is a list of tensors
+        * :func:`torch.Tensor.index_put` with ``accumulate=False``
+        * :func:`torch.Tensor.index_put` with ``accumulate=True`` when called on a CPU
+          tensor
+        * :func:`torch.Tensor.put_` with ``accumulate=True`` when called on a CPU
+          tensor
+        * :func:`torch.Tensor.scatter_add_` when called on a CUDA tensor
+        * :func:`torch.gather` when called on a CUDA tensor that requires grad
+        * :func:`torch.index_add` when called on CUDA tensor
+        * :func:`torch.index_select` when attempting to differentiate a CUDA tensor
+        * :func:`torch.repeat_interleave` when attempting to differentiate a CUDA tensor
+        * :func:`torch.Tensor.index_copy` when called on a CPU or CUDA tensor
+        * :func:`torch.Tensor.scatter` when `src` type is Tensor and called on CUDA tensor
+        * :func:`torch.Tensor.scatter_reduce` when ``reduce='sum'`` or ``reduce='mean'`` and called on CUDA tensor
+    The following normally-nondeterministic operations will throw a
+    :class:`RuntimeError` when ``mode=True``:
+        * :class:`torch.nn.AvgPool3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.AdaptiveAvgPool2d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.AdaptiveAvgPool3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.MaxPool3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.AdaptiveMaxPool2d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.FractionalMaxPool2d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.FractionalMaxPool3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.MaxUnpool1d`
+        * :class:`torch.nn.MaxUnpool2d`
+        * :class:`torch.nn.MaxUnpool3d`
+        * :func:`torch.nn.functional.interpolate` when attempting to differentiate a CUDA tensor
+          and one of the following modes is used:
+          - ``linear``
+          - ``bilinear``
+          - ``bicubic``
+          - ``trilinear``
+        * :class:`torch.nn.ReflectionPad1d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.ReflectionPad2d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.ReflectionPad3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.ReplicationPad1d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.ReplicationPad3d` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.NLLLoss` when called on a CUDA tensor
+        * :class:`torch.nn.CTCLoss` when attempting to differentiate a CUDA tensor
+        * :class:`torch.nn.EmbeddingBag` when attempting to differentiate a CUDA tensor when
+          ``mode='max'``
+        * :func:`torch.Tensor.put_` when ``accumulate=False``
+        * :func:`torch.Tensor.put_` when ``accumulate=True`` and called on a CUDA tensor
+        * :func:`torch.histc` when called on a CUDA tensor
+        * :func:`torch.bincount` when called on a CUDA tensor and ``weights``
+          tensor is given
+        * :func:`torch.kthvalue` when called on a CUDA tensor
+        * :func:`torch.median` with indices output when called on a CUDA tensor
+        * :func:`torch.nn.functional.grid_sample` when attempting to differentiate a CUDA tensor
+        * :func:`torch.cumsum` when called on a CUDA tensor when dtype is floating point or complex
+        * :func:`torch.Tensor.scatter_reduce` when ``reduce='prod'`` and called on CUDA tensor
+        * :func:`torch.Tensor.resize_` when called with a quantized tensor
+    In addition, several operations fill uninitialized memory when this setting
+    is turned on and when
+    :attr:`torch.utils.deterministic.fill_uninitialized_memory` is turned on.
+    See the documentation for that attribute for more information.
+    A handful of CUDA operations are nondeterministic if the CUDA version is
+    10.2 or greater, unless the environment variable ``CUBLAS_WORKSPACE_CONFIG=:4096:8``
+    or ``CUBLAS_WORKSPACE_CONFIG=:16:8`` is set. See the CUDA documentation for more
+    details: `<https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility>`_
+    If one of these environment variable configurations is not set, a :class:`RuntimeError`
+    will be raised from these operations when called with CUDA tensors:
+        * :func:`torch.mm`
+        * :func:`torch.mv`
+        * :func:`torch.bmm`
+    Note that deterministic operations tend to have worse performance than
+    nondeterministic operations.
+    .. note::
+        This flag does not detect or prevent nondeterministic behavior caused
+        by calling an inplace operation on a tensor with an internal memory
+        overlap or by giving such a tensor as the :attr:`out` argument for an
+        operation. In these cases, multiple writes of different data may target
+        a single memory location, and the order of writes is not guaranteed.
+    Args:
+        mode (:class:`bool`): If True, makes potentially nondeterministic
+            operations switch to a deterministic algorithm or throw a runtime
+            error. If False, allows nondeterministic operations.
+    Keyword args:
+        warn_only (:class:`bool`, optional): If True, operations that do not
+            have a deterministic implementation will throw a warning instead of
+            an error. Default: ``False``
+    Example::
+        >>> # xdoctest: +SKIP
+        >>> torch.use_deterministic_algorithms(True)
+        # Forward mode nondeterministic error
+        >>> torch.randn(10, device='cuda').kthvalue(1)
+        ...
+        RuntimeError: kthvalue CUDA does not have a deterministic implementation...
+        # Backward mode nondeterministic error
+        >>> torch.nn.AvgPool3d(1)(torch.randn(3, 4, 5, 6, requires_grad=True).cuda()).sum().backward()
+        ...
+        RuntimeError: avg_pool3d_backward_cuda does not have a deterministic implementation...
+    """
+    _C._set_deterministic_algorithms(mode, warn_only=warn_only)
+def are_deterministic_algorithms_enabled() -> builtins.bool:
+    r"""Returns True if the global deterministic flag is turned on. Refer to
+    :func:`torch.use_deterministic_algorithms` documentation for more details.
+    Returns:
+        bool: the flag as reported by ``_C._get_deterministic_algorithms()``.
+    """
+    return _C._get_deterministic_algorithms()
+def is_deterministic_algorithms_warn_only_enabled() -> builtins.bool:
+    r"""Returns True if the global deterministic flag is set to warn only.
+    Refer to :func:`torch.use_deterministic_algorithms` documentation for more
+    details.
+    Returns:
+        bool: the flag as reported by
+        ``_C._get_deterministic_algorithms_warn_only()``.
+    """
+    return _C._get_deterministic_algorithms_warn_only()
+def set_deterministic_debug_mode(debug_mode: _Union[builtins.int, str]) -> None:
+    r"""Sets the debug mode for deterministic operations.
+    .. note:: This is an alternative interface for
+        :func:`torch.use_deterministic_algorithms`. Refer to that function's
+        documentation for details about affected operations.
+    Args:
+        debug_mode(str or int): If "default" or 0, don't error or warn on
+            nondeterministic operations. If "warn" or 1, warn on
+            nondeterministic operations. If "error" or 2, error on
+            nondeterministic operations.
+    """
+    # NOTE: builtins.int is used here because int in this scope resolves
+    # to torch.int
+    if not isinstance(debug_mode, (builtins.int, str)):
+        raise TypeError(f"debug_mode must be str or int, but got {type(debug_mode)}")
+    if isinstance(debug_mode, str):
+        if debug_mode == "default":
+            debug_mode = 0
+        elif debug_mode == "warn":
+            debug_mode = 1
+        elif debug_mode == "error":
+            debug_mode = 2
+        else:
+            raise RuntimeError(
+                "invalid value of debug_mode, expected one of `default`, "
+                f"`warn`, `error`, but got {debug_mode}"
+            )
+    if debug_mode == 0:
+        _C._set_deterministic_algorithms(False)
+    elif debug_mode == 1:
+        _C._set_deterministic_algorithms(True, warn_only=True)
+    elif debug_mode == 2:
+        _C._set_deterministic_algorithms(True)
+    else:
+        raise RuntimeError(
+            "invalid value of debug_mode, expected 0, 1, or 2, " f"but got {debug_mode}"
+        )
+def get_deterministic_debug_mode() -> builtins.int:
+    r"""Returns the current value of the debug mode for deterministic
+    operations. Refer to :func:`torch.set_deterministic_debug_mode`
+    documentation for more details.
+    """
+    if _C._get_deterministic_algorithms():
+        if _C._get_deterministic_algorithms_warn_only():
+            return 1
+        else:
+            return 2
+    else:
+        return 0
+def get_float32_matmul_precision() -> str:
+    r"""Returns the current value of float32 matrix multiplication precision. Refer to
+    :func:`torch.set_float32_matmul_precision` documentation for more details.
+    Returns:
+        str: the value reported by ``_C._get_float32_matmul_precision()``.
+    """
+    return _C._get_float32_matmul_precision()
+def set_float32_matmul_precision(precision: str) -> None:
+    r"""Sets the internal precision of float32 matrix multiplications.
+    Running float32 matrix multiplications in lower precision may significantly increase
+    performance, and in some programs the loss of precision has a negligible impact.
+    Supports three settings:
+        * "highest", float32 matrix multiplications use the float32 datatype (24 mantissa
+          bits with 23 bits explicitly stored) for internal computations.
+        * "high", float32 matrix multiplications either use the TensorFloat32 datatype (10
+          mantissa bits explicitly stored) or treat each float32 number as the sum of two bfloat16 numbers
+          (approximately 16 mantissa bits with 14 bits explicitly stored), if the appropriate fast matrix multiplication
+          algorithms are available.  Otherwise float32 matrix multiplications are computed
+          as if the precision is "highest".  See below for more information on the bfloat16
+          approach.
+        * "medium", float32 matrix multiplications use the bfloat16 datatype (8 mantissa
+          bits with 7 bits explicitly stored) for internal computations, if a fast matrix multiplication algorithm
+          using that datatype internally is available. Otherwise float32
+          matrix multiplications are computed as if the precision is "high".
+    When using "high" precision, float32 multiplications may use a bfloat16-based algorithm
+    that is more complicated than simply truncating to some smaller number of mantissa bits
+    (e.g. 10 for TensorFloat32, 7 for bfloat16 explicitly stored).  Refer to [Henry2019]_ for a complete
+    description of this algorithm.  To briefly explain here, the first step is to realize
+    that we can perfectly encode a single float32 number as the sum of three bfloat16
+    numbers (because float32 has 23 mantissa bits while bfloat16 has 7 explicitly stored, and both have the
+    same number of exponent bits).  This means that the product of two float32 numbers can
+    be exactly given by the sum of nine products of bfloat16 numbers.  We can then trade
+    accuracy for speed by dropping some of these products.  The "high" precision algorithm
+    specifically keeps only the three most significant products, which conveniently excludes
+    all of the products involving the last 8 mantissa bits of either input.  This means that
+    we can represent our inputs as the sum of two bfloat16 numbers rather than three.
+    Because bfloat16 fused-multiply-add (FMA) instructions are typically >10x faster than
+    float32 ones, it's faster to do three multiplications and 2 additions with bfloat16
+    precision than it is to do a single multiplication with float32 precision.
+    .. [Henry2019] http://arxiv.org/abs/1904.06376
+    .. note::
+        This does not change the output dtype of float32 matrix multiplications,
+        it controls how the internal computation of the matrix multiplication is performed.
+    .. note::
+        This does not change the precision of convolution operations. Other flags,
+        like `torch.backends.cudnn.allow_tf32`, may control the precision of convolution
+        operations.
+    .. note::
+        This flag currently only affects one native device type: CUDA.
+        If "high" or "medium" are set then the TensorFloat32 datatype will be used
+        when computing float32 matrix multiplications, equivalent to setting
+        `torch.backends.cuda.matmul.allow_tf32 = True`. When "highest" (the default)
+        is set then the float32 datatype is used for internal computations, equivalent
+        to setting `torch.backends.cuda.matmul.allow_tf32 = False`.
+    Args:
+        precision(str): can be set to "highest" (default), "high", or "medium" (see above).
+    """
+    _C._set_float32_matmul_precision(precision)
+def set_warn_always(b: builtins.bool, /) -> None:
+    r"""When this flag is False (default) then some PyTorch warnings may only
+    appear once per process. This helps avoid excessive warning information.
+    Setting it to True causes these warnings to always appear, which may be
+    helpful when debugging.
+    Args:
+        b (:class:`bool`): If True, force warnings to always be emitted.
+                           If False, set to the default behaviour.
+    """
+    _C._set_warnAlways(b)
+def is_warn_always_enabled() -> builtins.bool:
+    r"""Returns True if the global warn_always flag is turned on. Refer to
+    :func:`torch.set_warn_always` documentation for more details.
+    Returns:
+        bool: the flag as reported by ``_C._get_warnAlways()``.
+    """
+    return _C._get_warnAlways()
+################################################################################
+# Define error checking functions
+################################################################################
+# These error checking functions must be kept consistent with their C++
+# equivalents. Their C++ equivalents are mentioned where applicable.
+def _check_with(
+    error_type,
+    cond: _Union[builtins.bool, SymBool],
+    message: _Optional[_Callable[[], str]],
+):  # noqa: F811
+    r"""Raise ``error_type`` unless ``cond`` holds.
+    Shared implementation behind the ``_check*`` helpers in this module.
+    Args:
+        error_type: Exception class to raise. Must be a subclass of
+            ``Exception`` and must not be a subclass of ``Warning``
+            (verified with an ``assert`` on the failure path only).
+        cond (bool or SymBool): Condition to verify. It is evaluated through
+            ``expect_true`` from ``torch.fx.experimental.symbolic_shapes``.
+        message (Callable, optional): Zero-argument callable whose result is
+            passed through ``str()`` to build the error text. It is only
+            invoked when the check fails. ``None`` selects a generic message.
+    Raises:
+        TypeError: If ``cond`` is neither a ``bool`` nor a ``SymBool``, or if
+            ``message`` is neither ``None`` nor callable.
+        error_type: If the condition does not hold.
+    """
+    if not isinstance(cond, (builtins.bool, SymBool)):
+        raise TypeError(f"cond must be a bool, but got {type(cond)}")
+    # Imported at call time rather than at module import time.
+    from torch.fx.experimental.symbolic_shapes import expect_true
+    if expect_true(cond):
+        return
+    # error_type must be a subclass of Exception and not subclass of Warning
+    assert issubclass(error_type, Exception) and not issubclass(error_type, Warning)
+    if message is None:
+        message_evaluated = (
+            "Expected cond to be True, but got False. (Could this error "
+            "message be improved? If so, please report an enhancement request "
+            "to PyTorch.)"
+        )
+    else:
+        if not callable(message):
+            raise TypeError("message must be a callable")
+        # The callable is evaluated lazily so that building the message costs
+        # nothing when the check passes.
+        message_evaluated = str(message())
+    raise error_type(message_evaluated)
+def _check(cond, message=None):  # noqa: F811
+    r"""Raise ``RuntimeError`` when ``cond`` does not hold.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK``.
+    Args:
+        cond (:class:`bool`): Condition to verify; the error is raised when
+            it is False
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_with``. Default: ``None``
+    """
+    _check_with(RuntimeError, cond, message)
+def _check_is_size(i, message=None):
+    """Verify that ``i`` is usable as a size, i.e. that it is non-negative.
+    Prefer this over a plain ``_check(i >= 0)``: besides performing that
+    check it also passes ``i`` to ``_advise_is_size``, so the knowledge that
+    ``i`` is a size can feed further inferences when ``i`` is an unbacked
+    SymInt.
+    NB: Do NOT call this where a size of -1 is legitimate (for example to
+    request inference from context, wrap-around or truncation). It is meant
+    only for values that must be genuine sizes.
+    """
+    # The non-negativity check is what triggers the expect_true
+    is_non_negative = i >= 0
+    _check(is_non_negative, message)
+    from torch.fx.experimental.symbolic_shapes import _advise_is_size
+    _advise_is_size(i)
+def _check_index(cond, message=None):  # noqa: F811
+    r"""Raise ``IndexError`` when ``cond`` does not hold.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK_INDEX``.
+    Args:
+        cond (:class:`bool`): Condition to verify; the error is raised when
+            it is False
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_with``. Default: ``None``
+    """
+    _check_with(IndexError, cond, message)
+def _check_value(cond, message=None):  # noqa: F811
+    r"""Raise ``ValueError`` when ``cond`` does not hold.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK_VALUE``.
+    Args:
+        cond (:class:`bool`): Condition to verify; the error is raised when
+            it is False
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_with``. Default: ``None``
+    """
+    _check_with(ValueError, cond, message)
+def _check_type(cond, message=None):  # noqa: F811
+    r"""Raise ``TypeError`` when ``cond`` does not hold.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK_TYPE``.
+    Args:
+        cond (:class:`bool`): Condition to verify; the error is raised when
+            it is False
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_with``. Default: ``None``
+    """
+    _check_with(TypeError, cond, message)
+def _check_not_implemented(cond, message=None):  # noqa: F811
+    r"""Raise ``NotImplementedError`` when ``cond`` does not hold.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK_NOT_IMPLEMENTED``.
+    Args:
+        cond (:class:`bool`): Condition to verify; the error is raised when
+            it is False
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_with``. Default: ``None``
+    """
+    _check_with(NotImplementedError, cond, message)
+def _check_tensor_all_with(error_type, cond, message=None):  # noqa: F811
+    # Tensor flavour of ``_check_with``: ``cond`` must be a tensor of dtype
+    # ``torch.bool``. It is reduced with ``_is_all_true().item()`` and the
+    # resulting value is passed on to ``_check_with``.
+    if not is_tensor(cond):
+        raise TypeError(f"cond must be a tensor, but got {type(cond)}")
+    if cond.dtype != torch.bool:
+        raise TypeError(f"cond tensor must have dtype torch.bool, but got {cond.dtype}")
+    all_true = cond._is_all_true().item()
+    _check_with(error_type, all_true, message)  # type: ignore[arg-type]
+# C++ equivalent: `TORCH_CHECK_TENSOR_ALL`
+def _check_tensor_all(cond, message=None):  # noqa: F811
+    r"""Raise ``RuntimeError`` unless every element of ``cond`` is True.
+    Python-side counterpart of the C++ macro ``TORCH_CHECK_TENSOR_ALL``.
+    Args:
+        cond (:class:`torch.Tensor`): Tensor of dtype ``torch.bool``. The
+            error is raised if any element is ``False``
+        message (Callable, optional): Zero-argument callable producing the
+            error text (a string, or any object with a ``__str__()`` method).
+            Forwarded unchanged to ``_check_tensor_all_with``.
+            Default: ``None``
+    """
+    _check_tensor_all_with(RuntimeError, cond, message)
+################################################################################
+# Define numeric constants
+################################################################################
+# For Python Array API (https://data-apis.org/array-api/latest/API_specification/constants.html) and
+# NumPy consistency (https://numpy.org/devdocs/reference/constants.html)
+# Re-export the math constants under the module's own namespace.
+from math import e, inf, nan, pi
+# ``newaxis`` is simply another name for ``None``.
+newaxis: None = None
+# Publish the constants as part of the public API.
+__all__.extend(["e", "pi", "nan", "inf", "newaxis"])
+################################################################################
+# Define Storage and Tensor classes
+################################################################################
+from torch._tensor import Tensor  # usort: skip
+# needs to be after torch.Tensor is defined to avoid circular dependencies
+from torch import storage as storage  # usort: skip
+from torch.storage import (
+    _LegacyStorage,
+    _StorageBase,
+    _warn_typed_storage_removal,
+    TypedStorage,
+    UntypedStorage,
+)
+# NOTE: New <type>Storage classes should never be added. When adding a new
+# dtype, use torch.storage.TypedStorage directly.
+class ByteStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.uint8``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.uint8
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class DoubleStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.double``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.double
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class FloatStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.float``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.float
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class HalfStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.half``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.half
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class LongStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.long``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.long
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class IntStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.int``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.int
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class ShortStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.short``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.short
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class CharStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.int8``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.int8
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class BoolStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.bool``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.bool
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class BFloat16Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.bfloat16``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.bfloat16
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class ComplexDoubleStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.cdouble``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.cdouble
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class ComplexFloatStorage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.cfloat``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.cfloat
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class QUInt8Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.quint8``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.quint8
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class QInt8Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.qint8``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.qint8
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class QInt32Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.qint32``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.qint32
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class QUInt4x2Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.quint4x2``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.quint4x2
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+class QUInt2x4Storage(_LegacyStorage):
+    # Legacy storage class whose element dtype is ``torch.quint2x4``.
+    @classproperty
+    def _dtype(self):
+        # Element dtype, read without going through the removal-warning helper.
+        return torch.quint2x4
+    @classproperty
+    def dtype(self):
+        # Public accessor: calls the typed-storage removal-warning helper,
+        # then returns the same value as ``_dtype``.
+        _warn_typed_storage_removal(stacklevel=3)
+        return self._dtype
+# Registry of the storage classes: the legacy ``<type>Storage`` classes defined
+# above plus ``UntypedStorage`` and ``TypedStorage``. Further down in this
+# module the set is converted to a list and handed to ``_C._init_names``.
+_storage_classes: _Set[_Type[_Union[TypedStorage, UntypedStorage]]] = {
+    UntypedStorage,
+    DoubleStorage,
+    FloatStorage,
+    LongStorage,
+    IntStorage,
+    ShortStorage,
+    CharStorage,
+    ByteStorage,
+    HalfStorage,
+    BoolStorage,
+    QUInt8Storage,
+    QInt8Storage,
+    QInt32Storage,
+    BFloat16Storage,
+    ComplexFloatStorage,
+    ComplexDoubleStorage,
+    QUInt4x2Storage,
+    QUInt2x4Storage,
+    TypedStorage,
+}
+# The _tensor_classes set is initialized by the call to initialize_python_bindings.
+_tensor_classes: _Set[_Type["torch.Tensor"]] = set()
+# If you edit these imports, please update torch/__init__.py.in as well
+from torch import amp as amp, random as random, serialization as serialization
+from torch._tensor_str import set_printoptions
+from torch.amp import autocast, GradScaler
+from torch.random import get_rng_state, initial_seed, manual_seed, seed, set_rng_state
+from torch.serialization import load, save
+################################################################################
+# Initialize extension
+################################################################################
+# Shared memory manager needs to know the exact location of manager executable
+def _manager_path():
+    # Returns the UTF-8 encoded path of the ``torch_shm_manager`` executable,
+    # or empty bytes when running with deploy or on Windows. Raises
+    # RuntimeError if the executable is expected but missing on disk.
+    skip_manager = _running_with_deploy() or platform.system() == "Windows"
+    if skip_manager:
+        return b""
+    shm_manager = get_file_path("torch", "bin", "torch_shm_manager")
+    prepare_multiprocessing_environment(get_file_path("torch"))
+    if os.path.exists(shm_manager):
+        return shm_manager.encode("utf-8")
+    raise RuntimeError("Unable to find torch_shm_manager at " + shm_manager)
+# Pass the manager path (empty bytes when there is none) to the C extension
+# initializer.
+_C._initExtension(_manager_path())
+# Remove the one-shot helper from the module namespace.
+del _manager_path
+# Appease the type checker: it can't deal with direct setting of globals().
+# Note that we will see "too many" functions when reexporting this way; there
+# is not a good way to fix this problem.  Perhaps, try to redesign VariableFunctions
+# so that this import is good enough
+if TYPE_CHECKING:
+    # Some type signatures pulled in from _VariableFunctions here clash with
+    # signatures already imported. For now these clashes are ignored; see
+    # PR #43339 for details.
+    from torch._C._VariableFunctions import *  # type: ignore[assignment, misc] # noqa: F403
+    # Fixup segment_reduce visibility
+    # For type checkers, expose the op under its private name and drop the
+    # public one that the star import above brought in.
+    _segment_reduce = segment_reduce
+    del segment_reduce  # noqa: F821
+# Ops not to be exposed in `torch` namespace,
+# mostly helper ops.
+PRIVATE_OPS = ("unique_dim",)
+# Bind the loop variables up front; presumably so that the ``del`` after the
+# loop cannot fail and type checkers see them as defined.
+__name, __obj = "", None
+# Copy every function of ``_C._VariableFunctions`` into this module's globals,
+# skipping dunder names and the ops listed in PRIVATE_OPS.
+for __name in dir(_C._VariableFunctions):
+    if __name.startswith("__") or __name in PRIVATE_OPS:
+        continue
+    __obj = getattr(_C._VariableFunctions, __name)
+    # Make the function report this module as its home.
+    __obj.__module__ = __name__  # "torch"
+    # Hide some APIs that should not be public
+    if __name == "segment_reduce":
+        # TODO: Once the undocumented FC window is passed, remove the line below
+        globals()[__name] = __obj
+        # From here on the op is handled under its underscore-prefixed name,
+        # so it is stored a second time below and kept out of ``__all__``.
+        __name = "_" + __name
+    globals()[__name] = __obj
+    # Only names without a leading underscore become part of the public API.
+    if not __name.startswith("_"):
+        __all__.append(__name)
+del __name, __obj
+################################################################################
+# Add torch.dtype instances to the public API
+################################################################################
+import torch
+# Every attribute of the ``torch`` module that is a ``torch.dtype`` instance
+# at this point is added to the public API by name.
+__all__.extend(
+    name for name in dir(torch) if isinstance(getattr(torch, name), torch.dtype)
+)
+################################################################################
+# Import TorchDynamo's lazy APIs to avoid circular dependencies
+################################################################################
+# needs to be before from torch.functional import * to avoid circular dependencies
+from torch._compile import _disable_dynamo  # usort: skip
+################################################################################
+# Import interface functions defined in Python
+################################################################################
+# needs to be after the above ATen bindings so we can overwrite from Python side
+from torch import _VF as _VF, functional as functional  # usort: skip
+from torch.functional import *  # usort: skip # noqa: F403
+################################################################################
+# Remove unnecessary members
+################################################################################
+# These two names were imported from ``torch.storage`` above; the legacy
+# ``<type>Storage`` classes that subclass ``_LegacyStorage`` are already
+# defined, so the names are dropped from this module's namespace.
+del _StorageBase
+del _LegacyStorage
+################################################################################
+# Define _assert
+################################################################################
+# needs to be before the submodule imports to avoid circular dependencies
+def _assert(condition, message):
+    r"""Assert ``condition`` with ``message`` in a symbolically traceable way.
+    If ``condition`` is not exactly a ``torch.Tensor`` and provides a
+    ``__torch_function__`` override, the call is dispatched through
+    ``overrides.handle_torch_function``; otherwise a plain Python ``assert``
+    is executed.
+    """
+    dispatch_to_override = type(
+        condition
+    ) is not torch.Tensor and overrides.has_torch_function((condition,))
+    if dispatch_to_override:
+        return overrides.handle_torch_function(
+            _assert, (condition,), condition, message
+        )
+    assert condition, message
+################################################################################
+# Import most common subpackages
+################################################################################
+# Use the redundant form so that type checkers know that these are a part of
+# the public API. The "regular" import lines are there solely for the runtime
+# side effect of adding to the imported module's members for other users.
+# needs to be before import torch.nn as nn to avoid circular dependencies
+from torch.autograd import (  # usort: skip
+    enable_grad as enable_grad,
+    inference_mode as inference_mode,
+    no_grad as no_grad,
+    set_grad_enabled as set_grad_enabled,
+)
+from torch import (
+    __config__ as __config__,
+    __future__ as __future__,
+    _awaits as _awaits,
+    autograd as autograd,
+    backends as backends,
+    cpu as cpu,
+    cuda as cuda,
+    distributed as distributed,
+    distributions as distributions,
+    fft as fft,
+    futures as futures,
+    hub as hub,
+    jit as jit,
+    linalg as linalg,
+    mps as mps,
+    mtia as mtia,
+    multiprocessing as multiprocessing,
+    nested as nested,
+    nn as nn,
+    optim as optim,
+    overrides as overrides,
+    profiler as profiler,
+    sparse as sparse,
+    special as special,
+    testing as testing,
+    types as types,
+    utils as utils,
+    xpu as xpu,
+)
+from torch.signal import windows as windows
+# Quantized, sparse, AO, etc. should be last to get imported, as nothing
+# is expected to depend on them.
+from torch import ao as ao  # usort: skip
+# nn.quant* depends on ao -- so should be after those.
+import torch.nn.intrinsic
+import torch.nn.qat
+import torch.nn.quantizable
+import torch.nn.quantized
+_C._init_names(list(_storage_classes))
+# attach docstrings to torch and tensor functions
+from torch import _size_docs, _storage_docs, _tensor_docs, _torch_docs
+del _torch_docs, _tensor_docs, _storage_docs, _size_docs
+def compiled_with_cxx11_abi() -> builtins.bool:
+    r"""Tell whether this PyTorch build used _GLIBCXX_USE_CXX11_ABI=1.
+    The value is read from ``_C._GLIBCXX_USE_CXX11_ABI``.
+    """
+    uses_cxx11_abi = _C._GLIBCXX_USE_CXX11_ABI
+    return uses_cxx11_abi
+from torch import _library as _library, _ops as _ops
+# Import the ops and classes "namespace"
+from torch._ops import ops as ops  # usort: skip
+from torch._classes import classes as classes  # usort: skip
+sys.modules.setdefault(f"{__name__}.ops", ops)
+sys.modules.setdefault(f"{__name__}.classes", classes)
+# quantization depends on torch.fx and torch.ops
+# Import quantization
+from torch import quantization as quantization  # usort: skip
+# Import the quasi random sampler
+from torch import quasirandom as quasirandom  # usort: skip
+# If you are seeing this, it means that this call site was not checked if
+# the memory format could be preserved, and it was switched to old default
+# behaviour of contiguous
+legacy_contiguous_format = contiguous_format  # defined by _C._initExtension()
+# Register fork handler to initialize OpenMP in child processes (see gh-28389)
+from torch.multiprocessing._atfork import register_after_fork
+register_after_fork(torch.get_num_threads)
+del register_after_fork
+# Import tools that require fully imported torch (for applying
+# torch.jit.script as a decorator, for instance):
+from torch._lobpcg import lobpcg as lobpcg
+# These were previously defined in native_functions.yaml and appeared on the
+# `torch` namespace, but we moved them to c10 dispatch to facilitate custom
+# class usage. We add these lines here to preserve backward compatibility.
+quantized_lstm = ops.aten.quantized_lstm
+quantized_gru = ops.aten.quantized_gru
+# Import experimental masked operations support. See
+# [RFC-0016](https://github.com/pytorch/rfcs/pull/27) for more
+# information.
+from torch import masked as masked
+# Import removed ops with error message about removal
+from torch._linalg_utils import (  # type: ignore[misc]
+    _symeig as symeig,
+    eig,
+    lstsq,
+    matrix_rank,
+    solve,
+)
+from torch.utils.dlpack import from_dlpack, to_dlpack
+class _TorchCompileInductorWrapper:
+    # Backend object that ``torch.compile`` builds for the "inductor" backend.
+    # It collects inductor config overrides derived from ``mode`` / ``options``
+    # in ``self.config`` and passes them to ``compile_fx`` as config patches.
+    compiler_name = "inductor"
+    def __init__(self, mode, options, dynamic):
+        self.config: _Dict[str, _Any] = {}
+        self.dynamic = dynamic
+        # Mode presets are applied first; explicit options are applied after.
+        self.apply_mode(mode)
+        self.apply_options(options)
+        if not self.config.get("triton.cudagraphs", False):
+            return
+        os.environ["DISABLE_CUPTI_LAZY_REINIT"] = "1"
+        # FIXME: CUDA Graph does not work well with CUPTI teardown.
+        #   1) crashes on 1st lazy CUPTI re-init after teardown (CUDA 11)
+        #   2) crashes on 2nd non-lazy CUPTI re-init after teardown (CUDA 12)
+        # Workaround: turn off CUPTI teardown when using CUDA Graphs.
+        os.environ["TEARDOWN_CUPTI"] = "0"
+    def __eq__(self, other):
+        # Two wrappers are equal when their config overrides and ``dynamic``
+        # setting match.
+        if not isinstance(other, _TorchCompileInductorWrapper):
+            return False
+        return self.config == other.config and self.dynamic == other.dynamic
+    def apply_mode(self, mode: _Optional[str]):
+        # ``None`` and "default" add no overrides; the named presets are
+        # resolved through ``list_mode_options``; anything else is rejected.
+        if mode is None or mode == "default":
+            return
+        if mode not in {"reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"}:
+            raise RuntimeError(
+                f"Unrecognized mode={mode}, should be one of: default, reduce-overhead, max-autotune, max-autotune-no-cudagraphs"
+            )
+        from torch._inductor import list_mode_options
+        self.apply_options(list_mode_options(mode, self.dynamic))
+    def apply_options(self, options: _Optional[_Dict[str, _Any]]):
+        # Validate each option against a copy of the inductor config (name
+        # must exist, value type must match exactly) and record it.
+        if not options:
+            return
+        from torch._inductor import config
+        known_options: _Dict[str, _Any] = config.shallow_copy_dict()
+        for option_key, option_value in options.items():
+            # Dashes in option names are accepted as underscores.
+            normalized = option_key.replace("-", "_")
+            if normalized not in known_options:
+                raise RuntimeError(
+                    f"Unexpected optimization option {option_key}, known options are {list(known_options.keys())}"
+                )
+            expected_type = type(known_options[normalized])
+            if type(option_value) is not expected_type:
+                got_name = type(option_value).__name__
+                expected_name = expected_type.__name__
+                raise RuntimeError(
+                    f"Unexpected type of attr {option_key}, got {got_name} should be {expected_name}"
+                )
+            self.config[normalized] = option_value
+    def __call__(self, model_, inputs_):
+        from torch._inductor.compile_fx import compile_fx
+        return compile_fx(model_, inputs_, config_patches=self.config)
+    def get_compiler_config(self):
+        from torch._inductor.compile_fx import get_patched_config_dict
+        return get_patched_config_dict(config_patches=self.config)
+    def reset(self):
+        # Reset the cudagraph trees unless cudagraphs are off: either the
+        # override or the global config must enable them, and an explicit
+        # falsy override wins.
+        from torch._inductor import config
+        cudagraphs_in_play = (
+            "triton.cudagraphs" in self.config or config.triton.cudagraphs
+        )
+        if cudagraphs_in_play and self.config.get("triton.cudagraphs", True):
+            from torch._inductor.cudagraph_trees import reset_cudagraph_trees
+            reset_cudagraph_trees()
+class _TorchCompileWrapper:
+    # Backend object that ``torch.compile`` builds for every backend other
+    # than the string "inductor". It resolves the backend through
+    # ``lookup_backend`` and forwards ``mode`` / ``options`` as keyword
+    # arguments when they are set.
+    def __init__(self, backend, mode, options, dynamic):
+        from torch._dynamo.backends.registry import lookup_backend
+        # Pick a display name: the string itself, the callable's ``__name__``,
+        # or its ``str()`` as a last resort.
+        if isinstance(backend, str):
+            display_name = backend
+        else:
+            display_name = (
+                backend.__name__ if hasattr(backend, "__name__") else str(backend)
+            )
+        self.compiler_name = display_name
+        self.dynamic = dynamic
+        self.compiler_fn = lookup_backend(backend)
+        self.kwargs = {}
+        # only pass the args if they non-empty
+        if mode and mode != "default":
+            self.kwargs["mode"] = mode
+        if options:
+            self.kwargs["options"] = options
+    def __eq__(self, other):
+        # Equal when the resolved backend, forwarded kwargs and ``dynamic``
+        # setting all match.
+        if not isinstance(other, _TorchCompileWrapper):
+            return False
+        return (
+            self.compiler_fn == other.compiler_fn
+            and self.kwargs == other.kwargs
+            and self.dynamic == other.dynamic
+        )
+    def __call__(self, model_, inputs_):
+        return self.compiler_fn(model_, inputs_, **self.kwargs)
+    def reset(self):
+        # Delegate to the backend's own ``reset`` when it has one.
+        if not hasattr(self.compiler_fn, "reset"):
+            return
+        self.compiler_fn.reset()
+_InputT = _ParamSpec("_InputT")
+_RetT = _TypeVar("_RetT")
+# Overload: ``compile(model, ...)`` returns the optimized callable directly.
+@_overload
+def compile(
+    model: _Callable[_InputT, _RetT],
+    *,
+    fullgraph: builtins.bool = False,
+    dynamic: _Optional[builtins.bool] = None,
+    backend: _Union[str, _Callable] = "inductor",
+    mode: _Union[str, None] = None,
+    options: _Optional[_Dict[str, _Union[str, builtins.int, builtins.bool]]] = None,
+    disable: builtins.bool = False,
+) -> _Callable[_InputT, _RetT]: ...
+# Overload: ``compile(...)`` without a model returns a decorator.
+@_overload
+def compile(
+    model: None = None,
+    *,
+    fullgraph: builtins.bool = False,
+    dynamic: _Optional[builtins.bool] = None,
+    backend: _Union[str, _Callable] = "inductor",
+    mode: _Union[str, None] = None,
+    options: _Optional[_Dict[str, _Union[str, builtins.int, builtins.bool]]] = None,
+    disable: builtins.bool = False,
+) -> _Callable[[_Callable[_InputT, _RetT]], _Callable[_InputT, _RetT]]: ...
+def compile(
+    model: _Optional[_Callable] = None,
+    *,
+    fullgraph: builtins.bool = False,
+    dynamic: _Optional[builtins.bool] = None,
+    backend: _Union[str, _Callable] = "inductor",
+    mode: _Union[str, None] = None,
+    options: _Optional[_Dict[str, _Union[str, builtins.int, builtins.bool]]] = None,
+    disable: builtins.bool = False,
+) -> _Union[
+    _Callable[[_Callable[_InputT, _RetT]], _Callable[_InputT, _RetT]],
+    _Callable[_InputT, _RetT],
+]:
+    """
+    Optimizes given model/function using TorchDynamo and specified backend.
+    If you are compiling an :class:`torch.nn.Module`, you can also use :meth:`torch.nn.Module.compile`
+    to compile the module inplace without changing its structure.
+    Concretely, for every frame executed within the compiled region, we will attempt
+    to compile it and cache the compiled result on the code object for future
+    use.  A single frame may be compiled multiple times if previous compiled
+    results are not applicable for subsequent calls (this is called a "guard
+    failure"), you can use TORCH_LOGS=guards to debug these situations.
+    Multiple compiled results can be associated with a frame up to
+    ``torch._dynamo.config.cache_size_limit``, which defaults to 8; at which
+    point we will fall back to eager.  Note that compile caches are per
+    *code object*, not frame; if you dynamically create multiple copies of a
+    function, they will all share the same code cache.
+    Args:
+       model (Callable): Module/function to optimize
+       fullgraph (bool): If False (default), torch.compile attempts to discover compileable regions
+        in the function that it will optimize. If True, then we require that the entire function be
+        capturable into a single graph. If this is not possible (that is, if there are graph breaks),
+        then this will raise an error.
+       dynamic (bool or None): Use dynamic shape tracing.  When this is True, we will up-front attempt
+        to generate a kernel that is as dynamic as possible to avoid recompilations when
+        sizes change.  This may not always work as some operations/optimizations will
+        force specialization; use TORCH_LOGS=dynamic to debug overspecialization.
+        When this is False, we will NEVER generate dynamic kernels, we will always specialize.
+        By default (None), we automatically detect if dynamism has occurred and compile a more
+        dynamic kernel upon recompile.
+       backend (str or Callable): backend to be used
+        - "inductor" is the default backend, which is a good balance between performance and overhead
+        - Non experimental in-tree backends can be seen with `torch._dynamo.list_backends()`
+        - Experimental or debug in-tree backends can be seen with `torch._dynamo.list_backends(None)`
+        - To register an out-of-tree custom backend:
+          https://pytorch.org/docs/main/torch.compiler_custom_backends.html#registering-custom-backends
+       mode (str): Can be either "default", "reduce-overhead", "max-autotune" or "max-autotune-no-cudagraphs"
+        - "default" is the default mode, which is a good balance between performance and overhead
+        - "reduce-overhead" is a mode that reduces the overhead of python with CUDA graphs,
+          useful for small batches.  Reduction of overhead can come at the cost of more memory
+          usage, as we will cache the workspace memory required for the invocation so that we
+          do not have to reallocate it on subsequent runs.  Reduction of overhead is not guaranteed
+          to work; today, we only reduce overhead for CUDA only graphs which do not mutate inputs.
+          There are other circumstances where CUDA graphs are not applicable; use TORCH_LOGS=perf_hints
+          to debug.
+        - "max-autotune" is a mode that leverages Triton or template based matrix multiplications
+          on supported devices and Triton based convolutions on GPU.
+          It enables CUDA graphs by default on GPU.
+        - "max-autotune-no-cudagraphs" is a mode similar to "max-autotune" but without CUDA graphs
+        - To see the exact configs that each mode sets you can call `torch._inductor.list_mode_options()`
+       options (dict): A dictionary of options to pass to the backend. Some notable ones to try out are
+        - `epilogue_fusion` which fuses pointwise ops into templates. Requires `max_autotune` to also be set
+        - `max_autotune` which will profile to pick the best matmul configuration
+        - `fallback_random` which is useful when debugging accuracy issues
+        - `shape_padding` which pads matrix shapes to better align loads on GPUs especially for tensor cores
+        - `triton.cudagraphs` which will reduce the overhead of python with CUDA graphs
+        - `trace.enabled` which is the most useful debugging flag to turn on
+        - `trace.graph_diagram` which will show you a picture of your graph after fusion
+        - For inductor you can see the full list of configs that it supports by calling `torch._inductor.list_options()`
+       disable (bool): Turn torch.compile() into a no-op for testing
+    Raises:
+        RuntimeError: On Python 3.13+, when both ``mode`` and ``options`` are
+            given, or when the returned decorator is applied to ``None``.
+    Example::
+        @torch.compile(options={"triton.cudagraphs": True}, fullgraph=True)
+        def foo(x):
+            return torch.sin(x) + torch.cos(x)
+    """
+    _C._log_api_usage_once("torch.compile")
+    if sys.version_info >= (3, 13):
+        raise RuntimeError("Dynamo is not supported on Python 3.13+")
+    # Decorator mode
+    if model is None:
+        # Re-enter compile() with the captured keyword arguments once the
+        # decorated callable is known. The mode/options validation further
+        # down therefore only runs when the decorator is applied.
+        def fn(model: _Callable[_InputT, _RetT]) -> _Callable[_InputT, _RetT]:
+            if model is None:
+                raise RuntimeError("Model can't be None")
+            return compile(
+                model,
+                fullgraph=fullgraph,
+                dynamic=dynamic,
+                backend=backend,
+                mode=mode,
+                options=options,
+                disable=disable,
+            )
+        return fn
+    # ``mode`` and ``options`` are mutually exclusive.
+    if mode is not None and options is not None:
+        raise RuntimeError(
+            "Either mode or options can be specified, but both can't be specified at the same time."
+        )
+    if mode is None and options is None:
+        mode = "default"
+    # The string "inductor" gets the dedicated inductor wrapper; any other
+    # backend (string or callable) goes through the generic wrapper.
+    if backend == "inductor":
+        backend = _TorchCompileInductorWrapper(mode, options, dynamic)
+    else:
+        backend = _TorchCompileWrapper(backend, mode, options, dynamic)
+    # ``fullgraph`` is passed to dynamo under the name ``nopython``.
+    return torch._dynamo.optimize(
+        backend=backend,
+        nopython=fullgraph,
+        dynamic=dynamic,
+        disable=disable,
+    )(model)  # type: ignore[return-value]
+def _register_device_module(device_type, module):
+    r"""Attach an external runtime module for :attr:`device_type` to torch.
+    Once registration succeeds, :attr:`module` is reachable as an attribute
+    of this module named after the device type (``torch.xxx``) and is also
+    entered into ``sys.modules`` under the matching dotted name.
+    Raises:
+        RuntimeError: If this module already has an attribute with the name
+            of the device type.
+    """
+    # Normalize through torch.device so that only device types torch accepts
+    # get past this point.
+    device_type = torch.device(device_type).type
+    this_module = sys.modules[__name__]
+    if hasattr(this_module, device_type):
+        already_registered = getattr(this_module, device_type)
+        raise RuntimeError(
+            f"The runtime module of '{device_type}' has already "
+            f"been registered with '{already_registered}'"
+        )
+    setattr(this_module, device_type, module)
+    qualified_name = f"{__name__}.{device_type}"
+    sys.modules[qualified_name] = module
+from torch import (
+    export as export,
+    func as func,
+    library as library,
+    return_types as return_types,
+)
+from torch._higher_order_ops import cond as cond, while_loop as while_loop
+from torch.func import vmap as vmap
+# Import the meta registrations at runtime only; the import is skipped under type checking.
+if not TYPE_CHECKING:
+    from torch import _meta_registrations
+# Enable CUDA Sanitizer
+# Only the presence of TORCH_CUDA_SANITIZER in the environment is tested, so
+# any value (even an empty string) switches the sanitizer on.
+if "TORCH_CUDA_SANITIZER" in os.environ:
+    import torch.cuda._sanitizer as csan
+    csan.enable_cuda_sanitizer()
+# Populate magic methods on SymInt and SymFloat
+# (the module is imported for its import-time side effects; nothing from it is used here)
+import torch.fx.experimental.sym_node
+# Register MPS specific decomps
+torch.backends.mps._init()
+if not _running_with_deploy():
+    from torch import compiler as compiler
+    class _TritonLibrary:
+        lib = torch.library.Library("triton", "DEF")
+        ops_table: _Dict[_Tuple[str, str], _Callable] = {}
+        @classmethod
+        def registerOp(cls, op_key, full_schema, op_impl, dispatch_key):
+            if (op_key, dispatch_key) not in cls.ops_table:
+                cls.lib.define(full_schema)
+                cls.lib.impl("triton::" + op_key, op_impl, dispatch_key)
+                cls.ops_table[(op_key, dispatch_key)] = op_impl
+            return cls.ops_table[(op_key, dispatch_key)]
+# Deprecated attributes
+# Maps each deprecated module-level attribute name to the callable that
+# replaces it. The module-level __getattr__ consults this table, emits a
+# deprecation warning and returns the result of calling the replacement.
+_deprecated_attrs = {
+    "has_mps": torch.backends.mps.is_built,
+    "has_cuda": torch.backends.cuda.is_built,
+    "has_cudnn": torch.backends.cudnn.is_available,
+    "has_mkldnn": torch.backends.mkldnn.is_available,
+}
+if TYPE_CHECKING:
+    # Import the following modules during type checking to enable code intelligence features,
+    # such as auto-completion in tools like pylance, even when these modules are not explicitly
+    # imported in user code.
+    from torch import (
+        _dynamo as _dynamo,
+        _inductor as _inductor,
+        _subclasses as _subclasses,
+        onnx as onnx,
+    )
+else:
+    _lazy_modules = {
+        "_dynamo",
+        "_inductor",
+        "_export",
+        # ONNX must be imported after _dynamo, _ops, _subclasses, fx, func and jit
+        "onnx",
+    }
+    def __getattr__(name):
+        # Deprecated attrs
+        replacement = _deprecated_attrs.get(name)
+        if replacement is not None:
+            import warnings
+            warnings.warn(
+                f"'{name}' is deprecated, please use '{replacement.__module__}.{replacement.__name__}()'",
+                stacklevel=2,
+            )
+            return replacement()
+        # Lazy modules
+        if name in _lazy_modules:
+            return importlib.import_module(f".{name}", __name__)
+        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+def get_device_module(device: _Optional[_Union[torch.device, str]] = None):
+    """
+    Returns the module associated with a given device(e.g., torch.device('cuda'), "mtia:0", "xpu", ...).
+    If no device is given, return the module for the current accelerator or CPU if none is present.
+    """
+    if isinstance(device, torch.device):
+        device_module_name = device.type
+    elif isinstance(device, str):
+        device_module_name = torch.device(device).type
+    elif device is None:
+        # Using default accelerator type. If no accelerator is available, it automatically returns CPU device.
+        device_module_name = torch._C._get_accelerator().type
+    else:
+        raise RuntimeError(
+            f"Invalid value of device '{device}', expect torch.device, str, or None"
+        )
+    device_module = getattr(torch, device_module_name, None)
+    if device_module is None:
+        raise RuntimeError(
+            f"Device '{device_module_name}' does not have a corresponding module registered as 'torch.{device_module_name}'."
+        )
+    return device_module
+def _constrain_as_size(
+    symbol,
+    min: _Optional[builtins.int] = None,
+    max: _Optional[builtins.int] = None,
+):
+    """
+    This indicates that a given int is size-like, and can be used in any context where a size is expected.
+    You will typically use this when reading out integers from Tensors, e.g., max.item() or lengths.tolist()
+    which then need to be used as tensor constructors. Providing these assertions to PyTorch can help resolve
+    GuardOnDataDependentSymNode errors upon export, since we cannot guard on unbacked SymInts.
+
+    This function has unusual semantics in some circumstances: in framework
+    code, we will treat this int as >= 2 (when we do a size-oblivious guard).
+    This makes it easier to use the unbacked int in size contexts,
+    as we will often attempt to guard on a size being zero/one
+    (e.g., when computing the contiguity of a tensor, or testing if
+    broadcasting can occur), which will not work on unbacked SymInts.
+    However, if we conservatively assume that the size is not zero/one, we will
+    end up with a graph that will still work even if the size is zero/one.
+
+    For more details, see https://docs.google.com/document/d/1HSuTTVvYH1pTew89Rtpeu84Ht3nQEFTYhAX3Ypa_xJs/edit
+
+    ``symbol``, ``min`` and ``max`` are forwarded unchanged to
+    ``torch.sym_constrain_range_for_size``; nothing is returned.
+    """
+    torch.sym_constrain_range_for_size(symbol, min=min, max=max)
+from torch import _logging
+# Runs once, as a side effect of importing this module; see torch._logging
+# for what the call sets up.
+_logging._init_logs()
+def _import_device_backends():
+    """
+    Leverage the Python plugin mechanism to load out-of-the-tree device extensions.
+    See this RFC: https://github.com/pytorch/pytorch/issues/122468
+    """
+    from importlib.metadata import entry_points
+    group_name = "torch.backends"
+    if sys.version_info < (3, 10):
+        backend_extensions = entry_points().get(group_name, ())
+    else:
+        backend_extensions = entry_points(group=group_name)
+    for backend_extension in backend_extensions:
+        try:
+            # Load the extension
+            entrypoint = backend_extension.load()
+            # Call the entrypoint
+            entrypoint()
+        except Exception as err:
+            raise RuntimeError(
+                f"Failed to load the backend extension: {backend_extension.name}. "
+                f"You can disable extension auto-loading with TORCH_DEVICE_BACKEND_AUTOLOAD=0."
+            ) from err
+def _is_device_backend_autoload_enabled() -> builtins.bool:
+    """
+    Whether autoloading out-of-the-tree device extensions is enabled.
+    The switch depends on the value of the environment variable
+    `TORCH_DEVICE_BACKEND_AUTOLOAD`.
+    Returns:
+        bool: Whether to enable autoloading the extensions. Enabled by default.
+    Examples:
+        >>> torch._is_device_backend_autoload_enabled()
+        True
+    """
+    # enabled by default
+    return os.getenv("TORCH_DEVICE_BACKEND_AUTOLOAD", "1") == "1"
+# Load the out-of-the-tree device extensions as part of importing this module,
+# unless autoloading was turned off through TORCH_DEVICE_BACKEND_AUTOLOAD.
+if _is_device_backend_autoload_enabled():
+    _import_device_backends()

.venv/lib/python3.11/site-packages/torch/_appdirs.py ADDED Viewed

	@@ -0,0 +1,667 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2005-2010 ActiveState Software Inc.
+# Copyright (c) 2013 Eddy Petrișor
+# flake8: noqa
+"""
+This file is directly from
+https://github.com/ActiveState/appdirs/blob/3fe6a83776843a46f20c2e5587afcffe05e03b39/appdirs.py
+The license of https://github.com/ActiveState/appdirs copied below:
+# This is the MIT license
+Copyright (c) 2010 ActiveState Software Inc.
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+"""Utilities for determining application-specific dirs.
+See <https://github.com/ActiveState/appdirs> for details and usage.
+"""
+# Dev Notes:
+# - MSDN on where to store app data files:
+#   http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
+# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
+# - XDG spec for Un*x: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+__version__ = "1.4.4"
+# Numeric form of __version__, here (1, 4, 4); every dot-separated segment must parse as an int.
+__version_info__ = tuple(int(segment) for segment in __version__.split("."))
+import os
+import sys
+# Alias kept so that _get_win_folder_with_pywin32 can keep calling unicode().
+unicode = str
+if sys.platform.startswith("java"):
+    import platform
+    os_name = platform.java_ver()[3][0]
+    if os_name.startswith("Windows"):  # "Windows XP", "Windows 7", etc.
+        system = "win32"
+    elif os_name.startswith("Mac"):  # "Mac OS X", etc.
+        system = "darwin"
+    else:  # "Linux", "SunOS", "FreeBSD", etc.
+        # Setting this to "linux2" is not ideal, but only Windows or Mac
+        # are actually checked for and the rest of the module expects
+        # *sys.platform* style strings.
+        system = "linux2"
+else:
+    system = sys.platform
+def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
+    r"""Return full path to the user-specific data dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+    Typical user data directories are:
+        Mac OS X:               ~/Library/Application Support/<AppName>
+        Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
+        Win XP (not roaming):   C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
+        Win XP (roaming):       C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
+        Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
+        Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
+    For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
+    That means, by default "~/.local/share/<AppName>".
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+        path = os.path.normpath(_get_win_folder(const))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+    elif system == "darwin":
+        path = os.path.expanduser("~/Library/Application Support/")
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        path = os.getenv("XDG_DATA_HOME", os.path.expanduser("~/.local/share"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
+    r"""Return full path to the user-shared data dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "multipath" is an optional parameter only applicable to *nix
+            which indicates that the entire list of data dirs should be
+            returned. By default, the first item from XDG_DATA_DIRS is
+            returned, or '/usr/local/share/<AppName>',
+            if XDG_DATA_DIRS is not set
+    Typical site data directories are:
+        Mac OS X:   /Library/Application Support/<AppName>
+        Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
+        Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
+        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+        Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.
+    For Unix, this is using the $XDG_DATA_DIRS[0] default.
+    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+    elif system == "darwin":
+        path = os.path.expanduser("/Library/Application Support")
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        # XDG default for $XDG_DATA_DIRS
+        # only first, if multipath is False
+        path = os.getenv(
+            "XDG_DATA_DIRS", os.pathsep.join(["/usr/local/share", "/usr/share"])
+        )
+        pathlist = [
+            os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)
+        ]
+        if appname:
+            if version:
+                appname = os.path.join(appname, version)
+            pathlist = [os.sep.join([x, appname]) for x in pathlist]
+        if multipath:
+            path = os.pathsep.join(pathlist)
+        else:
+            path = pathlist[0]
+        return path
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
+    r"""Return full path to the user-specific config dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+    Typical user config directories are:
+        Mac OS X:               ~/Library/Preferences/<AppName>
+        Unix:                   ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
+        Win *:                  same as user_data_dir
+    For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
+    That means, by default "~/.config/<AppName>".
+    """
+    if system == "win32":
+        path = user_data_dir(appname, appauthor, None, roaming)
+    elif system == "darwin":
+        path = os.path.expanduser("~/Library/Preferences/")
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        path = os.getenv("XDG_CONFIG_HOME", os.path.expanduser("~/.config"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
+    r"""Return full path to the user-shared data dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "multipath" is an optional parameter only applicable to *nix
+            which indicates that the entire list of config dirs should be
+            returned. By default, the first item from XDG_CONFIG_DIRS is
+            returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
+    Typical site config directories are:
+        Mac OS X:   same as site_data_dir
+        Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
+                    $XDG_CONFIG_DIRS
+        Win *:      same as site_data_dir
+        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+    For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
+    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+    """
+    if system == "win32":
+        path = site_data_dir(appname, appauthor)
+        if appname and version:
+            path = os.path.join(path, version)
+    elif system == "darwin":
+        path = os.path.expanduser("/Library/Preferences")
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        # XDG default for $XDG_CONFIG_DIRS
+        # only first, if multipath is False
+        path = os.getenv("XDG_CONFIG_DIRS", "/etc/xdg")
+        pathlist = [
+            os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)
+        ]
+        if appname:
+            if version:
+                appname = os.path.join(appname, version)
+            pathlist = [os.sep.join([x, appname]) for x in pathlist]
+        if multipath:
+            path = os.pathsep.join(pathlist)
+        else:
+            path = pathlist[0]
+    return path
+def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
+    r"""Return full path to the user-specific cache dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "opinion" (boolean) can be False to disable the appending of
+            "Cache" to the base app data dir for Windows. See
+            discussion below.
+    Typical user cache directories are:
+        Mac OS X:   ~/Library/Caches/<AppName>
+        Unix:       ~/.cache/<AppName> (XDG default)
+        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
+        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
+    On Windows the only suggestion in the MSDN docs is that local settings go in
+    the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
+    app data dir (the default returned by `user_data_dir` above). Apps typically
+    put cache data somewhere *under* the given dir here. Some examples:
+        ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+        ...\Acme\SuperApp\Cache\1.0
+    OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+    This can be disabled with the `opinion=False` option.
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+            if opinion:
+                path = os.path.join(path, "Cache")
+    elif system == "darwin":
+        path = os.path.expanduser("~/Library/Caches")
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
+    r"""Return full path to the user-specific state dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+    Typical user state directories are:
+        Mac OS X:  same as user_data_dir
+        Unix:      ~/.local/state/<AppName>   # or in $XDG_STATE_HOME, if defined
+        Win *:     same as user_data_dir
+    For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
+    to extend the XDG spec and support $XDG_STATE_HOME.
+    That means, by default "~/.local/state/<AppName>".
+    """
+    if system in ["win32", "darwin"]:
+        path = user_data_dir(appname, appauthor, None, roaming)
+    else:
+        path = os.getenv("XDG_STATE_HOME", os.path.expanduser("~/.local/state"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
+    r"""Return full path to the user-specific log dir for this application.
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "opinion" (boolean) can be False to disable the appending of
+            "Logs" to the base app data dir for Windows, and "log" to the
+            base cache dir for Unix. See discussion below.
+    Typical user log directories are:
+        Mac OS X:   ~/Library/Logs/<AppName>
+        Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
+        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
+        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
+    On Windows the only suggestion in the MSDN docs is that local settings
+    go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
+    examples of what some windows apps use for a logs dir.)
+    OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
+    value for Windows and appends "log" to the user cache dir for Unix.
+    This can be disabled with the `opinion=False` option.
+    """
+    if system == "darwin":
+        path = os.path.join(os.path.expanduser("~/Library/Logs"), appname)
+    elif system == "win32":
+        path = user_data_dir(appname, appauthor, version)
+        version = False
+        if opinion:
+            path = os.path.join(path, "Logs")
+    else:
+        path = user_cache_dir(appname, appauthor, version)
+        version = False
+        if opinion:
+            path = os.path.join(path, "log")
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+class AppDirs(object):
+    """Convenience wrapper for getting application dirs."""
+    def __init__(
+        self, appname=None, appauthor=None, version=None, roaming=False, multipath=False
+    ):
+        self.appname = appname
+        self.appauthor = appauthor
+        self.version = version
+        self.roaming = roaming
+        self.multipath = multipath
+    @property
+    def user_data_dir(self):
+        return user_data_dir(
+            self.appname, self.appauthor, version=self.version, roaming=self.roaming
+        )
+    @property
+    def site_data_dir(self):
+        return site_data_dir(
+            self.appname, self.appauthor, version=self.version, multipath=self.multipath
+        )
+    @property
+    def user_config_dir(self):
+        return user_config_dir(
+            self.appname, self.appauthor, version=self.version, roaming=self.roaming
+        )
+    @property
+    def site_config_dir(self):
+        return site_config_dir(
+            self.appname, self.appauthor, version=self.version, multipath=self.multipath
+        )
+    @property
+    def user_cache_dir(self):
+        return user_cache_dir(self.appname, self.appauthor, version=self.version)
+    @property
+    def user_state_dir(self):
+        return user_state_dir(self.appname, self.appauthor, version=self.version)
+    @property
+    def user_log_dir(self):
+        return user_log_dir(self.appname, self.appauthor, version=self.version)
+# ---- internal support stuff
+def _get_win_folder_from_registry(csidl_name):
+    """This is a fallback technique at best. I'm not sure if using the
+    registry for this guarantees us the correct answer for all CSIDL_*
+    names.
+    """
+    import winreg as _winreg
+    shell_folder_name = {
+        "CSIDL_APPDATA": "AppData",
+        "CSIDL_COMMON_APPDATA": "Common AppData",
+        "CSIDL_LOCAL_APPDATA": "Local AppData",
+    }[csidl_name]
+    key = _winreg.OpenKey(
+        _winreg.HKEY_CURRENT_USER,
+        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders",
+    )
+    dir, type = _winreg.QueryValueEx(key, shell_folder_name)
+    return dir
+def _get_win_folder_with_pywin32(csidl_name):
+    from win32com.shell import shell, shellcon
+    dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
+    # Try to make this a unicode path because SHGetFolderPath does
+    # not return unicode strings when there is unicode data in the
+    # path.
+    try:
+        dir = unicode(dir)
+        # Downgrade to short path name if have highbit chars. See
+        # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+        has_high_char = False
+        for c in dir:
+            if ord(c) > 255:
+                has_high_char = True
+                break
+        if has_high_char:
+            try:
+                import win32api
+                dir = win32api.GetShortPathName(dir)
+            except ImportError:
+                pass
+    except UnicodeError:
+        pass
+    return dir
+def _get_win_folder_with_ctypes(csidl_name):
+    import ctypes
+    csidl_const = {
+        "CSIDL_APPDATA": 26,
+        "CSIDL_COMMON_APPDATA": 35,
+        "CSIDL_LOCAL_APPDATA": 28,
+    }[csidl_name]
+    buf = ctypes.create_unicode_buffer(1024)
+    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+    # Downgrade to short path name if have highbit chars. See
+    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+    has_high_char = False
+    for c in buf:
+        if ord(c) > 255:
+            has_high_char = True
+            break
+    if has_high_char:
+        buf2 = ctypes.create_unicode_buffer(1024)
+        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+            buf = buf2
+    return buf.value
+def _get_win_folder_with_jna(csidl_name):
+    import array
+    from com.sun import jna
+    from com.sun.jna.platform import win32
+    buf_size = win32.WinDef.MAX_PATH * 2
+    buf = array.zeros("c", buf_size)
+    shell = win32.Shell32.INSTANCE
+    shell.SHGetFolderPath(
+        None,
+        getattr(win32.ShlObj, csidl_name),
+        None,
+        win32.ShlObj.SHGFP_TYPE_CURRENT,
+        buf,
+    )
+    dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+    # Downgrade to short path name if have highbit chars. See
+    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+    has_high_char = False
+    for c in dir:
+        if ord(c) > 255:
+            has_high_char = True
+            break
+    if has_high_char:
+        buf = array.zeros("c", buf_size)
+        kernel = win32.Kernel32.INSTANCE
+        if kernel.GetShortPathName(dir, buf, buf_size):
+            dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+    return dir
+# Pick the implementation behind _get_win_folder, on Windows only (the name
+# is left undefined on every other platform). Candidates are tried in order
+# of preference and the first whose supporting import succeeds wins:
+# pywin32, then ctypes, then JNA, and finally the registry lookup.
+if system == "win32":
+    try:
+        import win32com.shell
+        _get_win_folder = _get_win_folder_with_pywin32
+    except ImportError:
+        try:
+            from ctypes import windll
+            _get_win_folder = _get_win_folder_with_ctypes
+        except ImportError:
+            try:
+                import com.sun.jna
+                _get_win_folder = _get_win_folder_with_jna
+            except ImportError:
+                # Last resort; needs none of the optional imports above.
+                _get_win_folder = _get_win_folder_from_registry
+# ---- self test code
+if __name__ == "__main__":
+    appname = "MyApp"
+    appauthor = "MyCompany"
+    props = (
+        "user_data_dir",
+        "user_config_dir",
+        "user_cache_dir",
+        "user_state_dir",
+        "user_log_dir",
+        "site_data_dir",
+        "site_config_dir",
+    )
+    print(f"-- app dirs {__version__} --")
+    print("-- app dirs (with optional 'version')")
+    dirs = AppDirs(appname, appauthor, version="1.0")
+    for prop in props:
+        print(f"{prop}: {getattr(dirs, prop)}")
+    print("\n-- app dirs (without optional 'version')")
+    dirs = AppDirs(appname, appauthor)
+    for prop in props:
+        print(f"{prop}: {getattr(dirs, prop)}")
+    print("\n-- app dirs (without optional 'appauthor')")
+    dirs = AppDirs(appname)
+    for prop in props:
+        print(f"{prop}: {getattr(dirs, prop)}")
+    print("\n-- app dirs (with disabled 'appauthor')")
+    dirs = AppDirs(appname, appauthor=False)
+    for prop in props:
+        print(f"{prop}: {getattr(dirs, prop)}")

.venv/lib/python3.11/site-packages/torch/_classes.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# mypy: allow-untyped-defs
+import types
+import torch._C
+class _ClassNamespace(types.ModuleType):
+    def __init__(self, name):
+        super().__init__("torch.classes" + name)
+        self.name = name
+    def __getattr__(self, attr):
+        proxy = torch._C._get_custom_class_python_wrapper(self.name, attr)
+        if proxy is None:
+            raise RuntimeError(f"Class {self.name}.{attr} not registered!")
+        return proxy
+class _Classes(types.ModuleType):
+    __file__ = "_classes.py"
+    def __init__(self) -> None:
+        super().__init__("torch.classes")
+    def __getattr__(self, name):
+        namespace = _ClassNamespace(name)
+        setattr(self, name, namespace)
+        return namespace
+    @property
+    def loaded_libraries(self):
+        return torch.ops.loaded_libraries
+    def load_library(self, path):
+        """
+        Loads a shared library from the given path into the current process.
+        The library being loaded may run global initialization code to register
+        custom classes with the PyTorch JIT runtime. This allows dynamically
+        loading custom classes. For this, you should compile your class
+        and the static registration code into a shared library object, and then
+        call ``torch.classes.load_library('path/to/libcustom.so')`` to load the
+        shared object.
+        After the library is loaded, it is added to the
+        ``torch.classes.loaded_libraries`` attribute, a set that may be inspected
+        for the paths of all libraries loaded using this function.
+        Args:
+            path (str): A path to a shared library to load.
+        """
+        torch.ops.load_library(path)
+# The classes "namespace": the single _Classes instance, created at import
+# time, through which custom-class namespaces are reached by attribute access.
+classes = _Classes()

.venv/lib/python3.11/site-packages/torch/_compile.py ADDED Viewed

	@@ -0,0 +1,38 @@

+# mypy: allow-untyped-defs
+"""
+APIs related to torch.compile which lazily import torch._dynamo to avoid
+circular dependencies.
+"""
+import functools
+def _disable_dynamo(fn=None, recursive=True):
+    """
+    This API should be only used inside torch, external users should still use
+    torch._dynamo.disable. The main goal of this API is to avoid circular
+    imports issues that is common while using _dynamo.disable inside torch
+    itself.
+    This API avoids it by lazily importing torch._dynamo from the import time to
+    the invocation of the decorated function.
+    """
+    if fn is not None:
+        @functools.wraps(fn)
+        def inner(*args, **kwargs):
+            # cache this on the first invocation to avoid adding too much overhead.
+            disable_fn = getattr(fn, "__dynamo_disable", None)
+            if disable_fn is None:
+                import torch._dynamo
+                disable_fn = torch._dynamo.disable(fn, recursive)
+                fn.__dynamo_disable = disable_fn
+            return disable_fn(*args, **kwargs)
+        return inner
+    else:
+        # decorator usage like @_disable_dynamo(recursive=False). The resulting
+        # object expects the original decorated function as the arg.
+        return functools.partial(_disable_dynamo, recursive=recursive)

.venv/lib/python3.11/site-packages/torch/_custom_ops.py ADDED Viewed

	@@ -0,0 +1,324 @@

+# mypy: allow-untyped-defs
+import inspect
+from torch._custom_op.impl import (
+    _custom_op_with_schema,
+    _find_custom_op,
+    infer_schema,
+    parse_qualname,
+    validate_namespace,
+)
+from torch.library import get_ctx
+__all__ = [
+    "custom_op",
+    "impl",
+    "impl_abstract",
+    "get_ctx",
+    "impl_save_for_backward",
+    "impl_backward",
+]
+def custom_op(qualname, func_or_schema=None):
+    r"""Register a new custom operator
+    In PyTorch, defining an op (short for "operator") is a two step-process:
+    - we need to define the op (by providing an operator name and schema)
+    - we need to implement behavior for how the operator interacts with
+      various PyTorch subsystems, like CPU/CUDA Tensors, Autograd, etc.
+    This entrypoint defines the custom operator (the first step)
+    you must then perform the second step by calling various
+    ``impl_*`` APIs.
+    This API may be used as a decorator (see examples).
+    For a detailed guide on custom ops, please see
+    https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
+    Arguments:
+        qualname (str): Should be a string that looks like
+            "namespace::operator_name". Operators in PyTorch need a namespace to
+            avoid name collisions; a given operator may only be created once.
+            If you are writing a Python library, we recommend the namespace to
+            be the name of your top-level module.
+        func_or_schema (Union[Callable, str]): Each PyTorch operator needs a
+            schema that tells PyTorch the types of the inputs/outputs.
+            If this is a Callable, we will automatically infer the schema from
+            the type annotations on the function (see examples). Otherwise,
+            if you don't want to use type annotations, you may provide us the
+            schema string.
+    Example::
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
+        >>> import torch
+        >>> import numpy as np
+        >>> from torch import Tensor
+        >>>
+        >>> # Step 1: define the custom op.
+        >>> # We need to provide the API a "prototype function"
+        >>> # (a function that returns NotImplementedError), from which
+        >>> # we will infer the types of the inputs and outputs.
+        >>> @torch._custom_ops.custom_op("mylibrary::numpy_sin")
+        >>> def numpy_sin(x: Tensor) -> Tensor:
+        >>>     raise NotImplementedError
+        >>>
+        >>> # The custom op is now accessible via the torch.ops module:
+        >>> torch.ops.mylibrary.numpy_sin
+        >>>
+        >>> # Step 2: Register an implementation for various PyTorch subsystems
+        >>>
+        >>> # Register an implementation for CPU tensors
+        >>> @torch._custom_ops.impl("mylibrary::numpy_sin", device_types="cpu")
+        >>> def numpy_sin_impl_cpu(x):
+        >>>     return torch.from_numpy(np.sin(x.numpy()))
+        >>>
+        >>> # Register an implementation for CUDA tensors
+        >>> @torch._custom_ops.impl("mylibrary::numpy_sin", device_types="cuda")
+        >>> def numpy_sin_impl_cuda(x):
+        >>>     return torch.from_numpy(np.sin(x.cpu().numpy())).to(x.device)
+        >>>
+        >>> x = torch.randn(3)
+        >>> torch.ops.mylibrary.numpy_sin(x)  # calls numpy_sin_impl_cpu
+        >>>
+        >>> x_cuda = x.cuda()
+        >>> torch.ops.mylibrary.numpy_sin(x)  # calls numpy_sin_impl_cuda
+    """
+    ns, name = parse_qualname(qualname)
+    validate_namespace(ns)
+    def inner(func):
+        if not inspect.isfunction(func):
+            raise ValueError(
+                f"custom_op(...)(func): Expected `func` to be a Python "
+                f"function, got: {type(func)}"
+            )
+        if func.__name__ != name:
+            raise ValueError(
+                f"custom_op(qualname='{qualname}', ...)(func): expected `func` "
+                f"to have name '{name}' but got '{func.__name__}'. "
+                f"Please either change the name of `func` or the qualname that "
+                f"is passed to `custom_op`"
+            )
+        schema = infer_schema(func, mutates_args=())
+        _custom_op_with_schema(qualname, schema)
+        return func
+    if func_or_schema is None:
+        return inner
+    if isinstance(func_or_schema, str):
+        _custom_op_with_schema(qualname, func_or_schema)
+    else:
+        return inner(func_or_schema)
+def impl(qualname, *, device_types=("cpu", "cuda"), func=None):
+    r"""Register an implementation for a device type for this custom op.
+    If the op is passed multiple Tensor inputs with different device
+    types, it will dispatch to the registered implementation for the highest
+    priority device type among those present.
+    The supported device types, in order of priority, are {'cuda', 'cpu'}.
+    This API may be used as a decorator (see examples).
+    For a detailed guide on custom ops, please see
+    https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
+    Arguments:
+        device_types (str or Iterable[str]): the device type(s) to register the function for.
+    Example::
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
+        >>> import torch
+        >>> import numpy as np
+        >>> from torch import Tensor
+        >>>
+        >>> # Step 1: define the custom op.
+        >>> # We need to provide the API a "prototype function"
+        >>> # (a function that returns NotImplementedError), from which
+        >>> # we will infer the types of the inputs and outputs.
+        >>> @torch._custom_ops.custom_op("mylibrary::numpy_cos")
+        >>> def numpy_cos(x: Tensor) -> Tensor:
+        >>>     raise NotImplementedError
+        >>>
+        >>> # The custom op is now accessible via the torch.ops module:
+        >>> torch.ops.mylibrary.numpy_cos
+        >>>
+        >>> # Step 2: Register an implementation for various PyTorch subsystems
+        >>>
+        >>> # Register an implementation for CPU tensors
+        >>> @torch._custom_ops.impl("mylibrary::numpy_cos", device_types="cpu")
+        >>> def numpy_cos_impl_cpu(x):
+        >>>     return torch.from_numpy(np.cos(x.numpy()))
+        >>>
+        >>> # Register an implementation for CUDA tensors
+        >>> @torch._custom_ops.impl("mylibrary::numpy_cos", device_types="cuda")
+        >>> def numpy_cos_impl_cuda(x):
+        >>>     return torch.from_numpy(np.cos(x.cpu().numpy())).to(x.device)
+        >>>
+        >>> x = torch.randn(3)
+        >>> torch.ops.mylibrary.numpy_cos(x)  # calls numpy_cos_impl_cpu
+        >>>
+        >>> x_cuda = x.cuda()
+        >>> torch.ops.mylibrary.numpy_cos(x)  # calls numpy_cos_impl_cuda
+    """
+    def inner(func):
+        custom_op = _find_custom_op(qualname, also_check_torch_library=True)
+        custom_op.impl(device_types, _stacklevel=3)(func)
+        return func
+    if func is None:
+        return inner
+    return inner(func)
+def impl_abstract(qualname, *, func=None):
+    r"""Register an abstract implementation for this operator.
+    An "abstract implementation" specifies the behavior of this operator on
+    Tensors that carry no data. Given some input Tensors with certain properties
+    (sizes/strides/storage_offset/device), it specifies what the properties of
+    the output Tensors are.
+    The abstract implementation has the same signature as the operator.
+    It is run for both FakeTensors and meta tensors. To write an abstract
+    implementation, assume that all Tensor inputs to the operator are
+    regular CPU/CUDA/Meta tensors, but they do not have storage, and
+    you are trying to return regular CPU/CUDA/Meta tensor(s) as output.
+    The abstract implementation must consist of only PyTorch operations
+    (and may not directly access the storage or data of any input or
+    intermediate Tensors).
+    This API may be used as a decorator (see examples).
+    For a detailed guide on custom ops, please see
+    https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
+    Examples::
+        >>> import numpy as np
+        >>> from torch import Tensor
+        >>>
+        >>> # Example 1: an operator without data-dependent output shape
+        >>> @torch._custom_ops.custom_op("mylibrary::custom_linear")
+        >>> def custom_linear(x: Tensor, weight: Tensor, bias: Tensor) -> Tensor:
+        >>>     raise NotImplementedError
+        >>>
+        >>> @torch._custom_ops.impl_abstract("mylibrary::custom_linear")
+        >>> def custom_linear_abstract(x, weight):
+        >>>     assert x.dim() == 2
+        >>>     assert weight.dim() == 2
+        >>>     assert bias.dim() == 1
+        >>>     assert x.shape[1] == weight.shape[1]
+        >>>     assert weight.shape[0] == bias.shape[0]
+        >>>     assert x.device == weight.device
+        >>>
+        >>>     return (x @ weight.t()) + bias
+        >>>
+        >>> # Example 2: an operator with data-dependent output shape
+        >>> @torch._custom_ops.custom_op('mylibrary::custom_nonzero')
+        >>> def custom_nonzero(x: Tensor) -> Tensor:
+        >>>     ...
+        >>>
+        >>> @torch._custom_ops.impl_abstract("mylibrary::custom_nonzero")
+        >>> def custom_nonzero_abstract(x):
+        >>>     # Number of nonzero-elements is data-dependent.
+        >>>     # Since we cannot peek at the data in an abstract impl,
+        >>>     # we use the ctx object to construct a new symint that
+        >>>     # represents the data-dependent size.
+        >>>     ctx = torch._custom_ops.get_ctx()
+        >>>     nnz = ctx.create_unbacked_symint()
+        >>>     shape = [x.dim(), nnz]
+        >>>     result = x.new_empty(shape, dtype=torch.long)
+        >>>     return result
+        >>>
+        >>> @torch._custom_ops.impl("mylibrary::custom_nonzero")
+        >>> def custom_nonzero_impl(x):
+        >>>     x_np = to_numpy(x)
+        >>>     res = np.stack(np.nonzero(x_np), axis=1)
+        >>>     # unbacked symbolic ints in PyTorch must be >= 2, so we
+        >>>     # constrain the range to at least 2
+        >>>     if res.shape[0] <= 1:
+        >>>         raise RuntimeError("not supported")
+        >>>     return torch.tensor(res, device=x.device)
+    """
+    import torch.library
+    return torch.library.register_fake(qualname, func, _stacklevel=2)
+def impl_save_for_backward(qualname, *, func=None):
+    r"""Register a function that tells us what to save for backward.
+    Please see :func:`impl_backward` for more details.
+    """
+    def inner(func):
+        custom_op = _find_custom_op(qualname, also_check_torch_library=True)
+        custom_op.impl_save_for_backward(_stacklevel=3)(func)
+        return func
+    if func is None:
+        return inner
+    return inner(func)
+def impl_backward(qualname, output_differentiability=None, *, func=None):
+    r"""Registers a backward formula for an operator.
+    In order for an operator to work with autograd, you need to register
+    a backward formula. There are two pieces to this:
+    1. You must give us a function to specify what to save for backward.
+       Call this the "save for backward" function.
+    2. You must give us a function that computes gradients. Call this the
+       "backward" function.
+    Use `impl_save_for_backward` to define a "save for backward" function
+    that specifies what gets saved for backward. The function should accept
+    two arguments ``(inputs, output)`` and return the quantities to be saved
+    for backward.
+    During runtime, when you call the operator in a forwards pass, PyTorch
+    will invoke the "save for backward" function with the inputs and output
+    of the operator.
+    Use `impl_backward` to define the "backward" function. The backward
+    function must accept ``(ctx, saved, *grads)``:
+    - ``ctx`` is a context object where we may provide information
+    - ``saved`` is exactly what gets returned from the "save for backward"
+      function
+    - ``grads`` is one or more gradients. The number of gradients matches
+      the number of outputs of the operator.
+    The backward function must return a dict that maps the name of
+    an input to the operator to its corresponding gradient. All inputs that
+    were declared to be Tensors in the operator definition must be accounted
+    for in the dict. The gradient may be a Tensor or None.
+    For a detailed guide on custom ops, please see
+    https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
+    """
+    def inner(func):
+        custom_op = _find_custom_op(qualname, also_check_torch_library=True)
+        custom_op.impl_backward(output_differentiability, _stacklevel=3)(func)
+        return func
+    if func is None:
+        return inner
+    return inner(func)
+def _destroy(qualname):
+    """De-registers a custom op. For testing purposes only"""
+    custom_op = _find_custom_op(qualname)
+    custom_op._destroy()

.venv/lib/python3.11/site-packages/torch/_deploy.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# mypy: allow-untyped-defs
+import io
+import torch
+from torch.package import Importer, OrderedImporter, PackageImporter, sys_importer
+from torch.package._package_pickler import create_pickler
+from torch.package._package_unpickler import PackageUnpickler
+from torch.serialization import _maybe_decode_ascii
+def _save_storages(importer, obj):
+    serialized_storages = []
+    serialized_dtypes = []
+    importer = importer if isinstance(importer, torch.package.PackageImporter) else None
+    importers: Importer
+    if importer is not None:
+        importers = OrderedImporter(importer, sys_importer)
+    else:
+        importers = sys_importer
+    def persistent_id(obj):
+        if torch.is_storage(obj) or isinstance(obj, torch.storage.TypedStorage):
+            if isinstance(obj, torch.storage.TypedStorage):
+                # TODO: Once we decide to break serialization FC, we can
+                # remove this case
+                dtype = obj.dtype
+            else:
+                dtype = torch.uint8
+            serialized_storages.append(obj)
+            serialized_dtypes.append(dtype)
+            return ("storage", len(serialized_storages) - 1)
+        if hasattr(obj, "__reduce_deploy__"):
+            if _serialized_reduces.get(id(obj)) is None:
+                _serialized_reduces[id(obj)] = (
+                    "reduce_deploy",
+                    id(obj),
+                    *obj.__reduce_deploy__(importers),
+                )
+            return _serialized_reduces[id(obj)]
+        return None
+    # Write the pickle data for `obj`
+    data_buf = io.BytesIO()
+    pickler = create_pickler(data_buf, importers)
+    pickler.persistent_id = persistent_id
+    pickler.dump(obj)
+    data_value = data_buf.getvalue()
+    return (
+        data_value,
+        serialized_storages,
+        serialized_dtypes,
+        importer.zip_reader if importer else None,
+    )
+def _load_storages(id, zip_reader, obj_bytes, serialized_storages, serialized_dtypes):
+    def persistent_load(saved_id):
+        assert isinstance(saved_id, tuple)
+        typename = _maybe_decode_ascii(saved_id[0])
+        data = saved_id[1:]
+        if typename == "storage":
+            # TODO: Once we decide to break serialization FC, we can
+            # stop wrapping with TypedStorage
+            storage = serialized_storages[data[0]]
+            dtype = serialized_dtypes[data[0]]
+            return torch.storage.TypedStorage(
+                wrap_storage=storage.untyped(), dtype=dtype
+            )
+        if typename == "reduce_deploy":
+            reduce_id, func, args = data
+            if reduce_id not in _loaded_reduces:
+                _loaded_reduces[reduce_id] = func(_raw_packages[zip_reader], *args)
+            return _loaded_reduces[reduce_id]
+        return None
+    importer: Importer
+    if zip_reader is not None:
+        importer = OrderedImporter(_get_package(zip_reader), sys_importer)
+    else:
+        importer = sys_importer
+    unpickler = PackageUnpickler(importer, io.BytesIO(obj_bytes))
+    unpickler.persistent_load = persistent_load  # type: ignore[method-assign]
+    result = _deploy_objects[id] = unpickler.load()
+    return result
+def _get_package(zip_reader):
+    if zip_reader not in _raw_packages:
+        _raw_packages[zip_reader] = PackageImporter(zip_reader)
+    return _raw_packages[zip_reader]
+# Module-level caches shared by the functions above.
+# zip_reader -> PackageImporter built for it (filled by _get_package).
+_raw_packages: dict = {}
+# id passed to _load_storages -> object that call unpickled.
+_deploy_objects: dict = {}
+# id(obj) -> "reduce_deploy" tuple emitted for obj by _save_storages.
+_serialized_reduces: dict = {}
+# reduce id -> object rebuilt for it by _load_storages.
+_loaded_reduces: dict = {}

.venv/lib/python3.11/site-packages/torch/_guards.py ADDED Viewed

	@@ -0,0 +1,925 @@

+# mypy: allow-untyped-defs
+from __future__ import annotations
+import contextlib
+import dataclasses
+import enum
+import functools
+import logging
+import threading
+import traceback
+import unittest.mock
+import weakref
+from abc import abstractmethod
+from contextlib import contextmanager
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Generic,
+    List,
+    NamedTuple,
+    Optional,
+    Set,
+    Tuple,
+    TYPE_CHECKING,
+    TypeVar,
+)
+from torch._C._dynamo.eval_frame import set_context_frame  # noqa: F401
+from torch.utils import _pytree as pytree
+from torch.utils._traceback import CapturedTraceback
+from torch.utils.weak import WeakTensorKeyDictionary
+log = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    import sympy
+    # Import the following modules during type checking to enable code intelligence features,
+    # such as auto-completion in tools like pylance, even when these modules are not explicitly
+    # imported in user code.
+    import torch
+"""
+torch._guards is the definitional source of truth for general purpose guard structures.
+An important thing to keep in mind here is the preservation of layering. There should be no dynamo notions,
+and no guard installation notions here.
+"""
+class CompileId(NamedTuple):
+    frame_id: int
+    # This id is per-frame, and counts how many times we've compiled this
+    # frame.  This could have been a global id but having this be per-frame
+    # gives you a better intuitive sense for how many recompiles have occurred
+    # so far.
+    frame_compile_id: int
+    # TODO: consider also tracking the recompilation count
+    def __str__(self):
+        return f"{self.frame_id}/{self.frame_compile_id}"
+class TraceId(NamedTuple):
+    compile_id: CompileId
+    # This starts off as 0, and every time we restart analysis it goes
+    # up by one
+    attempt: int
+    def __str__(self):
+        if self.attempt == 0:
+            return str(self.compile_id)
+        else:
+            return f"{self.compile_id}_{self.attempt}"
+class GuardSource(enum.Enum):
+    LOCAL = 0
+    GLOBAL = 1
+    LOCAL_SPECIALIZED_NN_MODULE = 2
+    GLOBAL_SPECIALIZED_NN_MODULE = 3
+    CONSTANT = 4
+    RANDOM_VALUE = 5
+    SHAPE_ENV = 6
+    LOCAL_FSDP_MODULE = 7
+    GLOBAL_FSDP_MODULE = 8
+    BACKWARD_STATE = 9
+    EPHEMERAL = 10
+    SYNTHETIC_LOCAL = 11
+    LOCAL_UNSPECIALIZED_NN_MODULE = 12
+    GLOBAL_UNSPECIALIZED_NN_MODULE = 13
+    LOCAL_UNSPECIALIZED_BUILTIN_NN_MODULE = 14
+    GLOBAL_UNSPECIALIZED_BUILTIN_NN_MODULE = 15
+    def is_fsdp_module(self) -> bool:
+        return self in (GuardSource.GLOBAL_FSDP_MODULE, GuardSource.LOCAL_FSDP_MODULE)
+    def is_specialized_nn_module(self) -> bool:
+        return (
+            self
+            in (
+                GuardSource.GLOBAL_SPECIALIZED_NN_MODULE,
+                GuardSource.LOCAL_SPECIALIZED_NN_MODULE,
+            )
+            # TODO (anijain2305) - Investigate why is_fsdp_module required.
+            or self.is_fsdp_module()
+        )
+    def is_unspecialized_nn_module(self) -> bool:
+        return self in (
+            GuardSource.GLOBAL_UNSPECIALIZED_NN_MODULE,
+            GuardSource.LOCAL_UNSPECIALIZED_NN_MODULE,
+            GuardSource.GLOBAL_UNSPECIALIZED_BUILTIN_NN_MODULE,
+            GuardSource.LOCAL_UNSPECIALIZED_BUILTIN_NN_MODULE,
+        )
+    def is_unspecialized_builtin_nn_module(self) -> bool:
+        return self in (
+            GuardSource.GLOBAL_UNSPECIALIZED_BUILTIN_NN_MODULE,
+            GuardSource.LOCAL_UNSPECIALIZED_BUILTIN_NN_MODULE,
+        )
+    def is_local(self):
+        return self in (
+            GuardSource.LOCAL,
+            GuardSource.LOCAL_SPECIALIZED_NN_MODULE,
+            GuardSource.LOCAL_FSDP_MODULE,
+            GuardSource.LOCAL_UNSPECIALIZED_NN_MODULE,
+            GuardSource.LOCAL_UNSPECIALIZED_BUILTIN_NN_MODULE,
+        )
+"""
+Base class for a "GuardBuilder" role.
+The GuardBuilderBase role is to represent a scope within which to build a guard. The name is a little
+confusing, as it's not a builder, but it is kept for the sake of avoiding a lot of renames and keeping
+the original reference to torchdynamo's GuardBuilder.
+Note: create_fn is invoked with a GuardBuilderBase and a Guard. A GuardBuilder is chosen based
+on GuardSource's select function.
+There is value in keeping this GuardBuilderBase empty to keep layering clean.
+"""
+class GuardBuilderBase:
+    pass
+# Pairs a sympy expression with a captured traceback.
+# NOTE(review): the traceback presumably records where the shape guard was
+# created - confirm against the code that builds ShapeGuard instances.
+class ShapeGuard(NamedTuple):
+    expr: sympy.Expr
+    stack: CapturedTraceback
+# A single guard: the source it originates from plus the function that
+# create() invokes with a builder and this guard. Hashing and ordering are
+# defined explicitly below (see __hash__ and sort_key).
+@dataclasses.dataclass
+class Guard:
+    # originating_source is the source that called the make_guard method to
+    # construct this guard object. The property name specifies what exactly it
+    # is the guard is guarding on.  The meaning of the name is dependent on the
+    # create_fn; you must look at the use-site inside create_fn to know what
+    # name means.
+    #
+    # That being said, although you might think this is just a "name", name is
+    # usually an arbitrary Python expression that will be evaluated with all
+    # globals (and locals, if you create a LOCAL guard) to extract the Python
+    # object that we want to perform guard tests on.  This evaluation
+    # typically happens in GuardBuilder.eval.  In these cases, name is
+    # typically produced by originating_source.name() (not to be confused with
+    # GuardSource - the property source).
+    #
+    # Occasionally, name is not a valid Python expression; sometimes
+    # it is meaningless.  Example create_fns that are like this include
+    # GRAD_MODE and SHAPE_ENV.
+    originating_source: Source
+    create_fn: Callable[[GuardBuilderBase, Guard], None]
+    # Export only. These values are written to at time of guard check_fn creation.
+    guard_types: Optional[List[str]] = None
+    code_list: Optional[List[str]] = None
+    obj_weakref: Optional[object] = None
+    guarded_class_weakref: Optional[type] = None
+    stack: Optional[CapturedTraceback] = None
+    user_stack: Optional[traceback.StackSummary] = None
+    # Cache for __hash__; filled on the first hash() call.
+    _hash: Optional[int] = None
+    def __hash__(self):
+        # Computed once from (name, source, id(create_fn)) and then reused.
+        if self._hash is None:
+            self._hash = hash((self.name, self.source, id(self.create_fn)))
+        return self._hash
+    def sort_key(self):
+        # Put the duplicate input guards at the end. The duplicate guards have
+        # two sources while guard.name only considers one source.
+        from torch._dynamo.guards import GuardBuilder
+        is_duplicate_input = (
+            isinstance(self.create_fn, functools.partial)
+            and self.create_fn.func is GuardBuilder.DUPLICATE_INPUT
+        )
+        # Ordering: duplicate-input flag, source value (-1 when the source is
+        # falsy), name length, name, then the first line number of the
+        # underlying create function.
+        return (
+            is_duplicate_input,
+            self.source.value if self.source else -1,
+            len(self.name),
+            self.name,
+            self.inner_create_fn().__code__.co_firstlineno,
+        )
+    def __lt__(self, other):
+        # Guards order by sort_key().
+        return self.sort_key() < other.sort_key()
+    def inner_create_fn(self):
+        # Unwrap a functools.partial to reach the underlying create function.
+        if isinstance(self.create_fn, functools.partial):
+            return self.create_fn.func
+        else:
+            return self.create_fn
+    @property
+    def name(self) -> str:
+        return self.originating_source.name()
+    @property
+    def source(self) -> GuardSource:
+        return self.originating_source.guard_source()
+    @staticmethod
+    def weakref_to_str(obj_weakref):
+        """
+        This is a workaround of a Python weakref bug.
+        `obj_weakref` is instance returned by `weakref.ref`,
+        `str(obj_weakref)` is buggy if the original obj overrides __getattr__, e.g:
+            class MyConfig(dict):
+                def __getattr__(self, x):
+                    return self[x]
+            obj = MyConfig(offset=5)
+            obj_weakref = weakref.ref(obj)
+            str(obj_weakref)  # raise error: KeyError: '__name__'
+        """
+        if isinstance(obj_weakref, weakref.ReferenceType):
+            obj = obj_weakref()
+            if obj is not None:
+                return f"<weakref at {hex(id(obj_weakref))}; to '{obj.__class__.__name__}' at {hex(id(obj))}>"
+            else:
+                return f"<weakref at {hex(id(obj_weakref))}; dead>"
+        else:
+            # Anything that is not a weakref.ref is formatted with plain str().
+            return str(obj_weakref)
+    def __repr__(self):
+        # Multi-line, dict-like rendering of the guard and its export info.
+        s = f"""
+        {self.source.name.lower() if self.source else ""} {repr(self.name)} {self.inner_create_fn().__name__}
+        {{
+            'guard_types': {self.guard_types},
+            'code': {self.code_list},
+            'obj_weakref': {self.weakref_to_str(self.obj_weakref)}
+            'guarded_class': {self.guarded_class_weakref}
+        }}
+        """
+        return s
+    def __str__(self):
+        # One labelled line per field; this is the text logged by create()
+        # when guard creation fails.
+        output = f"Name: {repr(self.name)}\n"
+        source = self.source.name.lower() if self.source else ""
+        output += f"    Source: {source}\n"
+        output += f"    Create Function: {self.inner_create_fn().__name__}\n"
+        output += f"    Guard Types: {self.guard_types}\n"
+        output += f"    Code List: {self.code_list}\n"
+        output += f"    Object Weakref: {self.weakref_to_str(self.obj_weakref)}\n"
+        output += f"    Guarded Class Weakref: {self.guarded_class_weakref}\n"
+        return output
+    def create(self, builder: GuardBuilderBase):
+        # Run create_fn with the builder and this guard. On failure, log the
+        # guard (and the last four formatted frames of self.stack, when set)
+        # and re-raise the original exception.
+        try:
+            return self.create_fn(builder, self)
+        except Exception:
+            log.exception("Error while creating guard:\n%s", str(self).rstrip())
+            if self.stack:
+                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
+            raise
+    def is_specialized_nn_module(self):
+        return self.source.is_specialized_nn_module()
+    def is_fsdp_module(self):
+        return self.source.is_fsdp_module()
+    def is_local(self):
+        return self.source.is_local()
+    def set_export_info(self, guard_type, guarded_class, code_list, obj_weakref):
+        # Accumulates export information; may be called more than once for the
+        # same guard, in which case guard types and code are appended.
+        if not self.guard_types:
+            self.guard_types = []
+        self.guard_types.append(guard_type)
+        assert self.guarded_class_weakref in (
+            guarded_class,
+            None,
+        ), "Guarded class id must be identical, or None"
+        self.guarded_class_weakref = guarded_class
+        # NOTE(review): the first call stores the caller's list object itself,
+        # so later extend() calls mutate that same list - confirm callers do
+        # not reuse it.
+        if not self.code_list:
+            self.code_list = code_list
+        else:
+            self.code_list.extend(code_list)
+        # Some objects are ephemeral, e.g., list[slice(1, 2)]. If we have
+        # multiple guards on the same object, the weakref can die between the
+        # invocation of set_export_info calls. So a dead weakref is also
+        # acceptable.
+        assert (
+            self.obj_weakref in (obj_weakref, None)
+            or callable(self.obj_weakref)
+            and self.obj_weakref() is None
+        ), "Guarded object must be identical, None or ephemeral (dead weakref)"
+        self.obj_weakref = obj_weakref
+# Type parameter of Checkpointable: the snapshot type that copy_graphstate()
+# returns and restore_graphstate() accepts.
+T = TypeVar("T")
+"""
+Parent structure for guard env expressions.
+A GuardEnvExpr can have any subtype.
+Note: All subtypes must be handled exhaustively in
+torch._dynamo.guards._parse_guard_env_guards to avoid a RuntimeError.
+"""
+@dataclasses.dataclass
+class GuardEnvExpr:
+    # Marker base class with no fields of its own; concrete expressions such as
+    # DuplicateInputs subclass it.
+    pass
+"""
+A class representing a pair of duplicate inputs.
+input_source_a and input_source_b are the sources of the two inputs we have deduped.
+"""
+@dataclasses.dataclass
+class DuplicateInputs(GuardEnvExpr):
+    # Sources of the two inputs recorded as duplicates of one another.
+    input_source_a: Source
+    input_source_b: Source
+    def __post_init__(self):
+        # A duplicate pair must name two different sources.
+        assert self.input_source_a != self.input_source_b
+"""
+Checkpointable is an interface for driving state snapshotting, left purposely vague for now.
+copy_graphstate() -> T, a somewhat legacy name, is expected to emit a snapshot of any type that
+can also be taken in at restore_graphstate(T) calls.
+When to snapshot, is, at the moment, an implementation detail of upstream callers. Checkpointable
+does not provide any guarantees around consistency, idempotency, or safety of calling its APIs, yet.
+In the future, it will have a closer coupling to a generic Checkpoint management system.
+"""
+class Checkpointable(Generic[T]):
+    # NOTE(review): the only base is Generic[T] (no ABC / ABCMeta), so the
+    # @abstractmethod markers below document the required interface but do not
+    # by themselves prevent instantiation.
+    # Produce a snapshot of type T.
+    @abstractmethod
+    def copy_graphstate(self) -> T: ...
+    # Accept a snapshot of type T previously produced by copy_graphstate().
+    @abstractmethod
+    def restore_graphstate(self, state: T): ...
+class GuardsCheckpointState:
+    """
+    The GuardCheckpointState - it is the T of Checkpointable[T] for GuardsContext
+    """
+    dynamo_guards: Set[Guard] = set()
+    def __init__(self, dynamo_guards):
+        self.dynamo_guards = dynamo_guards
+    def diff(self, other):
+        """
+        Produces a delta against another GuardsCheckpointState.
+        Returns None if no delta is found, otherwise, return a set() of mismatched
+        Guard type objects.
+        """
+        r = self.dynamo_guards.difference(other.dynamo_guards)
+        if len(r) == 0:
+            return None
+        return r
+    def __eq__(self, other):
+        return self.diff(other) is None
+class ModuleContextCheckpointState:
+    nn_modules: Dict[str, torch.nn.Module] = {}
+    def __init__(self, nn_modules):
+        self.nn_modules = nn_modules
+    def diff(self, other):
+        """
+        Produces a delta against another ModuleContextCheckpointState.
+        Returns None if no delta is found, otherwise, return a set() of mismatched
+        module key names.
+        """
+        r = set(self.nn_modules.keys()).difference(set(other.nn_modules.keys()))
+        if len(r) == 0:
+            return None
+        return r
+    def __eq__(self, other):
+        return self.diff(other) is None
+class ModuleContext(Checkpointable[ModuleContextCheckpointState]):
+    def __init__(self) -> None:
+        self.nn_modules: Dict[str, Any] = {}
+    def copy_graphstate(self):
+        return ModuleContextCheckpointState(dict(self.nn_modules))
+    def restore_graphstate(self, state):
+        assert isinstance(state, ModuleContextCheckpointState)
+        self.nn_modules = state.nn_modules
+class GlobalContextCheckpointState:
+    global_state: Dict[str, Tuple[Callable, ...]] = {}
+    def __init__(self, global_states):
+        self.global_state = global_states
+    def diff(self, other):
+        """
+        Produces a delta against another GlobalContextCheckpointState.
+        Returns None if no delta is found, otherwise, return a set() of mismatched
+        global key names.
+        """
+        r = set(self.global_state.keys()).difference(set(other.global_state.keys()))
+        if len(r) == 0:
+            return None
+        return r
+    def __eq__(self, other):
+        return self.diff(other) is None
+class GlobalContext(Checkpointable[GlobalContextCheckpointState]):
+    """
+    This keeps track of the global torch state during tracing of a function.
+    For example, torch.is_grad_enabled.
+    """
+    _supported_global_states = {
+        "grad_enabled",
+        "torch_function_enabled",
+        "autocast_enabled",
+        "autocast_cpu_enabled",
+        "autocast_gpu_dtype",
+        "autocast_cpu_dtype",
+        "autocast_cache_enabled",
+    }
+    def __init__(self) -> None:
+        self.global_state: Dict[str, Tuple[Callable, ...]] = {}
+    def copy_graphstate(self):
+        return GlobalContextCheckpointState(dict(self.global_state))
+    def restore_graphstate(self, state):
+        assert isinstance(state, GlobalContextCheckpointState)
+        self.global_state = state.global_state
+        assert (
+            len(self.global_state) == len(self._supported_global_states)
+            and set(self.global_state.keys()) == self._supported_global_states
+        ), "Global state mismatch"
+        for func, args in self.global_state.values():
+            func(args)
+"""
+A GuardsContext is a checkpointable representation of all the guards in the current tracing
+context. Its lifecycle is bound 1:1 to the tracing context, and it should never be instantiated
+directly outside of it. For passing around internal state representations of this object,
+prefer to extract them with copy_graphstate to produce a GuardsCheckpointState.
+"""
+# Like a Set[Guard] but will record the user stack on all guards at the
+# time they were installed at their destination
+class GuardsSet:
+    def __init__(self, inner=None):
+        if inner is None:
+            inner = set()
+        self.inner = inner
+    def __iter__(self):
+        return iter(self.inner)
+    def __len__(self):
+        return len(self.inner)
+    # Subtraction along with bool is typically used to determine the delta of
+    # added guards between checkpoints for higher order ops
+    def __sub__(self, other):
+        return GuardsSet(self.inner - other.inner)
+    def __bool__(self):
+        return bool(self.inner)
+    def add(self, guard: Guard, *, collect_debug_stack=True, skip=0):
+        if guard in self.inner:
+            return
+        if collect_debug_stack:
+            if guard.stack is None:
+                guard.stack = CapturedTraceback.extract(skip=1 + skip)
+            if guard.user_stack is None:
+                guard.user_stack = TracingContext.extract_stack()
+        self.inner.add(guard)
+    def update(self, *others: Set[Guard]):
+        for o in others:
+            for g in o:
+                self.add(g, skip=1)
+    def remove_guards_with_source(self, source):
+        """Delete all guards with a given source"""
+        self.inner = {g for g in self.inner if g.originating_source != source}
+class GuardsContext(Checkpointable[GuardsCheckpointState]):
+    def __init__(self) -> None:
+        self.dynamo_guards: GuardsSet = GuardsSet()
+        self.aotautograd_guards: List[GuardEnvExpr] = []
+    def copy_graphstate(self):
+        return GuardsCheckpointState(set(self.dynamo_guards.inner))
+    def restore_graphstate(self, state):
+        # NB: "steals" the passed in state
+        assert isinstance(state, GuardsCheckpointState)
+        self.dynamo_guards = GuardsSet(state.dynamo_guards)
+# Thread-local slot holding the per-thread `compile_context` and `tracing_context`.
+_TLS = threading.local()
+"""
+TracingContext is the source of truth for all currently accumulated information
+needed to trace. Its lifecycle is kept 1:1 when using TorchDynamo, but other systems
+are open to managing their own TracingContext with that in mind.
+The purpose of TracingContext is not to be a dumping ground, or god object, but rather to avoid
+having to plumb complex subsystems across multiple verticals.
+Ex: A common example is guard accumulation between dynamo, shape_env, aot_autograd, and inductor.
+Accessing the current tracing context via
+TracingContext.get() allows users to accumulate their own guards for processing, without needing to know how
+to plumb objects back up to where frame interpretation happened.
+Note that you can end up with multiple TracingContext for a single compilation
+of a frame, as we reset the TracingContext whenever we restart analysis.
+CompileContext is a more overarching context that encompasses multiple restarts.
+"""
+class CompileContext:
+    @staticmethod
+    def get() -> CompileContext:
+        assert _TLS.compile_context is not None
+        return _TLS.compile_context
+    @staticmethod
+    def try_get() -> Optional[CompileContext]:
+        return getattr(_TLS, "compile_context", None)
+    def __init__(self, compile_id):
+        assert compile_id is None or isinstance(compile_id, CompileId)
+        self.compile_id: Optional[CompileId] = compile_id
+        self.attempt = 0
+    @staticmethod
+    def current_compile_id():
+        self = CompileContext.try_get()
+        if self is None:
+            return None
+        return self.compile_id
+    @staticmethod
+    def current_trace_id():
+        self = CompileContext.try_get()
+        if self is None:
+            return None
+        if self.compile_id is None:
+            return None
+        return TraceId(self.compile_id, self.attempt)
+class TracingContext:
+    """
+    Per-trace state container: guard, module and global contexts, the fake mode,
+    the user frame stack used when reporting errors, and assorted per-trace
+    settings. The current instance is looked up through thread-local state via
+    try_get() / get().
+    """
+    @staticmethod
+    def try_get() -> Optional[TracingContext]:
+        """
+        Provides the currently installed TracingContext, or None.
+        Note that it is a staticmethod, and invocations outside of `with tracing()` (see below), are valid but
+        will return None.
+        """
+        return getattr(_TLS, "tracing_context", None)
+    @staticmethod
+    def get() -> TracingContext:
+        """
+        Return the currently installed TracingContext.
+        Raises RuntimeError when there is none.
+        """
+        if ctx := TracingContext.try_get():
+            return ctx
+        raise RuntimeError(
+            "TracingContext.get() must be called within an ongoing trace."
+        )
+    def __init__(self, fake_mode):
+        self.guards_context = GuardsContext()
+        self.module_context = ModuleContext()
+        self.global_context = GlobalContext()
+        self.fake_mode = fake_mode
+        self.frame_summary_stack = []
+        # This is morally part of frame_summary_stack, but it is kept separate
+        # for clarity.  As we process a frame, this variable gets updated
+        # to keep track of what line we are in the function.  We make a
+        # function call, this gets cleared and the frame location is pushed
+        # to frame_summary_stack (prepping this variable for the inner frame's
+        # progress)
+        self.loc_in_frame = None
+        # this is only set after aot_autograd
+        self.fw_metadata = None
+        # this is only set after aot_autograd
+        self.aot_graph_name = None
+        self.params_flat = None
+        # this is for extended return calling convention from backend
+        # compiler to aot_autograd
+        # Per output, what the compiler specified stride of the output is,
+        # or None if no stride is known.  This is always the HINT, it
+        # is never a SymInt (it would be better if it was a SymInt, but
+        # I can't conveniently get this from Inductor atm.  Also, be
+        # careful not to accidentally induce guards on the SymInt if
+        # you ever do change this in aot_autograd.py; you should check
+        # on permutations preferentially.)
+        self.output_strides: Optional[List[Optional[Tuple[int, ...]]]] = None
+        # When this is True, whenever we encounter an int in Dynamo tracing,
+        # we will (1) force unspec it and (2) force it as a size-like unbacked
+        # integer.  This is currently used when processing certain lists of
+        # ints that are known to be size-like and may have 0/1 entries that we
+        # must not specialize on.
+        self.force_unspec_int_unbacked_size_like = False
+        # See note [Tensor Fakification and Symbol Caching]
+        self.tensor_to_context = WeakTensorKeyDictionary()
+        # If this is true, AOT Autograd will return output Fake Tensors with appropriate
+        # meta on the first invocation
+        # see note: [Returning Fake Tensors on First AOT Autograd Call]
+        self.fakify_first_call = False
+    def clear(self):
+        # Look at the note in output_graph.py in function `save_global_state`
+        # for the context on clearing global context.
+        self.global_context.global_state = {}
+    @staticmethod
+    @contextmanager
+    def patch(**kwargs):
+        """
+        Temporarily set attributes on the current TracingContext.
+        Each keyword names an existing attribute; the prior values are put back
+        when the ``with`` block exits, whether or not it raised.
+        """
+        prior = {}
+        ctx = TracingContext.get()
+        for key in kwargs.keys():
+            # AttributeError on invalid entry (getattr is called without a
+            # default), raised before any attribute has been modified
+            prior[key] = getattr(ctx, key)
+        for key, val in kwargs.items():
+            setattr(ctx, key, val)
+        try:
+            yield
+        finally:
+            for key, val in prior.items():
+                setattr(ctx, key, val)
+    @staticmethod
+    def extract_stack():
+        """
+        Return the user stack as a traceback.StackSummary: the pushed frame
+        summaries plus the current in-frame location when one is set. Returns an
+        empty StackSummary when no TracingContext is installed.
+        """
+        self = TracingContext.try_get()
+        if self is None:
+            return traceback.StackSummary()
+        stack = self.frame_summary_stack
+        if self.loc_in_frame is not None:
+            # Builds a new list; frame_summary_stack itself is left untouched.
+            stack = stack + [self.loc_in_frame]
+        return traceback.StackSummary.from_list(stack)
+    # Call this when you want to call into some code that isn't necessarily
+    # associated with the current frame state
+    @staticmethod
+    @contextlib.contextmanager
+    def clear_frame():
+        tc = TracingContext.get()
+        # Swap in an empty frame stack and no current location for the duration
+        # of the block; patch.object restores both on exit.
+        with unittest.mock.patch.object(
+            tc, "frame_summary_stack", []
+        ), unittest.mock.patch.object(tc, "loc_in_frame", None):
+            try:
+                yield
+            except Exception as e:
+                # Prevent real_stack from getting attached
+                #
+                # The invariant is that if an Exception has real_stack, we've
+                # appropriately attached a user stack and we no longer need to
+                # attach anything. Because we cannot conveniently interpose
+                # when an exception is thrown, we instead interpose everywhere
+                # we set what the user stack is set (using the context
+                # manager). However, our compiler stack does "tail calls"
+                # (when it calls into user compiler), at which point the
+                # parent exception frames would incorrectly attach an
+                # incorrect frame.
+                #
+                # However, if, somehow, someone raised an exception with this
+                # scope that had a stack (for example, because they are
+                # restoring the user stack state appropriately as they process
+                # node by node), we should respect it. Thus, we cannot
+                # unconditionally set None.
+                if not hasattr(e, "real_stack"):
+                    e.real_stack = None  # type: ignore[attr-defined]
+                raise
+    @staticmethod
+    @contextlib.contextmanager
+    def current_frame(frame_summary):
+        """
+        Push ``frame_summary`` (when not None) for the duration of the block and
+        reset the in-frame location; both are restored on exit. An exception
+        leaving the block without a ``real_stack`` attribute gets the stack
+        extracted at that point attached to it.
+        """
+        # frame_summary can be None to solely take advantage of real_stack
+        # attachment to thrown exceptions
+        tc = TracingContext.get()
+        if frame_summary is not None:
+            tc.frame_summary_stack.append(frame_summary)
+        old = tc.loc_in_frame
+        tc.loc_in_frame = None
+        try:
+            yield
+        except Exception as e:
+            # Runs before the finally block, so the pushed frame is still part
+            # of the extracted stack.
+            if not hasattr(e, "real_stack"):
+                e.real_stack = tc.extract_stack()  # type: ignore[attr-defined]
+            raise
+        finally:
+            if frame_summary is not None:
+                tc.frame_summary_stack.pop()
+            tc.loc_in_frame = old
+    @staticmethod
+    @contextlib.contextmanager
+    def report_output_strides():
+        """
+        Yield a fresh list installed as ``output_strides`` on the current
+        context, restoring the previous value on exit. Yields None and does
+        nothing else when no TracingContext is installed.
+        """
+        tc = TracingContext.try_get()
+        if tc is None:
+            yield None
+            return
+        old_output_strides = tc.output_strides
+        tc.output_strides = []
+        try:
+            yield tc.output_strides
+        finally:
+            tc.output_strides = old_output_strides
+    @staticmethod
+    def set_current_loc(filename, lineno, frame_name):
+        # lookup_line=False: the FrameSummary is built without reading the
+        # source line up front.
+        TracingContext.get().loc_in_frame = traceback.FrameSummary(
+            filename, lineno, frame_name, lookup_line=False
+        )
+@contextmanager
+def compile_context(context: Optional[CompileContext]):
+    old_context = getattr(_TLS, "compile_context", None)
+    _TLS.compile_context = context
+    try:
+        yield context
+    finally:
+        if context is not None:
+            if context.compile_id is not None:
+                set_context_frame(
+                    (
+                        context.compile_id.frame_id,
+                        context.compile_id.frame_compile_id,
+                        context.attempt,
+                    )
+                )
+        _TLS.compile_context = old_context
+@contextmanager
+def tracing(context: Optional[TracingContext]):
+    """
+    This function installs the passed in tracing context as a dynamic scoped
+    global variable.
+    Calls to TracingContext.get() while not under a `with tracing()` context
+    will return None.
+    """
+    old_context = getattr(_TLS, "tracing_context", None)
+    _TLS.tracing_context = context
+    try:
+        yield context
+    except Exception as e:
+        if not hasattr(e, "real_stack") and context is not None:
+            e.real_stack = context.extract_stack()  # type: ignore[attr-defined]
+        raise
+    finally:
+        if (
+            context is not None
+            and context.fake_mode is not None
+            and context.fake_mode.shape_env is not None
+        ):
+            context.fake_mode.shape_env.cleanup()
+        _TLS.tracing_context = old_context
+# Subclasses can be found in torch/_dynamo/source.py
+# TODO(voz): Consider a toplevel torch/_source.py
+@dataclasses.dataclass(frozen=True)
+class Source:
+    """
+    Base class for sources. It declares no fields; reconstruct(), guard_source()
+    and name() raise NotImplementedError here and are left to subclasses.
+    """
+    def is_dict_key(self):
+        """Return False; ChainedSource overrides this to ask its base."""
+        return False
+    def is_ephemeral(self):
+        """Return False; ChainedSource overrides this to ask its base."""
+        return False
+    def reconstruct(self, codegen):
+        raise NotImplementedError
+    def guard_source(self) -> GuardSource:
+        raise NotImplementedError
+    def name(self) -> str:
+        raise NotImplementedError
+    def make_guard(self, fn) -> Guard:
+        """
+        Return ``Guard(self, fn)``.
+        Raises NotImplementedError when guard_source() is GuardSource.CONSTANT.
+        """
+        if self.guard_source() is GuardSource.CONSTANT:
+            raise NotImplementedError
+        return Guard(self, fn)
+    def is_specialized_nn_module(self) -> bool:
+        # Delegates to the GuardSource returned by guard_source().
+        return self.guard_source().is_specialized_nn_module()
+    def subguards_allowed(self):
+        """True if you can guard on attributes of this"""
+        return self.guard_source() != GuardSource.SYNTHETIC_LOCAL
+# Subclasses can be found in torch/_dynamo/source.py
+@dataclasses.dataclass(frozen=True)
+class ChainedSource(Source):
+    """A Source wrapping another Source (``base``) and deferring these queries to it."""
+    # The source this one is chained onto.
+    base: Source
+    def is_dict_key(self):
+        # Recurse until you either hit a ConstDictKey or a Source
+        return self.base.is_dict_key()
+    def is_ephemeral(self):
+        # Same recursion as is_dict_key: the answer comes from the base source.
+        return self.base.is_ephemeral()
+def detect_fake_mode(inputs: Any = None):
+    """
+    Attempts to "detect" what the current fake mode is.  If there is one ambiently
+    available from TracingContext, we preferentially use that.  Otherwise, we
+    heuristically detect the fake mode via the following sources, in order of
+    priority:
+        - Currently active fake mode on stack
+        - Fake mode associated with passed in tensors (inputs does not
+          have to be flattened)
+    """
+    from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode
+    fake_modes = []
+    if context := TracingContext.try_get():
+        fake_mode = context.fake_mode
+        if fake_mode is not None:
+            fake_modes.append((fake_mode, "tracing context", 0))
+    from torch.utils._python_dispatch import _get_current_dispatch_mode_stack
+    for i, m in enumerate(reversed(_get_current_dispatch_mode_stack())):
+        if isinstance(m, FakeTensorMode):
+            fake_modes.append((m, "active fake mode", i))
+    flat_inputs = pytree.tree_leaves(inputs)
+    for i, flat_input in enumerate(flat_inputs):
+        if isinstance(flat_input, FakeTensor):
+            fake_modes.append((flat_input.fake_mode, "fake tensor input", i))
+    if fake_modes:
+        fake_mode, desc1, i1 = fake_modes[0]
+        for m, desc2, i2 in fake_modes[1:]:
+            assert fake_mode is m, (
+                f"fake mode ({fake_mode}) from {desc1} {i1} doesn't match mode ({m}) from {desc2} {i2}\n\n"
+                f"fake mode from {desc1} {i1} allocated at:\n{fake_mode.stack}\n"
+                f"fake mode from {desc2} {i2} allocated at:\n{m.stack}"
+            )
+        return fake_mode
+    else:
+        return None
+def active_fake_mode():
+    """
+    Inspects the dispatch mode stack for an active fake mode and returns it.
+    Returns None if no fake mode is active.
+    """
+    from torch._subclasses.fake_tensor import FakeTensorMode
+    from torch.utils._python_dispatch import _get_current_dispatch_mode_stack
+    for _, m in enumerate(reversed(_get_current_dispatch_mode_stack())):
+        if isinstance(m, FakeTensorMode):
+            return m
+    return None

.venv/lib/python3.11/site-packages/torch/_jit_internal.py ADDED Viewed

	@@ -0,0 +1,1547 @@

+# mypy: allow-untyped-defs
+"""
+The weak_script annotation needs to be here instead of inside torch/jit/ so it
+can be used in other places in torch/ (namely torch.nn) without running into
+circular dependency problems
+"""
+import ast
+import builtins
+import collections
+import contextlib
+import enum
+import inspect
+import io
+import pickle
+import sys
+import textwrap
+import threading
+import types
+import typing
+import warnings
+import weakref
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Final,
+    ForwardRef,
+    get_args,
+    get_origin,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+)
+import torch
+# This is needed. `torch._jit_internal` is imported before `torch.distributed.__init__`.
+# Explicitly ask to import `torch.distributed.__init__` first.
+# Otherwise, "AttributeError: module 'torch' has no attribute 'distributed'" is raised.
+import torch.distributed.rpc
+import torch.package._mangling as package_mangling
+from torch._awaits import _Await
+from torch._C import _Await as CAwait, Future as CFuture
+from torch._sources import fake_range, get_source_lines_and_file, parse_def
+from torch.futures import Future
+# Interpreter version flags.
+IS_PY39_PLUS: Final[bool] = sys.version_info >= (3, 9)
+IS_PY310_PLUS: Final[bool] = sys.version_info >= (3, 10)
+# The runtime type of `X | Y` unions where it exists, otherwise an empty tuple.
+BuiltinUnionType: Union[Type, Tuple[Type, ...]]
+if sys.version_info >= (3, 10):
+    # NOTE: IS_PY310_PLUS doesn't work with mypy.
+    # cf. https://mypy.readthedocs.io/en/stable/common_issues.html#python-version-and-system-platform-checks
+    BuiltinUnionType = types.UnionType
+else:
+    BuiltinUnionType = ()  # trick: this makes isinstance short circuit.
+LockType: Type
+try:
+    import _thread
+    LockType = _thread.LockType
+except ImportError:
+    # NOTE(review): `_dummy_thread` was removed from the standard library in
+    # Python 3.9, so this fallback can only succeed on older interpreters -
+    # confirm whether it is still needed.
+    import _dummy_thread  # type: ignore[import-not-found]
+    LockType = _dummy_thread.LockType
+# Wrapper functions that can call either of 2 functions depending on a boolean
+# argument
+boolean_dispatched: "weakref.WeakKeyDictionary[Callable, Dict[str, Callable]]" = (
+    weakref.WeakKeyDictionary()
+)  # noqa: T484
+# NOTE(review): presumably the filename prefix used for source synthesized for
+# dataclasses - the use site is not visible here; confirm.
+FAKE_FILENAME_PREFIX = "__torch_jit_dataclass"
+def is_final(ann) -> bool:
+    return (
+        hasattr(ann, "__module__")
+        and ann.__module__ in {"typing", "typing_extensions"}
+        and (get_origin(ann) is Final or isinstance(ann, type(Final)))
+    )
+# allows BroadcastingList instance to be subscriptable
+class BroadcastingListCls:
+    def __getitem__(self, types):
+        return
+# mypy doesn't support parameters on types, so we have to explicitly type each
+# list size
+BroadcastingList1 = BroadcastingListCls()
+for i in range(2, 7):
+    globals()[f"BroadcastingList{i}"] = BroadcastingList1
+def is_scripting() -> bool:
+    r"""
+    Function that returns True when in compilation and False otherwise. This
+    is useful especially with the @unused decorator to leave code in your
+    model that is not yet TorchScript compatible.
+    .. testcode::
+        import torch
+        @torch.jit.unused
+        def unsupported_linear_op(x):
+            return x
+        def linear(x):
+            if torch.jit.is_scripting():
+                return torch.linear(x)
+            else:
+                return unsupported_linear_op(x)
+    """
+    # This Python body always answers False. NOTE(review): the True result
+    # described above is presumably supplied by the compiler rather than by
+    # this function - not visible here; confirm.
+    return False
+# Retrieves a fully-qualified name (module hierarchy + classname) for a given obj.
+def _qualified_name(obj, mangle_name=True) -> str:
+    # This special case allows us to override the qualified name on a type.
+    # It's currently used in conjunction with tracing, where we create a
+    # fake module to filter only supported attributes. However, since this
+    # new type is defined as a local class, we need a mechanism to override
+    # its qualname so it appears correctly in the TorchScript system. This,
+    # we set '_jit_override_qualname' with the original traced module's
+    # qualified name, which is picked up here
+    if hasattr(obj, "_jit_override_qualname"):
+        return obj._jit_override_qualname
+    # short-circuit in cases where the object already has a known qualified name
+    if isinstance(obj, torch._C.ScriptFunction):
+        return obj.qualified_name
+    if getattr(obj, "__name__", None):
+        name = obj.__name__
+    # Enum classes do not have `__name__` attr, instead they have `name`.
+    elif isinstance(obj, enum.Enum):
+        name = obj.name
+    else:
+        raise RuntimeError("Could not get name of python class object")
+    if name == "<lambda>":
+        name = "_lambda"  # make name a valid identifier
+    module_name = obj.__module__
+    # If the module is actually a torchbind module, then we should short circuit
+    if module_name == "torch._classes":
+        return obj.qualified_name
+    # The Python docs are very clear that `__module__` can be None, but I can't
+    # figure out when it actually would be.
+    if module_name is None:
+        raise RuntimeError(
+            f"Could not get qualified name for class '{name}': "
+            "__module__ can't be None."
+        )
+    # if getattr(sys.modules[module_name], name) is not obj:
+    #     raise RuntimeError(f"Could not get qualified name for class '{name}': "
+    #                        f"the attr {name} on module {module_name} is not the class")
+    # torch.package and TorchScript have separate mangling schemes to avoid
+    # name collisions from multiple packages. To avoid them interfering with
+    # each other, normalize the package manging here.
+    if package_mangling.is_mangled(module_name):
+        module_name = module_name.replace("<", "_")
+        module_name = module_name.replace(">", "_")
+    # The PythonExceptionValue C++ class in torch/csrc/jit/python/python_sugared_value.h
+    # does not need mangle the python class name.
+    if mangle_name:
+        # __main__ is a builtin module, so rewrite it to "__torch__".
+        if module_name == "__main__":
+            module_name = "__torch__"
+        else:
+            # Everything else gets a "__torch__" prefix to avoid name collisions
+            # with the names of user values.
+            module_name = "__torch__." + module_name
+    if "." in name:
+        raise RuntimeError(
+            f"Could not get qualified name for class '{name}': "
+            f"'{name}' is not a valid identifier"
+        )
+    return module_name + "." + name
+class SourceLoader:
+    def __init__(self):
+        self.content = {}
+    def cache(self, fn, source):
+        self.content[fn] = source
+    def get_source(self, fn):
+        return self.content.get(fn)
+loader = SourceLoader()
+def createResolutionCallbackFromEnv(lookup_base):
+    """
+    Creates a resolution callback that will look up qualified names in an
+    environment, starting with `lookup_base` for the base of any qualified
+    names, then proceeding down the lookup chain with the resolved object.
+    You should not use this directly, it should only be used from the other
+    createResolutionCallbackFrom* functions.
+    """
+    def lookupInModule(qualified_name, module):
+        if "." in qualified_name:
+            base, remaining_pieces = qualified_name.split(".", maxsplit=1)
+            module_value = getattr(module, base)
+            return lookupInModule(remaining_pieces, module_value)
+        else:
+            return getattr(module, qualified_name)
+    def parseNestedExpr(expr, module) -> Tuple[Any, int]:
+        i = 0
+        while i < len(expr) and expr[i] not in (",", "[", "]"):
+            i += 1
+        # Special case logic for the empty Tuple as a subscript (used
+        # in the type annotation `Tuple[()]`)
+        if expr[:i] == "()":
+            return (), i
+        base = lookupInModule(expr[:i].strip(), module)
+        assert base is not None, f"Unresolvable type {expr[:i]}"
+        if i == len(expr) or expr[i] != "[":
+            return base, i
+        assert expr[i] == "["
+        parts = []
+        while expr[i] != "]":
+            part_len = 0
+            i += 1
+            part, part_len = parseNestedExpr(expr[i:], module)
+            parts.append(part)
+            i += part_len
+        if len(parts) > 1:
+            return base[tuple(parts)], i + 1
+        else:
+            return base[parts[0]], i + 1
+    def parseExpr(expr, module):
+        try:
+            value, len_parsed = parseNestedExpr(expr, module)
+            assert len_parsed == len(
+                expr
+            ), "whole expression was not parsed, falling back to c++ parser"
+            return value
+        except Exception:
+            """
+            The python resolver fails in several cases in known unit tests, and is intended
+            to fall back gracefully to the c++ resolver in general.  For example, python 2 style
+            annotations which are frequent in our unit tests often fail with types e.g. int not
+            resolvable from the calling frame.
+            """
+            return None
+    return lambda expr: parseExpr(expr, lookup_base)
+def createResolutionCallbackFromFrame(frames_up: int = 0):
+    """
+    Creates a function which, given a string variable name,
+    returns the value of the variable in the scope of the caller of
+    the function which called createResolutionCallbackFromFrame (by default).
+    This is used to enable access in-scope Python variables inside
+    TorchScript fragments.
+    frames_up is number of additional frames to go up on the stack.
+    The default value is 0, which correspond to the frame of the caller
+    of createResolutionCallbackFromFrame. Also for example, if frames_up is set
+    to 1, then the frame of the caller's caller of createResolutionCallbackFromFrame
+    will be taken.
+    For example, the following program prints 2::
+        def bar():
+            cb = createResolutionCallbackFromFrame(1)
+            print(cb("foo"))
+        def baz():
+            foo = 2
+            bar()
+        baz()
+    """
+    frame = inspect.currentframe()
+    i = 0
+    while i < frames_up + 1:
+        assert frame is not None
+        frame = frame.f_back
+        i += 1
+    assert frame is not None
+    f_locals = frame.f_locals
+    f_globals = frame.f_globals
+    class env:
+        def __getattr__(self, key):
+            if key in f_locals:
+                return f_locals[key]
+            elif key in f_globals:
+                return f_globals[key]
+            elif key in dir(builtins):
+                return getattr(builtins, key)
+    return createResolutionCallbackFromEnv(env())
+def get_closure(fn):
+    """
+    Get a dictionary of closed over variables from a function
+    """
+    captures = {}
+    captures.update(fn.__globals__)
+    for index, captured_name in enumerate(fn.__code__.co_freevars):
+        captures[captured_name] = fn.__closure__[index].cell_contents
+    return captures
+# [local resolution in python]
+# Depending on where a variable is defined, and where it is used, we may
+# or may not be able to recover its value when recursively compiling a
+# script function. Remember in the general case, a module or function is
+# first defined and then later scripted. This means we do not have a
+# chance to capture the active frames when the function is defined. Hence any
+# name resolution has to happen later on the created closure. The way
+# python captures type annotations restricts what we can recover. The
+# following example illustrates the different cases:
+#
+#         class MyGlobalClass:
+#             ...
+#         def my_local_scope():
+#             @torch.jit.script
+#             class MyClass:
+#                 ...
+#             @torch.jit.script
+#             class MyClassUsedAsVar:
+#                 ...
+#             def eg(x: MyClass, y: MyGlobalClass):
+#                 a_local_capture : Foo
+#                 return MyClassUsedAsVar(x)
+#
+# MyGlobalClass is defined in the __globals__ dictionary of function
+# 'eg', so it is always recoverable. my_local_scope introduces a new local
+# variable scope in the function. Classes defined here are only visible as
+# local variables. For the case of MyClassUsedAsVar, it is captured
+# because it is used as a variable inside the body of the function, and we
+# can resolve it using the captures returned from `get_closure`. However,
+# the type annotations are not captured by the closure. In Python
+# 3.0--3.9, the _value_ of MyClass and MyGlobalClass will be available as
+# annotations on `eg`, but starting in Python 4.0, they will be represented as
+# strings and will no longer be present. Furthermore, since the body of `eg` does
+# not reference those names, they do not appear in the list of closed over
+# variables. In Python 2.x, type annotations are in comments, leading to a
+# similar situation where their definitions are not available. We anticipate
+# that most users will not run into this issue because their modules and
+# functions will be defined at a global scope like MyGlobalClass. In cases
+# where they are not, it is possible to work around issues by declaring the
+# values global in the function.
+# In Python 3.9 declaring class as global will make it invisible to
+# `inspect.getsource`, see https://bugs.python.org/issue42666 .
+# This could be worked around by manually adding it to the `globals()` dictionary.
+def createResolutionCallbackFromClosure(fn):
+    """
+    Create a resolutionCallback by introspecting the function instead of
+    looking up the stack for the enclosing scope
+    """
+    closure = get_closure(fn)
+    class closure_lookup:
+        # This is a class since `closure` is a dict and it's easier in
+        # `env_helper` if everything just works with `getattr` calls
+        def __getattr__(self, key):
+            if key in closure:
+                return closure[key]
+            elif hasattr(typing, key):
+                return getattr(typing, key)
+            elif hasattr(builtins, key):
+                return getattr(builtins, key)
+            return None
+    return createResolutionCallbackFromEnv(closure_lookup())
+def can_compile_class(cls) -> bool:
+    # If any of the functions on a type don't have a code object, this type can't
+    # be compiled and is probably a builtin / bound from C
+    if is_ignored_fn(cls):
+        return False
+    # Ignore the following list of built-in classes.
+    ignored_builtin_classes = (torch.nn.Module, tuple, list, Exception)
+    if issubclass(cls, ignored_builtin_classes):
+        return False
+    names = cls.__dict__
+    fns = [
+        getattr(cls, name)
+        for name in names
+        if inspect.isroutine(getattr(cls, name, None))
+    ]
+    has_code = [hasattr(fn, "__code__") for fn in fns]
+    return all(has_code)
+def get_callable_argument_names(fn) -> List[str]:
+    """
+    Gets names of all POSITIONAL_OR_KEYWORD arguments for callable `fn`.
+    Returns an empty list when other types of arguments are present.
+    This is used by `torch.jit.trace` to assign meaningful argument names to
+    traced functions and modules.
+    Args:
+        fn: A callable.
+    Returns:
+        Argument names: List[str]
+    """
+    # inspect.signature may fail, give up in that case.
+    try:
+        callable_signature = inspect.signature(fn)
+    except Exception:
+        return []
+    argument_names = []
+    for name, param in callable_signature.parameters.items():
+        # All four other types of arguments do not map to individual values
+        # with a keyword as name.
+        if not param.kind == param.POSITIONAL_OR_KEYWORD:
+            continue
+        argument_names.append(name)
+    return argument_names
+def get_annotation_str(annotation):
+    """
+    Convert an AST node containing a type annotation to the string present in the source
+    that represents the same annotation.
+    """
+    if isinstance(annotation, ast.Name):
+        return annotation.id
+    elif isinstance(annotation, ast.Attribute):
+        return ".".join([get_annotation_str(annotation.value), annotation.attr])
+    elif isinstance(annotation, ast.Subscript):
+        # In Python3.9+ subscript indicies are not wrapped in ast.Index
+        subscript_slice = annotation.slice if IS_PY39_PLUS else annotation.slice.value  # type: ignore[attr-defined]
+        return f"{get_annotation_str(annotation.value)}[{get_annotation_str(subscript_slice)}]"
+    elif isinstance(annotation, ast.Tuple):
+        return ",".join([get_annotation_str(elt) for elt in annotation.elts])
+    elif isinstance(annotation, ast.Constant):
+        return f"{annotation.value}"
+    # If an AST node is not handled here, it's probably handled in ScriptTypeParser.
+    return None
+def get_type_hint_captures(fn):
+    """
+    Get a dictionary containing type resolution mappings necessary to resolve types
+    for the literal annotations on 'fn'. These are not considered to be closed-over by fn
+    and must be obtained separately (e.g. using this function).
+    Args:
+        fn: A callable.
+    Returns:
+        A Dict[str, Any] containing a mapping from the literal annotations used on
+        fn to the Python objects they refer to.
+    """
+    # First, try to get the source of the function. We'll need to parse it to find the actual string names
+    # that were used to annotate the types, since inspect.signature() will only return the class object that
+    # the annotation refers to, not the string name. If we can't get the source, simply return an empty dict.
+    # This may happen in cases where the function is synthesized dynamically at runtime.
+    src = loader.get_source(fn)
+    if src is None:
+        try:
+            src = inspect.getsource(fn)
+        except OSError as e:
+            raise OSError(
+                f"Failed to get source for {fn} using inspect.getsource"
+            ) from e
+    # Gather a dictionary of parameter name -> type, skipping any parameters whose annotated
+    # types are strings. These are only understood by TorchScript in the context of a type annotation
+    # that refers to a class in its own definition, but trying to include a mapping for this in the result
+    # function would cause infinite recursion because the class is currently being compiled.
+    # In addition, there is logic in ScriptTypeParser to handle this.
+    signature = inspect.signature(fn)
+    name_to_type = {
+        name: parameter.annotation
+        for name, parameter in signature.parameters.items()
+        if parameter.annotation is not inspect.Parameter.empty
+        and not isinstance(parameter.annotation, str)
+    }
+    # Then, get the literal type annotations from the function declaration
+    # by source inspection. This accounts for the case in which aliases are used
+    # to annotate the arguments (e.g device_t = torch.device, and then d: device_t).
+    # frontend.py cannot be used here because it includes _jit_internal, so use ast instead.
+    a = ast.parse(textwrap.dedent(src))
+    if len(a.body) != 1 or not isinstance(a.body[0], ast.FunctionDef):
+        raise RuntimeError(f"Expected {fn} to be a function")
+    f = a.body[0]
+    # Prepare a dictionary of source annotation -> type, which will be the final result of this function,
+    # by using the parsed AST (f) to reconstruct source annotations as strings for each parameter and mapping
+    # them to the type object corresponding to the annotation via name_to_type using the parameter name.
+    annotation_to_type = {}
+    for arg in f.args.args:
+        # Get the source type annotation string for this argument if possible.
+        arg_annotation_str = (
+            get_annotation_str(arg.annotation) if arg.annotation else None
+        )
+        # If the argument has no annotation or get_annotation_str cannot convert it to a string,
+        # arg_annotation_str will be None. Skip this arg; ScriptTypeParser will probably handle
+        # this in the latter case.
+        if arg_annotation_str is None:
+            continue
+        # Insert {arg_annotation_str: type} into annotation_to_type if possible. One reason arg_name may not
+        # be present in name_to_type is that the annotation itself is a string and not a type object
+        # (common for self-refential annotations in classes). Once again, let ScriptTypeParser handle this.
+        arg_name = arg.arg
+        if arg_name in name_to_type:
+            annotation_to_type[arg_annotation_str] = name_to_type[arg_name]
+    # If there is a valid return annotation, include it in annotation_to_type. As with argument annotations,
+    # the literal annotation has to be convertible to a string by get_annotation_str, and the actual type
+    # of the annotation cannot be a string.
+    literal_return_annotation = get_annotation_str(f.returns)
+    valid_literal_annotation = literal_return_annotation is not None
+    return_annotation = signature.return_annotation
+    valid_return_annotation_type = (
+        return_annotation is not inspect.Parameter.empty
+        and not isinstance(return_annotation, str)
+    )
+    if valid_literal_annotation and valid_return_annotation_type:
+        annotation_to_type[literal_return_annotation] = return_annotation
+    return annotation_to_type
+def createResolutionCallbackForClassMethods(cls):
+    """
+    This looks at all the methods defined in a class and pulls their closed-over
+    variables into a dictionary and uses that to resolve variables.
+    """
+    # cls is a type here, so `ismethod` is false since the methods on the type
+    # aren't bound to anything, so Python treats them as regular functions
+    fns = [
+        getattr(cls, name)
+        for name in cls.__dict__
+        if inspect.isroutine(getattr(cls, name))
+    ]
+    # Skip built-ins, as they do not have global scope nor type hints
+    # Needed to support `enum.Enum` derived classes in Python-3.11
+    # That adds `_new_member_` property which is an alias to `__new__`
+    fns = [fn for fn in fns if not inspect.isbuiltin(fn) and hasattr(fn, "__globals__")]
+    captures = {}
+    for fn in fns:
+        captures.update(get_closure(fn))
+        captures.update(get_type_hint_captures(fn))
+    def lookup_in_class(key):
+        if key in captures:
+            return captures[key]
+        else:
+            return getattr(builtins, key, None)
+    return lookup_in_class
+def boolean_dispatch(
+    arg_name,
+    arg_index,
+    default,
+    if_true,
+    if_false,
+    module_name,
+    func_name,
+):
+    """
+    Dispatches to either of 2 script functions based on a boolean argument.
+    In TorchScript, the boolean argument must be constant so that the correct
+    function to use can be determined at compile time.
+    """
+    def fn(*args, **kwargs):
+        dispatch_flag = default
+        if arg_name in kwargs:
+            dispatch_flag = kwargs[arg_name]
+        elif arg_index < len(args):
+            dispatch_flag = args[arg_index]
+        if dispatch_flag:
+            return if_true(*args, **kwargs)
+        else:
+            return if_false(*args, **kwargs)
+    if if_true.__doc__ is None and if_false.__doc__ is not None:
+        doc = if_false.__doc__
+        if_true.__doc__ = doc
+    elif if_false.__doc__ is None and if_true.__doc__ is not None:
+        doc = if_true.__doc__
+        if_false.__doc__ = doc
+    elif if_false.__doc__ is None and if_true.__doc__ is None:
+        # neither function has a docstring
+        doc = None
+    else:
+        raise RuntimeError("only one function can have a docstring")
+    fn.__doc__ = doc
+    if module_name is not None:
+        fn.__module__ = module_name
+    if func_name is not None:
+        fn.__name__ = func_name
+    boolean_dispatched[fn] = {
+        "if_true": if_true,
+        "if_false": if_false,
+        "index": arg_index,
+        "default": default,
+        "arg_name": arg_name,
+    }
+    return fn
+class FunctionModifiers:
+    """
+    Used to denote the behavior of a function in TorchScript. See export() and
+    ignore() for details.
+    """
+    UNUSED = "unused (ignored and replaced with raising of an exception)"
+    IGNORE = "ignore (leave as a call to Python, cannot be torch.jit.save'd)"
+    EXPORT = "export (compile this function even if nothing calls it)"
+    DEFAULT = "default (compile if called from a exported function / forward)"
+    COPY_TO_SCRIPT_WRAPPER = (
+        "if this method is not scripted, copy the python method onto the scripted model"
+    )
+    _DROP = "_drop (function is fully ignored, declaration can be unscriptable)"
+def export(fn):
+    """
+    This decorator indicates that a method on an ``nn.Module`` is used as an entry point into a
+    :class:`ScriptModule` and should be compiled.
+    ``forward`` implicitly is assumed to be an entry point, so it does not need this decorator.
+    Functions and methods called from ``forward`` are compiled as they are seen
+    by the compiler, so they do not need this decorator either.
+    Example (using ``@torch.jit.export`` on a method):
+    .. testcode::
+        import torch
+        import torch.nn as nn
+        class MyModule(nn.Module):
+            def implicitly_compiled_method(self, x):
+                return x + 99
+            # `forward` is implicitly decorated with `@torch.jit.export`,
+            # so adding it here would have no effect
+            def forward(self, x):
+                return x + 10
+            @torch.jit.export
+            def another_forward(self, x):
+                # When the compiler sees this call, it will compile
+                # `implicitly_compiled_method`
+                return self.implicitly_compiled_method(x)
+            def unused_method(self, x):
+                return x - 20
+        # `m` will contain compiled methods:
+        #     `forward`
+        #     `another_forward`
+        #     `implicitly_compiled_method`
+        # `unused_method` will not be compiled since it was not called from
+        # any compiled methods and wasn't decorated with `@torch.jit.export`
+        m = torch.jit.script(MyModule())
+    """
+    fn._torchscript_modifier = FunctionModifiers.EXPORT
+    return fn
+def unused(fn):
+    """
+    This decorator indicates to the compiler that a function or method should
+    be ignored and replaced with the raising of an exception. This allows you
+    to leave code in your model that is not yet TorchScript compatible and still
+    export your model.
+        Example (using ``@torch.jit.unused`` on a method)::
+            import torch
+            import torch.nn as nn
+            class MyModule(nn.Module):
+                def __init__(self, use_memory_efficient):
+                    super().__init__()
+                    self.use_memory_efficient = use_memory_efficient
+                @torch.jit.unused
+                def memory_efficient(self, x):
+                    import pdb
+                    pdb.set_trace()
+                    return x + 10
+                def forward(self, x):
+                    # Use not-yet-scriptable memory efficient mode
+                    if self.use_memory_efficient:
+                        return self.memory_efficient(x)
+                    else:
+                        return x + 10
+            m = torch.jit.script(MyModule(use_memory_efficient=False))
+            m.save("m.pt")
+            m = torch.jit.script(MyModule(use_memory_efficient=True))
+            # exception raised
+            m(torch.rand(100))
+    """
+    if isinstance(fn, property):
+        prop = fn
+        setattr(  # noqa: B010
+            prop.fget, "_torchscript_modifier", FunctionModifiers.UNUSED
+        )
+        if prop.fset:
+            setattr(  # noqa: B010
+                prop.fset, "_torchscript_modifier", FunctionModifiers.UNUSED
+            )
+        return prop
+    fn._torchscript_modifier = FunctionModifiers.UNUSED
+    return fn
+# No op context manager from python side
+class _IgnoreContextManager(contextlib.AbstractContextManager):
+    def __init__(self, **kwargs):
+        pass
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        pass
+def ignore(drop=False, **kwargs):
+    """
+    This decorator indicates to the compiler that a function or method should
+    be ignored and left as a Python function. This allows you to leave code in
+    your model that is not yet TorchScript compatible. If called from TorchScript,
+    ignored functions will dispatch the call to the Python interpreter. Models with ignored
+    functions cannot be exported; use :func:`@torch.jit.unused <torch.jit.unused>` instead.
+    Example (using ``@torch.jit.ignore`` on a method)::
+        import torch
+        import torch.nn as nn
+        class MyModule(nn.Module):
+            @torch.jit.ignore
+            def debugger(self, x):
+                import pdb
+                pdb.set_trace()
+            def forward(self, x):
+                x += 10
+                # The compiler would normally try to compile `debugger`,
+                # but since it is `@ignore`d, it will be left as a call
+                # to Python
+                self.debugger(x)
+                return x
+        m = torch.jit.script(MyModule())
+        # Error! The call `debugger` cannot be saved since it calls into Python
+        m.save("m.pt")
+    Example (using ``@torch.jit.ignore(drop=True)`` on a method):
+    .. testcode::
+        import torch
+        import torch.nn as nn
+        class MyModule(nn.Module):
+            @torch.jit.ignore(drop=True)
+            def training_method(self, x):
+                import pdb
+                pdb.set_trace()
+            def forward(self, x):
+                if self.training:
+                    self.training_method(x)
+                return x
+        m = torch.jit.script(MyModule())
+        # This is OK since `training_method` is not saved, the call is replaced
+        # with a `raise`.
+        m.save("m.pt")
+    .. testcleanup::
+        import os
+        os.remove('m.pt')
+    """
+    if callable(drop):
+        # used without any args, so drop is actually a function
+        #   @torch.jit.ignore
+        #   def fn(...):
+        fn = drop
+        fn._torchscript_modifier = FunctionModifiers.IGNORE
+        return fn
+    if not isinstance(drop, bool):
+        raise RuntimeError(
+            "Argument to @torch.jit.ignore must be a bool or "
+            f"a function but got {drop}"
+        )
+    # for backwards compat
+    drop_on_export = kwargs.pop("drop_on_export", None)
+    if drop_on_export:
+        warnings.warn(
+            "ignore(drop_on_export=True) has been deprecated. TorchScript will now drop the function "
+            "call on compilation. Use torch.jit.unused now. {}",
+            category=FutureWarning,
+        )
+        drop = drop_on_export
+    elif drop:
+        warnings.warn(
+            "ignore(True) has been deprecated. TorchScript will now drop the function "
+            "call on compilation. Use torch.jit.unused now. {}",
+            category=FutureWarning,
+        )
+    def decorator(fn):
+        if drop:
+            fn._torchscript_modifier = FunctionModifiers.UNUSED
+        else:
+            fn._torchscript_modifier = FunctionModifiers.IGNORE
+        return fn
+    return decorator
+def _drop(fn):
+    fn._torchscript_modifier = FunctionModifiers._DROP
+    return fn
+def _copy_to_script_wrapper(fn):
+    fn._torchscript_modifier = FunctionModifiers.COPY_TO_SCRIPT_WRAPPER
+    return fn
+def module_has_exports(mod):
+    for name in dir(mod):
+        if hasattr(mod, name):
+            item = getattr(mod, name)
+            if callable(item):
+                if get_torchscript_modifier(item) is FunctionModifiers.EXPORT:
+                    return True
+    return False
+# WARNING: should_drop is currently being used by our JIT code coverage plug-in to mark JIT'd code as covered. If you
+# rename this function, please update references in tools/coverage_plugins_package/src/coverage_plugins/jit_plugin.py to
+# allow JIT'd code to still be covered.
+def should_drop(fn) -> bool:
+    attr = get_torchscript_modifier(fn)
+    if attr is None:
+        return False
+    return attr is FunctionModifiers.UNUSED or attr is FunctionModifiers._DROP
+def is_ignored_fn(fn) -> bool:
+    mod = get_torchscript_modifier(fn)
+    return (
+        mod is FunctionModifiers.UNUSED
+        or mod is FunctionModifiers.IGNORE
+        or mod is FunctionModifiers._DROP
+    )
+def _is_drop_fn(fn) -> bool:
+    mod = get_torchscript_modifier(fn)
+    return mod is FunctionModifiers._DROP
+def is_static_fn(cls, fn) -> bool:
+    return isinstance(inspect.getattr_static(cls, fn, default=None), staticmethod)
+def get_static_fn(cls, fn):
+    return inspect.getattr_static(cls, fn).__func__
+def get_torchscript_modifier(fn):
+    if not callable(fn):
+        return None
+    if hasattr(fn, "__func__"):
+        fn = fn.__func__
+    return getattr(fn, "_torchscript_modifier", FunctionModifiers.DEFAULT)
+def copy_torchscript_modifier(orig, new) -> None:
+    attr = get_torchscript_modifier(orig)
+    if attr is None:
+        return
+    new._torchscript_modifier = attr
+# Overload registration.
+# Overloads are registered in this file and compiled in torch/jit/__init__.py,
+# so that they can be imported in nn/functional.py without an import cycle.
+# Registry layout: qualified_name => list[overload_functions]
+_overloaded_fns: Dict[str, List[Callable]] = {}  # noqa: T484
+_OVERLOAD_EXAMPLE = """
+Example usage of overload function:
+@torch.jit._overload
+def my_function(x: type0) -> type0: # decl 1
+    pass
+@torch.jit._overload
+def my_function(x: type1) -> type1: # decl 2
+    pass
+def my_function(x):                 # implementation
+    if isinstance(x, type0):
+        return x
+    elif isinstance(x, type1):
+        return x
+"""
+def get_overload_no_implementation_error_message(kind, obj):
+    sourcelines, file_lineno, filename = get_source_lines_and_file(obj)
+    return (
+        f'Implementation for the {kind} "{_qualified_name(obj)}" is missing. Please make '
+        f"sure a definition is provided and defined after all overload declarations.\n"
+        f'File "{filename}", line {file_lineno}:\n'
+        + "".join(sourcelines)
+        + "\n"
+        + _OVERLOAD_EXAMPLE
+    )
+def _check_overload_body(func):
+    try:
+        parsed_def = parse_def(func)
+    except OSError as e:
+        # Parsing the function definition can raise an OSError if source is unavailable.
+        # Since this is just an initial check, just raise a warning if this is the case.
+        warnings.warn(
+            f"Unable to retrieve source for @torch.jit._overload function: {func}."
+        )
+        return
+    body = parsed_def.ast.body[0].body
+    def is_pass(x):
+        return isinstance(x, ast.Pass)
+    def is_ellipsis(x):
+        return (
+            isinstance(x, ast.Expr)
+            and isinstance(x.value, ast.Constant)
+            and x.value.value is Ellipsis
+        )
+    if len(body) != 1 or not (is_pass(body[0]) or is_ellipsis(body[0])):
+        msg = (
+            "Only `pass` statement or `...` can be the body of overload declaration:\n"
+        )
+        msg += "\n".join(parsed_def.source.split("\n")[:3])
+        msg += " <- Expecting `pass` or `...` here!\n" + _OVERLOAD_EXAMPLE
+        raise RuntimeError(msg)
+def _overload(func):
+    _check_overload_body(func)
+    qual_name = _qualified_name(func)
+    global _overloaded_fns
+    fn_overload_list = _overloaded_fns.get(qual_name)
+    if fn_overload_list is None:
+        fn_overload_list = []
+        _overloaded_fns[qual_name] = fn_overload_list
+    fn_overload_list.append(func)
+    return func
+def _get_fn_overloads(qual_name):
+    return _overloaded_fns.get(qual_name)
+def _clear_fn_overloads(qual_name) -> None:
+    del _overloaded_fns[qual_name]
+def get_class_name_lineno(method) -> Tuple[str, int]:
+    current_frame = inspect.currentframe()
+    # one for the get_class_name call, one for _overload_method call
+    for i in range(2):
+        assert (
+            current_frame is not None
+        )  # assert current frame is not an Optional[FrameType]
+        current_frame = current_frame.f_back
+    assert current_frame is not None  # same here
+    class_name = current_frame.f_code.co_name
+    line_no = current_frame.f_code.co_firstlineno
+    return class_name, line_no
+# At the point the decorator is applied to a class method, the method
+# has no reference to its owning class. _qualified_name would not include
+# the class it is defined in, so any methods with the same name in the same file
+# would have the same _qualified_name, even if they were defined in different
+# classes. This problem only exists in python 2.
+# We get around this problem by looking at the stack frame and identifying
+# the class name, and throwing an error whenever overloads are used
+# when modules of the same name are in the same file.
+# qualified_name => class name => list[overload_functions]
+_overloaded_methods: Dict[str, Dict[str, List[Callable]]] = {}  # noqa: T484
+# (qualified_name, class name) => first line number recorded for that class
+_overloaded_method_class_fileno: Dict[Tuple[str, str], int] = {}
+def _overload_method(func):
+    _check_overload_body(func)
+    qual_name = _qualified_name(func)
+    global _overloaded_methods
+    class_name_map = _overloaded_methods.get(qual_name, None)
+    if class_name_map is None:
+        class_name_map = {}
+        _overloaded_methods[qual_name] = class_name_map
+    class_name, line_no = get_class_name_lineno(func)
+    method_overloads = class_name_map.get(class_name, None)
+    if method_overloads is None:
+        method_overloads = []
+        class_name_map[class_name] = method_overloads
+        _overloaded_method_class_fileno[(qual_name, class_name)] = line_no
+    else:
+        existing_lineno = _overloaded_method_class_fileno[(qual_name, class_name)]
+        if existing_lineno != line_no:
+            raise RuntimeError(
+                "Cannot currently overload the same method name in two different"
+                " classes with the same name in the same module"
+            )
+    method_overloads.append(func)
+    return func
+def _get_overloaded_methods(method, mod_class):
+    # TODO: __name__ not set for submodules in recursive script
+    if not hasattr(method, "__name__"):
+        return None
+    qual_name = _qualified_name(method)
+    class_name_map = _overloaded_methods.get(qual_name, None)
+    if class_name_map is None:
+        return None
+    overloads = class_name_map.get(mod_class.__name__, None)
+    if overloads is None:
+        return None
+    method_line_no = get_source_lines_and_file(method)[1]
+    mod_class_fileno = get_source_lines_and_file(mod_class)[1]
+    mod_end_fileno = mod_class_fileno + len(get_source_lines_and_file(mod_class)[0])
+    if not (method_line_no >= mod_class_fileno and method_line_no <= mod_end_fileno):
+        raise AssertionError(
+            "Overloads are not useable when a module is redeclared within the same file: "
+            + str(method)
+        )
+    return overloads
+def is_tuple(ann) -> bool:
+    if ann is Tuple:
+        raise_error_container_parameter_missing("Tuple")
+    # For some reason Python 3.7 violates the Type[A, B].__origin__ == Type rule
+    if not hasattr(ann, "__module__"):
+        return False
+    ann_origin = get_origin(ann)
+    if IS_PY39_PLUS and ann.__module__ == "builtins" and ann_origin is tuple:
+        return True
+    return ann.__module__ == "typing" and (ann_origin is Tuple or ann_origin is tuple)
+def is_list(ann) -> bool:
+    if ann is List:
+        raise_error_container_parameter_missing("List")
+    if not hasattr(ann, "__module__"):
+        return False
+    ann_origin = get_origin(ann)
+    if IS_PY39_PLUS and ann.__module__ == "builtins" and ann_origin is list:
+        return True
+    return ann.__module__ == "typing" and (ann_origin is List or ann_origin is list)
+def is_dict(ann) -> bool:
+    if ann is Dict:
+        raise_error_container_parameter_missing("Dict")
+    if not hasattr(ann, "__module__"):
+        return False
+    ann_origin = get_origin(ann)
+    if IS_PY39_PLUS and ann.__module__ == "builtins" and ann_origin is dict:
+        return True
+    return ann.__module__ == "typing" and (ann_origin is Dict or ann_origin is dict)
+def is_union(ann):
+    if ann is Union:
+        raise_error_container_parameter_missing("Union")
+    return isinstance(ann, BuiltinUnionType) or (
+        hasattr(ann, "__module__")
+        and ann.__module__ == "typing"
+        and (get_origin(ann) is Union)
+    )
+def is_optional(ann):
+    if ann is Optional:
+        raise_error_container_parameter_missing("Optional")
+    def is_optional_as_optional(ann):
+        return (
+            hasattr(ann, "__module__")
+            and ann.__module__ == "typing"
+            and (get_origin(ann) is Optional)
+        )
+    def is_union_as_optional(ann):
+        ann_args = get_args(ann)
+        return len(ann_args) == 2 and (None in ann_args or type(None) in ann_args)
+    return is_optional_as_optional(ann) or (is_union(ann) and is_union_as_optional(ann))
+def is_future(ann) -> bool:
+    if ann is Future:
+        raise RuntimeError(
+            "Attempted to use Future without a "
+            "contained type. Please add a contained type, e.g. "
+            "Future[int]"
+        )
+    return get_origin(ann) is Future
+def is_await(ann) -> bool:
+    if ann is _Await:
+        return True
+    return get_origin(ann) is _Await
+if torch.distributed.rpc.is_available():
+    from torch._C._distributed_rpc import PyRRef
+    from torch.distributed.rpc import RRef
+    def is_rref(ann) -> bool:
+        if ann is RRef:
+            raise RuntimeError(
+                "Attempted to use RRef without a "
+                "contained type. Please add a contained type, e.g. "
+                "RRef[int]"
+            )
+        return get_origin(ann) is RRef
+    def is_rref_instance(obj) -> bool:
+        return isinstance(obj, PyRRef)
+else:
+    def is_rref_instance(obj) -> bool:
+        # If the RPC module doesn't exist then RRefs don't exist either.
+        return False
+def _try_get_dispatched_fn(fn):
+    if not callable(fn):
+        return None
+    return boolean_dispatched.get(fn)
+def _get_named_tuple_properties(
+    obj,
+    loc: Optional[torch._C._jit_tree_views.SourceRange] = None,
+    rcb=None,
+):
+    """Collect the name, fields, field types and defaults of a namedtuple class.
+    Args:
+        obj: a namedtuple *class* (asserted to be a ``tuple`` subclass that
+            has ``_fields``).
+        loc: source range used for error reporting and passed on to
+            ``ann_to_type``; ``fake_range()`` is used when omitted.
+        rcb: optional resolution callback used to turn the string held by a
+            ``ForwardRef`` annotation into a Python object.
+    Returns:
+        A 4-tuple ``(name, fields, annotations, defaults)`` where
+        ``annotations`` has one entry per field (the inferred tensor type for
+        fields without an annotation) and ``defaults`` lists the default
+        values in field order for the fields that have one.
+    Raises:
+        ValueError: if ``rcb`` cannot resolve a ``ForwardRef`` annotation.
+    """
+    if loc is None:
+        loc = fake_range()
+    assert issubclass(obj, tuple) and hasattr(obj, "_fields")
+    if hasattr(obj, "_field_defaults"):
+        # Keep only fields that actually have a default, in field order.
+        defaults = [
+            obj._field_defaults[field]
+            for field in obj._fields
+            if field in obj._field_defaults
+        ]
+    else:
+        defaults = []
+    # In 3.10 recommended way to get annotations is to call `inspect.get_annotations` function
+    # Also, annotations from base class are not inherited so they need to be queried explicitly
+    if sys.version_info[:2] < (3, 10):
+        obj_annotations = getattr(obj, "__annotations__", {})
+    else:
+        obj_annotations = inspect.get_annotations(obj)
+        if len(obj_annotations) == 0 and hasattr(obj, "__base__"):
+            obj_annotations = inspect.get_annotations(obj.__base__)
+    annotations = []
+    for field in obj._fields:
+        if field in obj_annotations:
+            field_type = obj_annotations[field]
+            # [Note: ForwardRef annotations in NamedTuple attributes]
+            # NamedTuple types are slightly different from normal types.
+            #
+            # Normally, annotations are evaluated like this (during jit.script):
+            # 1. Load strings of python code into c++ and parse.
+            # 2. Get annotations as strings
+            # 3. Use the PythonResolver's resolution callback (rcb) to convert
+            #    the string into a python object
+            # 4. We call into annotations.py:ann_to_type to convert python obj
+            #    from step 3 into a type that torchscript understands.
+            #
+            # NamedTuples are more complicated, because it has sub-types.
+            # Normally, once we have the NamedTuple type object from #3,
+            # we can just look at the annotation literal values and use
+            # ann_to_type directly on them.
+            #
+            # But sometimes, users will annotate with string literals, e.g.
+            #    x: 'int'
+            # This also happens with PEP563 (from __future__ import annotations)
+            #
+            # These annotations appear in the annotation dict as ForwardRef('int').
+            #
+            # Then, we need to convert the string into a python object. This
+            # requires having local context for custom objects or imported types.
+            # rcb() is what gives us this. So, we plumb rcb through the stack so
+            # it can be used in this context for the if block below.
+            #
+            # FAQ:
+            # - Why do we need this special handling for NamedTuple but string
+            #   annotations work fine for normal types? Normally, we parse the
+            #   string directly and then call rcb() directly from C++.
+            # - Why not use ForwardRef._evaluate? For that, we need globals()
+            #   and locals() for the local context where the NamedTuple was defined.
+            #   rcb is what lets us look up into these. So, basically rcb does the
+            #   hard work for us.
+            if isinstance(field_type, ForwardRef) and rcb is not None:
+                rcb_type = rcb(field_type.__forward_arg__)
+                # rcb returns None if it can't find anything.
+                if rcb_type is None:
+                    raise ValueError(
+                        f"Unknown type annotation: '{field_type}' in NamedTuple {obj.__name__}."
+                        f" Likely due to partial support for ForwardRef parameters in NamedTuples, see #95858."
+                        f" Issue occurred at {loc.highlight()}"
+                    )
+                field_type = rcb_type
+            the_type = torch.jit.annotations.ann_to_type(field_type, loc, rcb)
+            annotations.append(the_type)
+        else:
+            # Unannotated fields fall back to the inferred tensor type.
+            annotations.append(torch._C.TensorType.getInferred())
+    # NOTE(review): `obj` is asserted above to be a class, so `type(obj)` is its
+    # metaclass and the first element is the metaclass name, not `obj.__name__`.
+    # Confirm that callers do not rely on this being the namedtuple's own name.
+    return type(obj).__name__, obj._fields, annotations, defaults
+def _create_named_tuple(
+    t,
+    unqual_name: str,
+    field_names: List[str],
+    defaults: Tuple[Any, ...],
+):
+    TupleType = collections.namedtuple(unqual_name, field_names, defaults=defaults)  # type: ignore[call-arg, no-redef, misc]
+    return TupleType(*t)
+@contextlib.contextmanager
+def _disable_emit_hooks():
+    hooks = torch._C._jit_get_emit_hooks()
+    torch._C._jit_set_emit_hooks(None, None)
+    try:
+        yield
+    finally:
+        torch._C._jit_set_emit_hooks(hooks[0], hooks[1])
+def _disable_emit_hooks_decorator(_DecoratorContextManager) -> None:  # noqa: F811
+    # NOTE(review): this is declared with `def`, so `_DecoratorContextManager` is an
+    # ordinary parameter rather than a base class, and the two functions below are
+    # locals that are never returned or attached to anything; calling this returns
+    # None. It reads as if `class ...(_DecoratorContextManager)` was intended —
+    # confirm before relying on it.
+    def __enter__(self) -> None:
+        # Remember the current emit hooks, then clear them.
+        self.hooks = torch._C._jit_get_emit_hooks()
+        torch._C._jit_set_emit_hooks(None, None)
+    def __exit__(self, *args) -> None:
+        # Restore the hooks saved by __enter__.
+        torch._C._jit_set_emit_hooks(self.hooks[0], self.hooks[1])
+def _is_exception(obj) -> bool:
+    if not inspect.isclass(obj):
+        return False
+    return issubclass(obj, Exception)
+def raise_error_container_parameter_missing(target_type) -> None:
+    if target_type == "Dict":
+        raise RuntimeError(
+            "Attempted to use Dict without "
+            "contained types. Please add contained type, e.g. "
+            "Dict[int, int]"
+        )
+    raise RuntimeError(
+        f"Attempted to use {target_type} without a "
+        "contained type. Please add a contained type, e.g. "
+        f"{target_type}[int]"
+    )
+def check_args_exist(target_type) -> None:
+    if target_type is List or target_type is list:
+        raise_error_container_parameter_missing("List")
+    elif target_type is Tuple or target_type is tuple:
+        raise_error_container_parameter_missing("Tuple")
+    elif target_type is Dict or target_type is dict:
+        raise_error_container_parameter_missing("Dict")
+    elif target_type is None or target_type is Optional:
+        raise_error_container_parameter_missing("Optional")
+def check_empty_containers(obj) -> None:
+    if obj == [] or obj == {} or obj == ():
+        warnings.warn(
+            "The inner type of a container is lost when "
+            "calling torch.jit.isinstance in eager mode. For "
+            "example, List[int] would become list and "
+            "therefore falsely return True for List[float] or"
+            " List[str]."
+        )
+# supports List/Dict/Tuple and Optional types
+# TODO support future
+def container_checker(obj, target_type) -> bool:
+    origin_type = get_origin(target_type)
+    check_args_exist(target_type)
+    if origin_type is None:
+        return False
+    elif origin_type is list or origin_type is List:
+        check_empty_containers(obj)
+        if not isinstance(obj, list):
+            return False
+        arg_type = get_args(target_type)[0]
+        arg_origin = get_origin(arg_type)
+        for el in obj:
+            # check if nested container, ex: List[List[str]]
+            if arg_origin:  # processes nested container, ex: List[List[str]]
+                if not container_checker(el, arg_type):
+                    return False
+            elif not isinstance(el, arg_type):
+                return False
+        return True
+    elif origin_type is Dict or origin_type is dict:
+        check_empty_containers(obj)
+        if not isinstance(obj, dict):
+            return False
+        key_type = get_args(target_type)[0]
+        val_type = get_args(target_type)[1]
+        for key, val in obj.items():
+            # check if keys are of right type
+            if not isinstance(key, key_type):
+                return False
+            val_origin = get_origin(val_type)
+            if val_origin:
+                if not container_checker(val, val_type):
+                    return False
+            elif not isinstance(val, val_type):
+                return False
+        return True
+    elif origin_type is Tuple or origin_type is tuple:
+        check_empty_containers(obj)
+        if not isinstance(obj, tuple):
+            return False
+        arg_types = get_args(target_type)
+        if len(obj) != len(arg_types):
+            return False
+        for el, el_type in zip(obj, arg_types):
+            el_origin = get_origin(el_type)
+            if el_origin:
+                if not container_checker(el, el_type):
+                    return False
+            elif not isinstance(el, el_type):
+                return False
+        return True
+    elif origin_type is Union or issubclass(
+        origin_type, BuiltinUnionType
+    ):  # also handles Optional
+        if obj is None:  # check before recursion because None is always fine
+            return True
+        inner_types = get_args(target_type)
+        for t in inner_types:
+            t_origin = get_origin(t)
+            if t_origin:
+                return container_checker(obj, t)
+            elif isinstance(obj, t):
+                return True
+    return False
+def _isinstance(obj, target_type) -> bool:
+    if isinstance(target_type, collections.abc.Container):
+        if not isinstance(target_type, tuple):
+            raise RuntimeError(
+                "The second argument to "
+                "`torch.jit.isinstance` must be a type "
+                "or a tuple of types"
+            )
+        for t_type in target_type:
+            if _isinstance(obj, t_type):
+                return True
+        return False
+    origin_type = get_origin(target_type)
+    if origin_type:
+        return container_checker(obj, target_type)
+    # Check to handle non-typed optional origin returns as none instead
+    #    of as optional in 3.7-3.8
+    check_args_exist(target_type)
+    # handle non-containers
+    return isinstance(obj, target_type)
+class _TensorExtractor(pickle.Pickler):
+    def __init__(self, *args, tensors: List[torch.Tensor], **kwargs):
+        super().__init__(*args, **kwargs)
+        self.tensors = tensors
+    def persistent_id(self, obj):
+        if isinstance(obj, torch.Tensor):
+            self.tensors.append(obj)
+            return ""
+        # Since we just want to extract tensors, we don't mind if an object is
+        # unpicklable if it doesn't contain tensors, as we can just ignore/skip
+        # it. To play it safe, we only do so for common objects that we're sure
+        # don't contain tensors. Feel free to add new types here. Note also that
+        # even if a type isn't listed here this won't block users, since thet
+        # can just add a __getstate__ or __reduce__ method to their class.
+        if isinstance(obj, LockType):
+            return ""
+        # Futures and RRefs don't technically contain a value, they just offer
+        # the means to access a value.
+        if isinstance(obj, CFuture) or is_rref_instance(obj):
+            return ""
+        if isinstance(obj, CAwait):
+            return ""
+        if isinstance(obj, torch.cuda.Event):
+            return ""
+        if isinstance(obj, threading.Thread):
+            return ""
+        return None
+def _extract_tensors(obj):
+    r"""
+    This function is exclusively called from C++.
+    See ``torch/csrc/jit/python/python_ivalue.h``.
+    It extracts the tensors contained in the given object, through pickling.
+    """
+    tensors: List[torch.Tensor] = []
+    extractor = _TensorExtractor(io.BytesIO(), protocol=-1, tensors=tensors)
+    extractor.dump(obj)
+    return tensors
+def _get_model_id(obj) -> Optional[str]:
+    if isinstance(obj, torch.jit.ScriptModule):
+        return str(obj._c._type())
+    elif isinstance(obj, torch.jit.ScriptFunction):
+        return obj.qualified_name
+    else:
+        return None
+# In Python-3.11+ typed enums (i.e. IntEnum for example) retain number of base class methods in subclass
+# that were previously dropped. To preserve the behavior, explicitly drop them there
+if sys.version_info > (3, 10):
+    _drop(enum.Enum.__new__)
+    _drop(enum.Enum.__format__)
+    _drop(enum.Enum.__repr__)
+    _drop(enum.Enum.__str__)

.venv/lib/python3.11/site-packages/torch/_linalg_utils.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# mypy: allow-untyped-defs
+"""Various linear algebra utility methods for internal use."""
+from typing import Optional, Tuple
+import torch
+from torch import Tensor
+def is_sparse(A):
+    """Check if tensor A is a sparse tensor"""
+    if isinstance(A, torch.Tensor):
+        return A.layout == torch.sparse_coo
+    error_str = "expected Tensor"
+    if not torch.jit.is_scripting():
+        error_str += f" but got {type(A)}"
+    raise TypeError(error_str)
+def get_floating_dtype(A):
+    """Return the floating point dtype of tensor A.
+    Integer types map to float32.
+    """
+    dtype = A.dtype
+    if dtype in (torch.float16, torch.float32, torch.float64):
+        return dtype
+    return torch.float32
+def matmul(A: Optional[Tensor], B: Tensor) -> Tensor:
+    """Multiply two matrices.
+    If A is None, return B. A can be sparse or dense. B is always
+    dense.
+    """
+    if A is None:
+        return B
+    if is_sparse(A):
+        return torch.sparse.mm(A, B)
+    return torch.matmul(A, B)
+def bform(X: Tensor, A: Optional[Tensor], Y: Tensor) -> Tensor:
+    """Return bilinear form of matrices: :math:`X^T A Y`."""
+    return matmul(X.mT, matmul(A, Y))
+def qform(A: Optional[Tensor], S: Tensor):
+    """Return quadratic form :math:`S^T A S`."""
+    return bform(S, A, S)
+def basis(A):
+    """Return orthogonal basis of A columns."""
+    return torch.linalg.qr(A).Q
+def symeig(A: Tensor, largest: Optional[bool] = False) -> Tuple[Tensor, Tensor]:
+    """Return eigenpairs of A with specified ordering."""
+    if largest is None:
+        largest = False
+    E, Z = torch.linalg.eigh(A, UPLO="U")
+    # assuming that E is ordered
+    if largest:
+        E = torch.flip(E, dims=(-1,))
+        Z = torch.flip(Z, dims=(-1,))
+    return E, Z
+# These functions were deprecated and removed
+# This nice error message can be removed in version 1.13+
+def matrix_rank(input, tol=None, symmetric=False, *, out=None) -> Tensor:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed.\n"
+        "Please use the `torch.linalg.matrix_rank` function instead. "
+        "The parameter 'symmetric' was renamed in `torch.linalg.matrix_rank()` to 'hermitian'."
+    )
+def solve(input: Tensor, A: Tensor, *, out=None) -> Tuple[Tensor, Tensor]:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed. "
+        "`torch.solve` is deprecated in favor of `torch.linalg.solve`. "
+        "`torch.linalg.solve` has its arguments reversed and does not return the LU factorization.\n\n"
+        "To get the LU factorization see `torch.lu`, which can be used with `torch.lu_solve` or `torch.lu_unpack`.\n"
+        "X = torch.solve(B, A).solution "
+        "should be replaced with:\n"
+        "X = torch.linalg.solve(A, B)"
+    )
+def lstsq(input: Tensor, A: Tensor, *, out=None) -> Tuple[Tensor, Tensor]:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed. "
+        "`torch.lstsq` is deprecated in favor of `torch.linalg.lstsq`.\n"
+        "`torch.linalg.lstsq` has reversed arguments and does not return the QR decomposition in "
+        "the returned tuple (although it returns other information about the problem).\n\n"
+        "To get the QR decomposition consider using `torch.linalg.qr`.\n\n"
+        "The returned solution in `torch.lstsq` stored the residuals of the solution in the "
+        "last m - n columns of the returned value whenever m > n. In torch.linalg.lstsq, "
+        "the residuals are in the field 'residuals' of the returned named tuple.\n\n"
+        "The unpacking of the solution, as in\n"
+        "X, _ = torch.lstsq(B, A).solution[:A.size(1)]\n"
+        "should be replaced with:\n"
+        "X = torch.linalg.lstsq(A, B).solution"
+    )
+def _symeig(
+    input,
+    eigenvectors=False,
+    upper=True,
+    *,
+    out=None,
+) -> Tuple[Tensor, Tensor]:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed. "
+        "The default behavior has changed from using the upper triangular portion of the matrix by default "
+        "to using the lower triangular portion.\n\n"
+        "L, _ = torch.symeig(A, upper=upper) "
+        "should be replaced with:\n"
+        "L = torch.linalg.eigvalsh(A, UPLO='U' if upper else 'L')\n\n"
+        "and\n\n"
+        "L, V = torch.symeig(A, eigenvectors=True) "
+        "should be replaced with:\n"
+        "L, V = torch.linalg.eigh(A, UPLO='U' if upper else 'L')"
+    )
+def eig(
+    self: Tensor,
+    eigenvectors: bool = False,
+    *,
+    e=None,
+    v=None,
+) -> Tuple[Tensor, Tensor]:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed. "
+        "`torch.linalg.eig` returns complex tensors of dtype `cfloat` or `cdouble` rather than real tensors "
+        "mimicking complex tensors.\n\n"
+        "L, _ = torch.eig(A) "
+        "should be replaced with:\n"
+        "L_complex = torch.linalg.eigvals(A)\n\n"
+        "and\n\n"
+        "L, V = torch.eig(A, eigenvectors=True) "
+        "should be replaced with:\n"
+        "L_complex, V_complex = torch.linalg.eig(A)"
+    )

.venv/lib/python3.11/site-packages/torch/_lobpcg.py ADDED Viewed

	@@ -0,0 +1,1157 @@

+# mypy: allow-untyped-defs
+"""Locally Optimal Block Preconditioned Conjugate Gradient methods."""
+# Author: Pearu Peterson
+# Created: February 2020
+from typing import Dict, Optional, Tuple
+import torch
+from torch import _linalg_utils as _utils, Tensor
+from torch.overrides import handle_torch_function, has_torch_function
+__all__ = ["lobpcg"]
+def _symeig_backward_complete_eigenspace(D_grad, U_grad, A, D, U):
+    # compute F, such that F_ij = (d_j - d_i)^{-1} for i != j, F_ii = 0
+    F = D.unsqueeze(-2) - D.unsqueeze(-1)
+    F.diagonal(dim1=-2, dim2=-1).fill_(float("inf"))
+    F.pow_(-1)
+    # A.grad = U (D.grad + (U^T U.grad * F)) U^T
+    Ut = U.mT.contiguous()
+    res = torch.matmul(
+        U, torch.matmul(torch.diag_embed(D_grad) + torch.matmul(Ut, U_grad) * F, Ut)
+    )
+    return res
+def _polynomial_coefficients_given_roots(roots):
+    """
+    Given the `roots` of a polynomial, find the polynomial's coefficients.
+    If roots = (r_1, ..., r_n), then the method returns
+    coefficients (a_0, a_1, ..., a_n (== 1)) so that
+    p(x) = (x - r_1) * ... * (x - r_n)
+         = x^n + a_{n-1} * x^{n-1} + ... + a_1 * x + a_0
+    Args:
+      roots (Tensor): the roots along the last dimension; any leading
+                      dimensions are carried through unchanged.
+    Returns:
+      Tensor shaped like `roots` but with the last dimension one larger
+      (n + 1 entries).
+    Note: for better performance requires writing a low-level kernel
+    """
+    poly_order = roots.shape[-1]
+    poly_coeffs_shape = list(roots.shape)
+    # we assume p(x) = x^n + a_{n-1} * x^{n-1} + ... + a_1 * x + a_0,
+    # so poly_coeffs = {a_0, ..., a_n, a_{n+1}(== 1)},
+    # but we insert one extra coefficient to enable better vectorization below
+    poly_coeffs_shape[-1] += 2
+    poly_coeffs = roots.new_zeros(poly_coeffs_shape)
+    poly_coeffs[..., 0] = 1
+    poly_coeffs[..., -1] = 1
+    # perform the Horner's rule
+    for i in range(1, poly_order + 1):
+        # note that it is computationally hard to compute backward for this method,
+        # because then given the coefficients it would require finding the roots and/or
+        # calculating the sensitivity based on the Vieta's theorem.
+        # So the code below tries to circumvent the explicit root finding by series
+        # of operations on memory copies imitating the Horner's method.
+        # The memory copies are required to construct nodes in the computational graph
+        # by exploiting the explicit (not in-place, separate node for each step)
+        # recursion of the Horner's method.
+        # Needs more memory, O(... * k^2), but with only O(... * k^2) complexity.
+        # Without gradients no copy is made, so `poly_coeffs_new` aliases
+        # `poly_coeffs` and the update below happens in place.
+        poly_coeffs_new = poly_coeffs.clone() if roots.requires_grad else poly_coeffs
+        # `out` is a view into `poly_coeffs_new`; `-=` writes through to it.
+        out = poly_coeffs_new.narrow(-1, poly_order - i, i + 1)
+        out -= roots.narrow(-1, i - 1, 1) * poly_coeffs.narrow(
+            -1, poly_order - i + 1, i + 1
+        )
+        poly_coeffs = poly_coeffs_new
+    # drop the leading helper slot; n + 1 entries remain
+    return poly_coeffs.narrow(-1, 1, poly_order + 1)
+def _polynomial_value(poly, x, zero_power, transition):
+    """
+    A generic method for computing poly(x) using the Horner's rule.
+    Args:
+      poly (Tensor): the (possibly batched) 1D Tensor representing
+                     polynomial coefficients such that
+                     poly[..., i] = (a_{i_0}, ..., a{i_n} (==1)), and
+                     poly(x) = poly[..., 0] * zero_power + ... + poly[..., n] * x^n
+      x (Tensor): the value (possible batched) to evalate the polynomial `poly` at.
+      zero_power (Tensor): the representation of `x^0`. It is application-specific.
+      transition (Callable): the function that accepts some intermediate result `int_val`,
+                             the `x` and a specific polynomial coefficient
+                             `poly[..., k]` for some iteration `k`.
+                             It basically performs one iteration of the Horner's rule
+                             defined as `x * int_val + poly[..., k] * zero_power`.
+                             Note that `zero_power` is not a parameter,
+                             because the step `+ poly[..., k] * zero_power` depends on `x`,
+                             whether it is a vector, a matrix, or something else, so this
+                             functionality is delegated to the user.
+    """
+    res = zero_power.clone()
+    for k in range(poly.size(-1) - 2, -1, -1):
+        res = transition(res, x, poly[..., k])
+    return res
+def _matrix_polynomial_value(poly, x, zero_power=None):
+    """
+    Evaluates `poly(x)` for the (batched) matrix input `x`.
+    Check out `_polynomial_value` function for more details.
+    """
+    # matrix-aware Horner's rule iteration
+    def transition(curr_poly_val, x, poly_coeff):
+        res = x.matmul(curr_poly_val)
+        res.diagonal(dim1=-2, dim2=-1).add_(poly_coeff.unsqueeze(-1))
+        return res
+    if zero_power is None:
+        zero_power = torch.eye(
+            x.size(-1), x.size(-1), dtype=x.dtype, device=x.device
+        ).view(*([1] * len(list(x.shape[:-2]))), x.size(-1), x.size(-1))
+    return _polynomial_value(poly, x, zero_power, transition)
+def _vector_polynomial_value(poly, x, zero_power=None):
+    """
+    Evaluates `poly(x)` for the (batched) vector input `x`.
+    Check out `_polynomial_value` function for more details.
+    """
+    # vector-aware Horner's rule iteration
+    def transition(curr_poly_val, x, poly_coeff):
+        res = torch.addcmul(poly_coeff.unsqueeze(-1), x, curr_poly_val)
+        return res
+    if zero_power is None:
+        zero_power = x.new_ones(1).expand(x.shape)
+    return _polynomial_value(poly, x, zero_power, transition)
+def _symeig_backward_partial_eigenspace(D_grad, U_grad, A, D, U, largest):
+    """Backward of a symmetric eigensolver when `U` spans only part of the eigenspace.
+    The result is the complete-eigenspace term restricted to span(U) (see
+    `_symeig_backward_complete_eigenspace`) minus a correction that lives in
+    the orthogonal complement of span(U) and comes from the explicit
+    polynomial solution of a Sylvester equation.
+    Args:
+      D_grad, U_grad: incoming gradients for the eigenvalues `D` and eigenvectors `U`.
+      A: the input matrix.
+      D, U: the computed eigenvalues and eigenvectors.
+      largest (bool): whether the largest eigenvalues were requested; it only
+                      affects the sign used before the Cholesky factorization.
+    Returns:
+      The gradient with respect to `A`.
+    """
+    # compute a projection operator onto an orthogonal subspace spanned by the
+    # columns of U defined as (I - UU^T)
+    Ut = U.mT.contiguous()
+    proj_U_ortho = -U.matmul(Ut)
+    proj_U_ortho.diagonal(dim1=-2, dim2=-1).add_(1)
+    # compute U_ortho, a basis for the orthogonal complement to the span(U),
+    # by projecting a random [..., m, m - k] matrix onto the subspace spanned
+    # by the columns of U.
+    #
+    # fix generator for determinism
+    # (a freshly constructed generator is used on every call instead of the global RNG)
+    gen = torch.Generator(A.device)
+    # orthogonal complement to the span(U)
+    U_ortho = proj_U_ortho.matmul(
+        torch.randn(
+            (*A.shape[:-1], A.size(-1) - D.size(-1)),
+            dtype=A.dtype,
+            device=A.device,
+            generator=gen,
+        )
+    )
+    U_ortho_t = U_ortho.mT.contiguous()
+    # compute the coefficients of the characteristic polynomial of the tensor D.
+    # Note that D is diagonal, so the diagonal elements are exactly the roots
+    # of the characteristic polynomial.
+    chr_poly_D = _polynomial_coefficients_given_roots(D)
+    # the code below finds the explicit solution to the Sylvester equation
+    # U_ortho^T A U_ortho dX - dX D = -U_ortho^T A U
+    # and incorporates it into the whole gradient stored in the `res` variable.
+    #
+    # Equivalent to the following naive implementation:
+    # res = A.new_zeros(A.shape)
+    # p_res = A.new_zeros(*A.shape[:-1], D.size(-1))
+    # for k in range(1, chr_poly_D.size(-1)):
+    #     p_res.zero_()
+    #     for i in range(0, k):
+    #         p_res += (A.matrix_power(k - 1 - i) @ U_grad) * D.pow(i).unsqueeze(-2)
+    #     res -= chr_poly_D[k] * (U_ortho @ poly_D_at_A.inverse() @ U_ortho_t @  p_res @ U.t())
+    #
+    # Note that dX is a differential, so the gradient contribution comes from the backward sensitivity
+    # Tr(f(U_grad, D_grad, A, U, D)^T dX) = Tr(g(U_grad, A, U, D)^T dA) for some functions f and g,
+    # and we need to compute g(U_grad, A, U, D)
+    #
+    # The naive implementation is based on the paper
+    # Hu, Qingxi, and Daizhan Cheng.
+    # "The polynomial solution to the Sylvester matrix equation."
+    # Applied mathematics letters 19.9 (2006): 859-864.
+    #
+    # We can modify the computation of `p_res` from above in a more efficient way
+    # p_res =   U_grad * (chr_poly_D[1] * D.pow(0) + ... + chr_poly_D[k] * D.pow(k)).unsqueeze(-2)
+    #       + A U_grad * (chr_poly_D[2] * D.pow(0) + ... + chr_poly_D[k] * D.pow(k - 1)).unsqueeze(-2)
+    #       + ...
+    #       + A.matrix_power(k - 1) U_grad * chr_poly_D[k]
+    # Note that this saves us from redundant matrix products with A (elimination of matrix_power)
+    U_grad_projected = U_grad
+    series_acc = U_grad_projected.new_zeros(U_grad_projected.shape)
+    for k in range(1, chr_poly_D.size(-1)):
+        poly_D = _vector_polynomial_value(chr_poly_D[..., k:], D)
+        series_acc += U_grad_projected * poly_D.unsqueeze(-2)
+        U_grad_projected = A.matmul(U_grad_projected)
+    # compute chr_poly_D(A) which essentially is:
+    #
+    # chr_poly_D_at_A = A.new_zeros(A.shape)
+    # for k in range(chr_poly_D.size(-1)):
+    #     chr_poly_D_at_A += chr_poly_D[k] * A.matrix_power(k)
+    #
+    # Note, however, for better performance we use the Horner's rule
+    chr_poly_D_at_A = _matrix_polynomial_value(chr_poly_D, A)
+    # compute the action of `chr_poly_D_at_A` restricted to U_ortho_t
+    chr_poly_D_at_A_to_U_ortho = torch.matmul(
+        U_ortho_t, torch.matmul(chr_poly_D_at_A, U_ortho)
+    )
+    # we need to invert 'chr_poly_D_at_A_to_U_ortho`, for that we compute its
+    # Cholesky decomposition and then use `torch.cholesky_solve` for better stability.
+    # Cholesky decomposition requires the input to be positive-definite.
+    # Note that `chr_poly_D_at_A_to_U_ortho` is positive-definite if
+    # 1. `largest` == False, or
+    # 2. `largest` == True and `k` is even
+    # under the assumption that `A` has distinct eigenvalues.
+    #
+    # check if `chr_poly_D_at_A_to_U_ortho` is positive-definite or negative-definite
+    # NOTE: `k` below is the variable left over from the `for` loop above. Its
+    # final value is chr_poly_D.size(-1) - 1, i.e. D.size(-1), the number of
+    # eigenpairs. It is unbound if that loop never runs (D.size(-1) == 0).
+    chr_poly_D_at_A_to_U_ortho_sign = -1 if (largest and (k % 2 == 1)) else +1
+    chr_poly_D_at_A_to_U_ortho_L = torch.linalg.cholesky(
+        chr_poly_D_at_A_to_U_ortho_sign * chr_poly_D_at_A_to_U_ortho
+    )
+    # compute the gradient part in span(U)
+    res = _symeig_backward_complete_eigenspace(D_grad, U_grad, A, D, U)
+    # incorporate the Sylvester equation solution into the full gradient
+    # it resides in span(U_ortho)
+    res -= U_ortho.matmul(
+        chr_poly_D_at_A_to_U_ortho_sign
+        * torch.cholesky_solve(
+            U_ortho_t.matmul(series_acc), chr_poly_D_at_A_to_U_ortho_L
+        )
+    ).matmul(Ut)
+    return res
+def _symeig_backward(D_grad, U_grad, A, D, U, largest):
+    # if `U` is square, then the columns of `U` is a complete eigenspace
+    if U.size(-1) == U.size(-2):
+        return _symeig_backward_complete_eigenspace(D_grad, U_grad, A, D, U)
+    else:
+        return _symeig_backward_partial_eigenspace(D_grad, U_grad, A, D, U, largest)
+class LOBPCGAutogradFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(  # type: ignore[override]
+        ctx,
+        A: Tensor,
+        k: Optional[int] = None,
+        B: Optional[Tensor] = None,
+        X: Optional[Tensor] = None,
+        n: Optional[int] = None,
+        iK: Optional[Tensor] = None,
+        niter: Optional[int] = None,
+        tol: Optional[float] = None,
+        largest: Optional[bool] = None,
+        method: Optional[str] = None,
+        tracker: None = None,
+        ortho_iparams: Optional[Dict[str, int]] = None,
+        ortho_fparams: Optional[Dict[str, float]] = None,
+        ortho_bparams: Optional[Dict[str, bool]] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        # makes sure that input is contiguous for efficiency.
+        # Note: autograd does not support dense gradients for sparse input yet.
+        A = A.contiguous() if (not A.is_sparse) else A
+        if B is not None:
+            B = B.contiguous() if (not B.is_sparse) else B
+        D, U = _lobpcg(
+            A,
+            k,
+            B,
+            X,
+            n,
+            iK,
+            niter,
+            tol,
+            largest,
+            method,
+            tracker,
+            ortho_iparams,
+            ortho_fparams,
+            ortho_bparams,
+        )
+        ctx.save_for_backward(A, B, D, U)
+        ctx.largest = largest
+        return D, U
+    @staticmethod
+    def backward(ctx, D_grad, U_grad):
+        A_grad = B_grad = None
+        grads = [None] * 14
+        A, B, D, U = ctx.saved_tensors
+        largest = ctx.largest
+        # lobpcg.backward has some limitations. Checks for unsupported input
+        if A.is_sparse or (B is not None and B.is_sparse and ctx.needs_input_grad[2]):
+            raise ValueError(
+                "lobpcg.backward does not support sparse input yet."
+                "Note that lobpcg.forward does though."
+            )
+        if (
+            A.dtype in (torch.complex64, torch.complex128)
+            or B is not None
+            and B.dtype in (torch.complex64, torch.complex128)
+        ):
+            raise ValueError(
+                "lobpcg.backward does not support complex input yet."
+                "Note that lobpcg.forward does though."
+            )
+        if B is not None:
+            raise ValueError(
+                "lobpcg.backward does not support backward with B != I yet."
+            )
+        if largest is None:
+            largest = True
+        # symeig backward
+        if B is None:
+            A_grad = _symeig_backward(D_grad, U_grad, A, D, U, largest)
+        # A has index 0
+        grads[0] = A_grad
+        # B has index 2
+        grads[2] = B_grad
+        return tuple(grads)
+def lobpcg(
+    A: Tensor,
+    k: Optional[int] = None,
+    B: Optional[Tensor] = None,
+    X: Optional[Tensor] = None,
+    n: Optional[int] = None,
+    iK: Optional[Tensor] = None,
+    niter: Optional[int] = None,
+    tol: Optional[float] = None,
+    largest: Optional[bool] = None,
+    method: Optional[str] = None,
+    tracker: None = None,
+    ortho_iparams: Optional[Dict[str, int]] = None,
+    ortho_fparams: Optional[Dict[str, float]] = None,
+    ortho_bparams: Optional[Dict[str, bool]] = None,
+) -> Tuple[Tensor, Tensor]:
+    """Find the k largest (or smallest) eigenvalues and the corresponding
+    eigenvectors of a symmetric positive definite generalized
+    eigenvalue problem using matrix-free LOBPCG methods.
+    This function is a front-end to the following LOBPCG algorithms
+    selectable via `method` argument:
+      `method="basic"` - the LOBPCG method introduced by Andrew
+      Knyazev, see [Knyazev2001]. A less robust method, may fail when
+      Cholesky is applied to singular input.
+      `method="ortho"` - the LOBPCG method with orthogonal basis
+      selection [StathopoulosEtal2002]. A robust method.
+    Supported inputs are dense, sparse, and batches of dense matrices.
+    .. note:: In general, the basic method spends least time per
+      iteration. However, the robust methods converge much faster and
+      are more stable. So, the usage of the basic method is generally
+      not recommended but there exist cases where the usage of the
+      basic method may be preferred.
+    .. warning:: The backward method does not support sparse and complex inputs.
+      It works only when `B` is not provided (i.e. `B == None`).
+      We are actively working on extensions, and the details of
+      the algorithms are going to be published promptly.
+    .. warning:: While it is assumed that `A` is symmetric, `A.grad` is not.
+      To make sure that `A.grad` is symmetric, so that `A - t * A.grad` is symmetric
+      in first-order optimization routines, prior to running `lobpcg`
+      we do the following symmetrization map: `A -> (A + A.t()) / 2`.
+      The map is performed only when the `A` requires gradients.
+    Args:
+      A (Tensor): the input tensor of size :math:`(*, m, m)`
+      B (Tensor, optional): the input tensor of size :math:`(*, m,
+                  m)`. When not specified, `B` is interpreted as
+                  identity matrix.
+      X (tensor, optional): the input tensor of size :math:`(*, m, n)`
+                  where `k <= n <= m`. When specified, it is used as
+                  initial approximation of eigenvectors. X must be a
+                  dense tensor.
+      iK (tensor, optional): the input tensor of size :math:`(*, m,
+                  m)`. When specified, it will be used as preconditioner.
+      k (integer, optional): the number of requested
+                  eigenpairs. Default is the number of :math:`X`
+                  columns (when specified) or `1`.
+      n (integer, optional): if :math:`X` is not specified then `n`
+                  specifies the size of the generated random
+                  approximation of eigenvectors. Default value for `n`
+                  is `k`. If :math:`X` is specified, the value of `n`
+                  (when specified) must be the number of :math:`X`
+                  columns.
+      tol (float, optional): residual tolerance for stopping
+                 criterion. Default is `feps ** 0.5` where `feps` is
+                 smallest non-zero floating-point number of the given
+                 input tensor `A` data type.
+      largest (bool, optional): when True, solve the eigenproblem for
+                 the largest eigenvalues. Otherwise, solve the
+                 eigenproblem for smallest eigenvalues. Default is
+                 `True`.
+      method (str, optional): select LOBPCG method. See the
+                 description of the function above. Default is
+                 "ortho".
+      niter (int, optional): maximum number of iterations. When
+                 reached, the iteration process is hard-stopped and
+                 the current approximation of eigenpairs is returned.
+                 For infinite iteration but until convergence criteria
+                 is met, use `-1`.
+      tracker (callable, optional) : a function for tracing the
+                 iteration process. When specified, it is called at
+                 each iteration step with LOBPCG instance as an
+                 argument. The LOBPCG instance holds the full state of
+                 the iteration process in the following attributes:
+                   `iparams`, `fparams`, `bparams` - dictionaries of
+                   integer, float, and boolean valued input
+                   parameters, respectively
+                   `ivars`, `fvars`, `bvars`, `tvars` - dictionaries
+                   of integer, float, boolean, and Tensor valued
+                   iteration variables, respectively.
+                   `A`, `B`, `iK` - input Tensor arguments.
+                   `E`, `X`, `S`, `R` - iteration Tensor variables.
+                 For instance:
+                   `ivars["istep"]` - the current iteration step
+                   `X` - the current approximation of eigenvectors
+                   `E` - the current approximation of eigenvalues
+                   `R` - the current residual
+                   `ivars["converged_count"]` - the current number of converged eigenpairs
+                   `tvars["rerr"]` - the current state of convergence criteria
+                 Note that when `tracker` stores Tensor objects from
+                 the LOBPCG instance, it must make copies of these.
+                 If `tracker` sets `bvars["force_stop"] = True`, the
+                 iteration process will be hard-stopped.
+      ortho_iparams, ortho_fparams, ortho_bparams (dict, optional):
+                 various parameters to LOBPCG algorithm when using
+                 `method="ortho"`.
+    Returns:
+      E (Tensor): tensor of eigenvalues of size :math:`(*, k)`
+      X (Tensor): tensor of eigenvectors of size :math:`(*, m, k)`
+    References:
+      [Knyazev2001] Andrew V. Knyazev. (2001) Toward the Optimal
+      Preconditioned Eigensolver: Locally Optimal Block Preconditioned
+      Conjugate Gradient Method. SIAM J. Sci. Comput., 23(2),
+      517-541. (25 pages)
+      https://epubs.siam.org/doi/abs/10.1137/S1064827500366124
+      [StathopoulosEtal2002] Andreas Stathopoulos and Kesheng
+      Wu. (2002) A Block Orthogonalization Procedure with Constant
+      Synchronization Requirements. SIAM J. Sci. Comput., 23(6),
+      2165-2182. (18 pages)
+      https://epubs.siam.org/doi/10.1137/S1064827500370883
+      [DuerschEtal2018] Jed A. Duersch, Meiyue Shao, Chao Yang, Ming
+      Gu. (2018) A Robust and Efficient Implementation of LOBPCG.
+      SIAM J. Sci. Comput., 40(5), C655-C676. (22 pages)
+      https://epubs.siam.org/doi/abs/10.1137/17M1129830
+    """
+    if not torch.jit.is_scripting():
+        tensor_ops = (A, B, X, iK)
+        if not set(map(type, tensor_ops)).issubset(
+            (torch.Tensor, type(None))
+        ) and has_torch_function(tensor_ops):
+            return handle_torch_function(
+                lobpcg,
+                tensor_ops,
+                A,
+                k=k,
+                B=B,
+                X=X,
+                n=n,
+                iK=iK,
+                niter=niter,
+                tol=tol,
+                largest=largest,
+                method=method,
+                tracker=tracker,
+                ortho_iparams=ortho_iparams,
+                ortho_fparams=ortho_fparams,
+                ortho_bparams=ortho_bparams,
+            )
+    if not torch._jit_internal.is_scripting():
+        if A.requires_grad or (B is not None and B.requires_grad):
+            # While it is expected that `A` is symmetric,
+            # the `A_grad` might be not. Therefore we perform the trick below,
+            # so that `A_grad` becomes symmetric.
+            # The symmetrization is important for first-order optimization methods,
+            # so that (A - alpha * A_grad) is still a symmetric matrix.
+            # Same holds for `B`.
+            A_sym = (A + A.mT) / 2
+            B_sym = (B + B.mT) / 2 if (B is not None) else None
+            return LOBPCGAutogradFunction.apply(
+                A_sym,
+                k,
+                B_sym,
+                X,
+                n,
+                iK,
+                niter,
+                tol,
+                largest,
+                method,
+                tracker,
+                ortho_iparams,
+                ortho_fparams,
+                ortho_bparams,
+            )
+    else:
+        if A.requires_grad or (B is not None and B.requires_grad):
+            raise RuntimeError(
+                "Script and require grads is not supported atm."
+                "If you just want to do the forward, use .detach()"
+                "on A and B before calling into lobpcg"
+            )
+    return _lobpcg(
+        A,
+        k,
+        B,
+        X,
+        n,
+        iK,
+        niter,
+        tol,
+        largest,
+        method,
+        tracker,
+        ortho_iparams,
+        ortho_fparams,
+        ortho_bparams,
+    )
+def _lobpcg(
+    A: Tensor,
+    k: Optional[int] = None,
+    B: Optional[Tensor] = None,
+    X: Optional[Tensor] = None,
+    n: Optional[int] = None,
+    iK: Optional[Tensor] = None,
+    niter: Optional[int] = None,
+    tol: Optional[float] = None,
+    largest: Optional[bool] = None,
+    method: Optional[str] = None,
+    tracker: None = None,
+    ortho_iparams: Optional[Dict[str, int]] = None,
+    ortho_fparams: Optional[Dict[str, float]] = None,
+    ortho_bparams: Optional[Dict[str, bool]] = None,
+) -> Tuple[Tensor, Tensor]:
+    # A must be square:
+    assert A.shape[-2] == A.shape[-1], A.shape
+    if B is not None:
+        # A and B must have the same shapes:
+        assert A.shape == B.shape, (A.shape, B.shape)
+    dtype = _utils.get_floating_dtype(A)
+    device = A.device
+    if tol is None:
+        feps = {torch.float32: 1.2e-07, torch.float64: 2.23e-16}[dtype]
+        tol = feps**0.5
+    m = A.shape[-1]
+    k = (1 if X is None else X.shape[-1]) if k is None else k
+    n = (k if n is None else n) if X is None else X.shape[-1]
+    if m < 3 * n:
+        raise ValueError(
+            f"LPBPCG algorithm is not applicable when the number of A rows (={m})"
+            f" is smaller than 3 x the number of requested eigenpairs (={n})"
+        )
+    method = "ortho" if method is None else method
+    iparams = {
+        "m": m,
+        "n": n,
+        "k": k,
+        "niter": 1000 if niter is None else niter,
+    }
+    fparams = {
+        "tol": tol,
+    }
+    bparams = {"largest": True if largest is None else largest}
+    if method == "ortho":
+        if ortho_iparams is not None:
+            iparams.update(ortho_iparams)
+        if ortho_fparams is not None:
+            fparams.update(ortho_fparams)
+        if ortho_bparams is not None:
+            bparams.update(ortho_bparams)
+        iparams["ortho_i_max"] = iparams.get("ortho_i_max", 3)
+        iparams["ortho_j_max"] = iparams.get("ortho_j_max", 3)
+        fparams["ortho_tol"] = fparams.get("ortho_tol", tol)
+        fparams["ortho_tol_drop"] = fparams.get("ortho_tol_drop", tol)
+        fparams["ortho_tol_replace"] = fparams.get("ortho_tol_replace", tol)
+        bparams["ortho_use_drop"] = bparams.get("ortho_use_drop", False)
+    if not torch.jit.is_scripting():
+        LOBPCG.call_tracker = LOBPCG_call_tracker  # type: ignore[method-assign]
+    if len(A.shape) > 2:
+        N = int(torch.prod(torch.tensor(A.shape[:-2])))
+        bA = A.reshape((N,) + A.shape[-2:])
+        bB = B.reshape((N,) + A.shape[-2:]) if B is not None else None
+        bX = X.reshape((N,) + X.shape[-2:]) if X is not None else None
+        bE = torch.empty((N, k), dtype=dtype, device=device)
+        bXret = torch.empty((N, m, k), dtype=dtype, device=device)
+        for i in range(N):
+            A_ = bA[i]
+            B_ = bB[i] if bB is not None else None
+            X_ = (
+                torch.randn((m, n), dtype=dtype, device=device) if bX is None else bX[i]
+            )
+            assert len(X_.shape) == 2 and X_.shape == (m, n), (X_.shape, (m, n))
+            iparams["batch_index"] = i
+            worker = LOBPCG(A_, B_, X_, iK, iparams, fparams, bparams, method, tracker)
+            worker.run()
+            bE[i] = worker.E[:k]
+            bXret[i] = worker.X[:, :k]
+        if not torch.jit.is_scripting():
+            LOBPCG.call_tracker = LOBPCG_call_tracker_orig  # type: ignore[method-assign]
+        return bE.reshape(A.shape[:-2] + (k,)), bXret.reshape(A.shape[:-2] + (m, k))
+    X = torch.randn((m, n), dtype=dtype, device=device) if X is None else X
+    assert len(X.shape) == 2 and X.shape == (m, n), (X.shape, (m, n))
+    worker = LOBPCG(A, B, X, iK, iparams, fparams, bparams, method, tracker)
+    worker.run()
+    if not torch.jit.is_scripting():
+        LOBPCG.call_tracker = LOBPCG_call_tracker_orig  # type: ignore[method-assign]
+    return worker.E[:k], worker.X[:, :k]
+class LOBPCG:
+    """Worker class of LOBPCG methods."""
+    def __init__(
+        self,
+        A: Optional[Tensor],
+        B: Optional[Tensor],
+        X: Tensor,
+        iK: Optional[Tensor],
+        iparams: Dict[str, int],
+        fparams: Dict[str, float],
+        bparams: Dict[str, bool],
+        method: str,
+        tracker: None,
+    ) -> None:
+        # constant parameters
+        self.A = A
+        self.B = B
+        self.iK = iK
+        self.iparams = iparams
+        self.fparams = fparams
+        self.bparams = bparams
+        self.method = method
+        self.tracker = tracker
+        m = iparams["m"]
+        n = iparams["n"]
+        # variable parameters
+        self.X = X
+        self.E = torch.zeros((n,), dtype=X.dtype, device=X.device)
+        self.R = torch.zeros((m, n), dtype=X.dtype, device=X.device)
+        self.S = torch.zeros((m, 3 * n), dtype=X.dtype, device=X.device)
+        self.tvars: Dict[str, Tensor] = {}
+        self.ivars: Dict[str, int] = {"istep": 0}
+        self.fvars: Dict[str, float] = {"_": 0.0}
+        self.bvars: Dict[str, bool] = {"_": False}
+    def __str__(self):
+        lines = ["LOPBCG:"]
+        lines += [f"  iparams={self.iparams}"]
+        lines += [f"  fparams={self.fparams}"]
+        lines += [f"  bparams={self.bparams}"]
+        lines += [f"  ivars={self.ivars}"]
+        lines += [f"  fvars={self.fvars}"]
+        lines += [f"  bvars={self.bvars}"]
+        lines += [f"  tvars={self.tvars}"]
+        lines += [f"  A={self.A}"]
+        lines += [f"  B={self.B}"]
+        lines += [f"  iK={self.iK}"]
+        lines += [f"  X={self.X}"]
+        lines += [f"  E={self.E}"]
+        r = ""
+        for line in lines:
+            r += line + "\n"
+        return r
+    def update(self):
+        """Set and update iteration variables."""
+        if self.ivars["istep"] == 0:
+            X_norm = float(torch.norm(self.X))
+            iX_norm = X_norm**-1
+            A_norm = float(torch.norm(_utils.matmul(self.A, self.X))) * iX_norm
+            B_norm = float(torch.norm(_utils.matmul(self.B, self.X))) * iX_norm
+            self.fvars["X_norm"] = X_norm
+            self.fvars["A_norm"] = A_norm
+            self.fvars["B_norm"] = B_norm
+            self.ivars["iterations_left"] = self.iparams["niter"]
+            self.ivars["converged_count"] = 0
+            self.ivars["converged_end"] = 0
+        if self.method == "ortho":
+            self._update_ortho()
+        else:
+            self._update_basic()
+        self.ivars["iterations_left"] = self.ivars["iterations_left"] - 1
+        self.ivars["istep"] = self.ivars["istep"] + 1
+    def update_residual(self):
+        """Update residual R from A, B, X, E."""
+        mm = _utils.matmul
+        self.R = mm(self.A, self.X) - mm(self.B, self.X) * self.E
+    def update_converged_count(self):
+        """Determine the number of converged eigenpairs using backward stable
+        convergence criterion, see discussion in Sec 4.3 of [DuerschEtal2018].
+        Users may redefine this method for custom convergence criteria.
+        """
+        # (...) -> int
+        prev_count = self.ivars["converged_count"]
+        tol = self.fparams["tol"]
+        A_norm = self.fvars["A_norm"]
+        B_norm = self.fvars["B_norm"]
+        E, X, R = self.E, self.X, self.R
+        rerr = (
+            torch.norm(R, 2, (0,))
+            * (torch.norm(X, 2, (0,)) * (A_norm + E[: X.shape[-1]] * B_norm)) ** -1
+        )
+        converged = rerr.real < tol  # this is a norm so imag is 0.0
+        count = 0
+        for b in converged:
+            if not b:
+                # ignore convergence of following pairs to ensure
+                # strict ordering of eigenpairs
+                break
+            count += 1
+        assert (
+            count >= prev_count
+        ), f"the number of converged eigenpairs (was {prev_count}, got {count}) cannot decrease"
+        self.ivars["converged_count"] = count
+        self.tvars["rerr"] = rerr
+        return count
+    def stop_iteration(self):
+        """Return True to stop iterations.
+        Note that tracker (if defined) can force-stop iterations by
+        setting ``worker.bvars['force_stop'] = True``.
+        """
+        return (
+            self.bvars.get("force_stop", False)
+            or self.ivars["iterations_left"] == 0
+            or self.ivars["converged_count"] >= self.iparams["k"]
+        )
+    def run(self):
+        """Run LOBPCG iterations.
+        Use this method as a template for implementing LOBPCG
+        iteration scheme with custom tracker that is compatible with
+        TorchScript.
+        """
+        self.update()
+        if not torch.jit.is_scripting() and self.tracker is not None:
+            self.call_tracker()
+        while not self.stop_iteration():
+            self.update()
+            if not torch.jit.is_scripting() and self.tracker is not None:
+                self.call_tracker()
+    @torch.jit.unused
+    def call_tracker(self):
+        """Interface for tracking iteration process in Python mode.
+        Tracking the iteration process is disabled in TorchScript
+        mode. In fact, one should specify tracker=None when JIT
+        compiling functions using lobpcg.
+        """
+        # do nothing when in TorchScript mode
+    # Internal methods
+    def _update_basic(self):
+        """
+        Update or initialize iteration variables when `method == "basic"`.
+        """
+        mm = torch.matmul
+        ns = self.ivars["converged_end"]
+        nc = self.ivars["converged_count"]
+        n = self.iparams["n"]
+        largest = self.bparams["largest"]
+        if self.ivars["istep"] == 0:
+            Ri = self._get_rayleigh_ritz_transform(self.X)
+            M = _utils.qform(_utils.qform(self.A, self.X), Ri)
+            E, Z = _utils.symeig(M, largest)
+            self.X[:] = mm(self.X, mm(Ri, Z))
+            self.E[:] = E
+            np = 0
+            self.update_residual()
+            nc = self.update_converged_count()
+            self.S[..., :n] = self.X
+            W = _utils.matmul(self.iK, self.R)
+            self.ivars["converged_end"] = ns = n + np + W.shape[-1]
+            self.S[:, n + np : ns] = W
+        else:
+            S_ = self.S[:, nc:ns]
+            Ri = self._get_rayleigh_ritz_transform(S_)
+            M = _utils.qform(_utils.qform(self.A, S_), Ri)
+            E_, Z = _utils.symeig(M, largest)
+            self.X[:, nc:] = mm(S_, mm(Ri, Z[:, : n - nc]))
+            self.E[nc:] = E_[: n - nc]
+            P = mm(S_, mm(Ri, Z[:, n : 2 * n - nc]))
+            np = P.shape[-1]
+            self.update_residual()
+            nc = self.update_converged_count()
+            self.S[..., :n] = self.X
+            self.S[:, n : n + np] = P
+            W = _utils.matmul(self.iK, self.R[:, nc:])
+            self.ivars["converged_end"] = ns = n + np + W.shape[-1]
+            self.S[:, n + np : ns] = W
+    def _update_ortho(self):
+        """
+        Update or initialize iteration variables when `method == "ortho"`.
+        """
+        mm = torch.matmul
+        ns = self.ivars["converged_end"]
+        nc = self.ivars["converged_count"]
+        n = self.iparams["n"]
+        largest = self.bparams["largest"]
+        if self.ivars["istep"] == 0:
+            Ri = self._get_rayleigh_ritz_transform(self.X)
+            M = _utils.qform(_utils.qform(self.A, self.X), Ri)
+            E, Z = _utils.symeig(M, largest)
+            self.X = mm(self.X, mm(Ri, Z))
+            self.update_residual()
+            np = 0
+            nc = self.update_converged_count()
+            self.S[:, :n] = self.X
+            W = self._get_ortho(self.R, self.X)
+            ns = self.ivars["converged_end"] = n + np + W.shape[-1]
+            self.S[:, n + np : ns] = W
+        else:
+            S_ = self.S[:, nc:ns]
+            # Rayleigh-Ritz procedure
+            E_, Z = _utils.symeig(_utils.qform(self.A, S_), largest)
+            # Update E, X, P
+            self.X[:, nc:] = mm(S_, Z[:, : n - nc])
+            self.E[nc:] = E_[: n - nc]
+            P = mm(S_, mm(Z[:, n - nc :], _utils.basis(Z[: n - nc, n - nc :].mT)))
+            np = P.shape[-1]
+            # check convergence
+            self.update_residual()
+            nc = self.update_converged_count()
+            # update S
+            self.S[:, :n] = self.X
+            self.S[:, n : n + np] = P
+            W = self._get_ortho(self.R[:, nc:], self.S[:, : n + np])
+            ns = self.ivars["converged_end"] = n + np + W.shape[-1]
+            self.S[:, n + np : ns] = W
+    def _get_rayleigh_ritz_transform(self, S):
+        """Return a transformation matrix that is used in Rayleigh-Ritz
+        procedure for reducing a general eigenvalue problem :math:`(S^TAS)
+        C = (S^TBS) C E` to a standard eigenvalue problem :math: `(Ri^T
+        S^TAS Ri) Z = Z E` where `C = Ri Z`.
+        .. note:: In the original Rayleight-Ritz procedure in
+          [DuerschEtal2018], the problem is formulated as follows::
+            SAS = S^T A S
+            SBS = S^T B S
+            D = (<diagonal matrix of SBS>) ** -1/2
+            R^T R = Cholesky(D SBS D)
+            Ri = D R^-1
+            solve symeig problem Ri^T SAS Ri Z = Theta Z
+            C = Ri Z
+          To reduce the number of matrix products (denoted by empty
+          space between matrices), here we introduce element-wise
+          products (denoted by symbol `*`) so that the Rayleight-Ritz
+          procedure becomes::
+            SAS = S^T A S
+            SBS = S^T B S
+            d = (<diagonal of SBS>) ** -1/2    # this is 1-d column vector
+            dd = d d^T                         # this is 2-d matrix
+            R^T R = Cholesky(dd * SBS)
+            Ri = R^-1 * d                      # broadcasting
+            solve symeig problem Ri^T SAS Ri Z = Theta Z
+            C = Ri Z
+          where `dd` is 2-d matrix that replaces matrix products `D M
+          D` with one element-wise product `M * dd`; and `d` replaces
+          matrix product `D M` with element-wise product `M *
+          d`. Also, creating the diagonal matrix `D` is avoided.
+        Args:
+        S (Tensor): the matrix basis for the search subspace, size is
+                    :math:`(m, n)`.
+        Returns:
+        Ri (tensor): upper-triangular transformation matrix of size
+                     :math:`(n, n)`.
+        """
+        B = self.B
+        mm = torch.matmul
+        SBS = _utils.qform(B, S)
+        d_row = SBS.diagonal(0, -2, -1) ** -0.5
+        d_col = d_row.reshape(d_row.shape[0], 1)
+        # TODO use torch.linalg.cholesky_solve once it is implemented
+        R = torch.linalg.cholesky((SBS * d_row) * d_col, upper=True)
+        return torch.linalg.solve_triangular(
+            R, d_row.diag_embed(), upper=True, left=False
+        )
+    def _get_svqb(self, U: Tensor, drop: bool, tau: float) -> Tensor:
+        """Return B-orthonormal U.
+        .. note:: When `drop` is `False` then `svqb` is based on the
+                  Algorithm 4 from [DuerschPhD2015] that is a slight
+                  modification of the corresponding algorithm
+                  introduced in [StathopolousWu2002].
+        Args:
+          U (Tensor) : initial approximation, size is (m, n)
+          drop (bool) : when True, drop columns that
+                     contribution to the `span([U])` is small.
+          tau (float) : positive tolerance
+        Returns:
+          U (Tensor) : B-orthonormal columns (:math:`U^T B U = I`), size
+                       is (m, n1), where `n1 = n` if `drop` is `False,
+                       otherwise `n1 <= n`.
+        """
+        if torch.numel(U) == 0:
+            return U
+        UBU = _utils.qform(self.B, U)
+        d = UBU.diagonal(0, -2, -1)
+        # Detect and drop exact zero columns from U. While the test
+        # `abs(d) == 0` is unlikely to be True for random data, it is
+        # possible to construct input data to lobpcg where it will be
+        # True leading to a failure (notice the `d ** -0.5` operation
+        # in the original algorithm). To prevent the failure, we drop
+        # the exact zero columns here and then continue with the
+        # original algorithm below.
+        nz = torch.where(abs(d) != 0.0)
+        assert len(nz) == 1, nz
+        if len(nz[0]) < len(d):
+            U = U[:, nz[0]]
+            if torch.numel(U) == 0:
+                return U
+            UBU = _utils.qform(self.B, U)
+            d = UBU.diagonal(0, -2, -1)
+            nz = torch.where(abs(d) != 0.0)
+            assert len(nz[0]) == len(d)
+        # The original algorithm 4 from [DuerschPhD2015].
+        d_col = (d**-0.5).reshape(d.shape[0], 1)
+        DUBUD = (UBU * d_col) * d_col.mT
+        E, Z = _utils.symeig(DUBUD)
+        t = tau * abs(E).max()
+        if drop:
+            keep = torch.where(E > t)
+            assert len(keep) == 1, keep
+            E = E[keep[0]]
+            Z = Z[:, keep[0]]
+            d_col = d_col[keep[0]]
+        else:
+            E[(torch.where(E < t))[0]] = t
+        return torch.matmul(U * d_col.mT, Z * E**-0.5)
+    def _get_ortho(self, U, V):
+        """Return B-orthonormal U with columns are B-orthogonal to V.
+        .. note:: When `bparams["ortho_use_drop"] == False` then
+                  `_get_ortho` is based on the Algorithm 3 from
+                  [DuerschPhD2015] that is a slight modification of
+                  the corresponding algorithm introduced in
+                  [StathopolousWu2002]. Otherwise, the method
+                  implements Algorithm 6 from [DuerschPhD2015]
+        .. note:: If all U columns are B-collinear to V then the
+                  returned tensor U will be empty.
+        Args:
+          U (Tensor) : initial approximation, size is (m, n)
+          V (Tensor) : B-orthogonal external basis, size is (m, k)
+        Returns:
+          U (Tensor) : B-orthonormal columns (:math:`U^T B U = I`)
+                       such that :math:`V^T B U=0`, size is (m, n1),
+                       where `n1 = n` if `drop` is `False, otherwise
+                       `n1 <= n`.
+        """
+        mm = torch.matmul
+        mm_B = _utils.matmul
+        m = self.iparams["m"]
+        tau_ortho = self.fparams["ortho_tol"]
+        tau_drop = self.fparams["ortho_tol_drop"]
+        tau_replace = self.fparams["ortho_tol_replace"]
+        i_max = self.iparams["ortho_i_max"]
+        j_max = self.iparams["ortho_j_max"]
+        # when use_drop==True, enable dropping U columns that have
+        # small contribution to the `span([U, V])`.
+        use_drop = self.bparams["ortho_use_drop"]
+        # clean up variables from the previous call
+        for vkey in list(self.fvars.keys()):
+            if vkey.startswith("ortho_") and vkey.endswith("_rerr"):
+                self.fvars.pop(vkey)
+        self.ivars.pop("ortho_i", 0)
+        self.ivars.pop("ortho_j", 0)
+        BV_norm = torch.norm(mm_B(self.B, V))
+        BU = mm_B(self.B, U)
+        VBU = mm(V.mT, BU)
+        i = j = 0
+        stats = ""
+        for i in range(i_max):
+            U = U - mm(V, VBU)
+            drop = False
+            tau_svqb = tau_drop
+            for j in range(j_max):
+                if use_drop:
+                    U = self._get_svqb(U, drop, tau_svqb)
+                    drop = True
+                    tau_svqb = tau_replace
+                else:
+                    U = self._get_svqb(U, False, tau_replace)
+                if torch.numel(U) == 0:
+                    # all initial U columns are B-collinear to V
+                    self.ivars["ortho_i"] = i
+                    self.ivars["ortho_j"] = j
+                    return U
+                BU = mm_B(self.B, U)
+                UBU = mm(U.mT, BU)
+                U_norm = torch.norm(U)
+                BU_norm = torch.norm(BU)
+                R = UBU - torch.eye(UBU.shape[-1], device=UBU.device, dtype=UBU.dtype)
+                R_norm = torch.norm(R)
+                # https://github.com/pytorch/pytorch/issues/33810 workaround:
+                rerr = float(R_norm) * float(BU_norm * U_norm) ** -1
+                vkey = f"ortho_UBUmI_rerr[{i}, {j}]"
+                self.fvars[vkey] = rerr
+                if rerr < tau_ortho:
+                    break
+            VBU = mm(V.mT, BU)
+            VBU_norm = torch.norm(VBU)
+            U_norm = torch.norm(U)
+            rerr = float(VBU_norm) * float(BV_norm * U_norm) ** -1
+            vkey = f"ortho_VBU_rerr[{i}]"
+            self.fvars[vkey] = rerr
+            if rerr < tau_ortho:
+                break
+            if m < U.shape[-1] + V.shape[-1]:
+                # TorchScript needs the class var to be assigned to a local to
+                # do optional type refinement
+                B = self.B
+                assert B is not None
+                raise ValueError(
+                    "Overdetermined shape of U:"
+                    f" #B-cols(={B.shape[-1]}) >= #U-cols(={U.shape[-1]}) + #V-cols(={V.shape[-1]}) must hold"
+                )
+        self.ivars["ortho_i"] = i
+        self.ivars["ortho_j"] = j
+        return U
+# Calling tracker is separated from LOBPCG definitions because
+# TorchScript does not support user-defined callback arguments:
+# Keep a handle on the class's own `call_tracker` so that it can be
+# put back after `LOBPCG_call_tracker` has been assigned in its place.
+LOBPCG_call_tracker_orig = LOBPCG.call_tracker
+def LOBPCG_call_tracker(self):
+    """Call the worker's `tracker` callback, passing the worker itself."""
+    self.tracker(self)

.venv/lib/python3.11/site-packages/torch/_lowrank.py ADDED Viewed

	@@ -0,0 +1,294 @@

+"""Implement various linear algebra algorithms for low rank matrices."""
+__all__ = ["svd_lowrank", "pca_lowrank"]
+from typing import Optional, Tuple
+import torch
+from torch import _linalg_utils as _utils, Tensor
+from torch.overrides import handle_torch_function, has_torch_function
+def get_approximate_basis(
+    A: Tensor,
+    q: int,
+    niter: Optional[int] = 2,
+    M: Optional[Tensor] = None,
+) -> Tensor:
+    """Return tensor :math:`Q` with :math:`q` orthonormal columns such
+    that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is
+    specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
+    approximates :math:`A - M`. without instantiating any tensors
+    of the size of :math:`A` or :math:`M`.
+    .. note:: The implementation is based on the Algorithm 4.4 from
+              Halko et al., 2009.
+    .. note:: For an adequate approximation of a k-rank matrix
+              :math:`A`, where k is not known in advance but could be
+              estimated, the number of :math:`Q` columns, q, can be
+              choosen according to the following criteria: in general,
+              :math:`k <= q <= min(2*k, m, n)`. For large low-rank
+              matrices, take :math:`q = k + 5..10`.  If k is
+              relatively small compared to :math:`min(m, n)`, choosing
+              :math:`q = k + 0..2` may be sufficient.
+    .. note:: To obtain repeatable results, reset the seed for the
+              pseudorandom number generator
+    Args::
+        A (Tensor): the input tensor of size :math:`(*, m, n)`
+        q (int): the dimension of subspace spanned by :math:`Q`
+                 columns.
+        niter (int, optional): the number of subspace iterations to
+                               conduct; ``niter`` must be a
+                               nonnegative integer. In most cases, the
+                               default value 2 is more than enough.
+        M (Tensor, optional): the input tensor's mean of size
+                              :math:`(*, m, n)`.
+    References::
+        - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
+          structure with randomness: probabilistic algorithms for
+          constructing approximate matrix decompositions,
+          arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
+          `arXiv <http://arxiv.org/abs/0909.4061>`_).
+    """
+    niter = 2 if niter is None else niter
+    dtype = _utils.get_floating_dtype(A) if not A.is_complex() else A.dtype
+    matmul = _utils.matmul
+    R = torch.randn(A.shape[-1], q, dtype=dtype, device=A.device)
+    # The following code could be made faster using torch.geqrf + torch.ormqr
+    # but geqrf is not differentiable
+    X = matmul(A, R)
+    if M is not None:
+        X = X - matmul(M, R)
+    Q = torch.linalg.qr(X).Q
+    for i in range(niter):
+        X = matmul(A.mH, Q)
+        if M is not None:
+            X = X - matmul(M.mH, Q)
+        Q = torch.linalg.qr(X).Q
+        X = matmul(A, Q)
+        if M is not None:
+            X = X - matmul(M, Q)
+        Q = torch.linalg.qr(X).Q
+    return Q
+def svd_lowrank(
+    A: Tensor,
+    q: Optional[int] = 6,
+    niter: Optional[int] = 2,
+    M: Optional[Tensor] = None,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    r"""Return the singular value decomposition ``(U, S, V)`` of a matrix,
+    batches of matrices, or a sparse matrix :math:`A` such that
+    :math:`A \approx U \operatorname{diag}(S) V^{\text{H}}`. In case :math:`M` is given, then
+    SVD is computed for the matrix :math:`A - M`.
+    .. note:: The implementation is based on the Algorithm 5.1 from
+              Halko et al., 2009.
+    .. note:: For an adequate approximation of a k-rank matrix
+              :math:`A`, where k is not known in advance but could be
+              estimated, the number of :math:`Q` columns, q, can be
+              chosen according to the following criteria: in general,
+              :math:`k <= q <= min(2*k, m, n)`. For large low-rank
+              matrices, take :math:`q = k + 5..10`.  If k is
+              relatively small compared to :math:`min(m, n)`, choosing
+              :math:`q = k + 0..2` may be sufficient.
+    .. note:: This is a randomized method. To obtain repeatable results,
+              set the seed for the pseudorandom number generator.
+    .. note:: In general, use the full-rank SVD implementation
+              :func:`torch.linalg.svd` for dense matrices due to its 10x
+              higher performance characteristics. The low-rank SVD
+              will be useful for huge sparse matrices that
+              :func:`torch.linalg.svd` cannot handle.
+    Args::
+        A (Tensor): the input tensor of size :math:`(*, m, n)`
+        q (int, optional): a slightly overestimated rank of A.
+        niter (int, optional): the number of subspace iterations to
+                               conduct; niter must be a nonnegative
+                               integer, and defaults to 2
+        M (Tensor, optional): the input tensor's mean of size
+                              :math:`(*, m, n)`, which will be broadcasted
+                              to the size of A in this function.
+    References::
+        - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
+          structure with randomness: probabilistic algorithms for
+          constructing approximate matrix decompositions,
+          arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
+          `arXiv <https://arxiv.org/abs/0909.4061>`_).
+    """
+    # Outside of TorchScript, hand the call over to ``__torch_function__``
+    # when A or M is something other than a plain ``torch.Tensor`` / ``None``
+    # and an override is available for those operands.
+    if not torch.jit.is_scripting():
+        tensor_ops = (A, M)
+        if not set(map(type, tensor_ops)).issubset(
+            (torch.Tensor, type(None))
+        ) and has_torch_function(tensor_ops):
+            return handle_torch_function(
+                svd_lowrank, tensor_ops, A, q=q, niter=niter, M=M
+            )
+    # Regular path: the actual computation lives in the private helper.
+    return _svd_lowrank(A, q=q, niter=niter, M=M)
+def _svd_lowrank(
+    A: Tensor,
+    q: Optional[int] = 6,
+    niter: Optional[int] = 2,
+    M: Optional[Tensor] = None,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    # Algorithm 5.1 in Halko et al., 2009
+    q = 6 if q is None else q
+    m, n = A.shape[-2:]
+    matmul = _utils.matmul
+    if M is not None:
+        M = M.broadcast_to(A.size())
+    # Assume that A is tall
+    if m < n:
+        A = A.mH
+        if M is not None:
+            M = M.mH
+    Q = get_approximate_basis(A, q, niter=niter, M=M)
+    B = matmul(Q.mH, A)
+    if M is not None:
+        B = B - matmul(Q.mH, M)
+    U, S, Vh = torch.linalg.svd(B, full_matrices=False)
+    V = Vh.mH
+    U = Q.matmul(U)
+    if m < n:
+        U, V = V, U
+    return U, S, V
+def pca_lowrank(
+    A: Tensor,
+    q: Optional[int] = None,
+    center: bool = True,
+    niter: int = 2,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    r"""Performs linear Principal Component Analysis (PCA) on a low-rank
+    matrix, batches of such matrices, or sparse matrix.
+    This function returns a tuple ``(U, S, V)`` which is the
+    nearly optimal approximation of a singular value decomposition of
+    a centered matrix :math:`A` such that :math:`A \approx U \operatorname{diag}(S) V^{\text{H}}`
+    .. note:: The relation of ``(U, S, V)`` to PCA is as follows:
+                - :math:`A` is a data matrix with ``m`` samples and
+                  ``n`` features
+                - the :math:`V` columns represent the principal directions
+                - :math:`S ** 2 / (m - 1)` contains the eigenvalues of
+                  :math:`A^T A / (m - 1)` which is the covariance of
+                  ``A`` when ``center=True`` is provided.
+                - ``matmul(A, V[:, :k])`` projects data to the first k
+                  principal components
+    .. note:: Different from the standard SVD, the size of the returned
+              matrices depends on the specified rank and q
+              values as follows:
+                - :math:`U` is m x q matrix
+                - :math:`S` is q-vector
+                - :math:`V` is n x q matrix
+    .. note:: To obtain repeatable results, reset the seed for the
+              pseudorandom number generator.
+    Args:
+        A (Tensor): the input tensor of size :math:`(*, m, n)`
+        q (int, optional): a slightly overestimated rank of
+                           :math:`A`. By default, ``q = min(6, m,
+                           n)``.
+        center (bool, optional): if True, center the input tensor,
+                                 otherwise, assume that the input is
+                                 centered.
+        niter (int, optional): the number of subspace iterations to
+                               conduct; niter must be a nonnegative
+                               integer, and defaults to 2.
+    Raises:
+        ValueError: if ``q`` is outside ``[0, min(m, n)]``, if ``niter`` is
+            negative, or if ``A`` is sparse, ``center`` is True and ``A`` is
+            not 2-dimensional.
+    References::
+        - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
+          structure with randomness: probabilistic algorithms for
+          constructing approximate matrix decompositions,
+          arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
+          `arXiv <http://arxiv.org/abs/0909.4061>`_).
+    """
+    # Outside of TorchScript, defer to a ``__torch_function__`` override when
+    # A is not a plain ``torch.Tensor``.
+    if not torch.jit.is_scripting():
+        if type(A) is not torch.Tensor and has_torch_function((A,)):
+            return handle_torch_function(
+                pca_lowrank, (A,), A, q=q, center=center, niter=niter
+            )
+    (m, n) = A.shape[-2:]
+    if q is None:
+        q = min(6, m, n)
+    elif not (q >= 0 and q <= min(m, n)):
+        raise ValueError(
+            f"q(={q}) must be non-negative integer and not greater than min(m, n)={min(m, n)}"
+        )
+    if not (niter >= 0):
+        raise ValueError(f"niter(={niter}) must be non-negative integer")
+    dtype = _utils.get_floating_dtype(A)
+    # Without centering this is just the low-rank SVD of A itself.
+    if not center:
+        return _svd_lowrank(A, q, niter=niter, M=None)
+    if _utils.is_sparse(A):
+        if len(A.shape) != 2:
+            raise ValueError("pca_lowrank input is expected to be 2-dimensional tensor")
+        # Sum over the row dimension divided by m: the per-column mean.
+        c = torch.sparse.sum(A, dim=(-2,)) / m
+        # reshape c
+        # Rebuild the means as an (n, 1) sparse column: row 0 of ``indices``
+        # holds the column positions, row 1 stays zero.
+        column_indices = c.indices()[0]
+        indices = torch.zeros(
+            2,
+            len(column_indices),
+            dtype=column_indices.dtype,
+            device=column_indices.device,
+        )
+        indices[0] = column_indices
+        C_t = torch.sparse_coo_tensor(
+            indices, c.values(), (n, 1), dtype=dtype, device=A.device
+        )
+        # Multiply by a row of ones and transpose to obtain an (m, n) matrix
+        # whose every row is the column-mean vector; it is passed as M so the
+        # subtraction A - M happens inside _svd_lowrank.
+        ones_m1_t = torch.ones(A.shape[:-2] + (1, m), dtype=dtype, device=A.device)
+        M = torch.sparse.mm(C_t, ones_m1_t).mT
+        return _svd_lowrank(A, q, niter=niter, M=M)
+    else:
+        # Dense input: subtract the column means directly (broadcast over rows).
+        C = A.mean(dim=(-2,), keepdim=True)
+        return _svd_lowrank(A - C, q, niter=niter, M=None)

.venv/lib/python3.11/site-packages/torch/_meta_registrations.py ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/torch/_namedtensor_internals.py ADDED Viewed

	@@ -0,0 +1,159 @@

+# mypy: allow-untyped-defs
+from collections import OrderedDict
+"""
+This file contains helper functions that implement experimental functionality
+for named tensors in python. All of these are experimental, unstable, and
+subject to change or deletion.
+"""
+def check_serializing_named_tensor(tensor):
+    if tensor.has_names():
+        raise RuntimeError(
+            "NYI: Named tensors don't support serialization. Please drop "
+            "names via `tensor = tensor.rename(None)` before serialization."
+        )
+def build_dim_map(tensor):
+    """Returns a map of { dim: dim_name } where dim is a name if the dim is named
+    and the dim index otherwise."""
+    return OrderedDict(
+        [(idx if name is None else name, name) for idx, name in enumerate(tensor.names)]
+    )
+def unzip_namedshape(namedshape):
+    if isinstance(namedshape, OrderedDict):
+        namedshape = namedshape.items()
+    if not hasattr(namedshape, "__iter__") and not isinstance(namedshape, tuple):
+        raise RuntimeError(
+            f"Expected namedshape to be OrderedDict or iterable of tuples, got: {type(namedshape)}"
+        )
+    if len(namedshape) == 0:
+        raise RuntimeError("Expected namedshape to non-empty.")
+    return zip(*namedshape)
+def namer_api_name(inplace):
+    if inplace:
+        return "rename_"
+    else:
+        return "rename"
+def is_ellipsis(item):
+    return item == Ellipsis or item == "..."
+def single_ellipsis_index(names, fn_name):
+    ellipsis_indices = [i for i, name in enumerate(names) if is_ellipsis(name)]
+    if len(ellipsis_indices) >= 2:
+        raise RuntimeError(
+            f"{fn_name}: More than one Ellipsis ('...') found in names ("
+            f"{names}). This function supports up to one Ellipsis."
+        )
+    if len(ellipsis_indices) == 1:
+        return ellipsis_indices[0]
+    return None
+def expand_single_ellipsis(numel_pre_glob, numel_post_glob, names):
+    return names[numel_pre_glob : len(names) - numel_post_glob]
+def replace_ellipsis_by_position(ellipsis_idx, names, tensor_names):
+    globbed_names = expand_single_ellipsis(
+        ellipsis_idx, len(names) - ellipsis_idx - 1, tensor_names
+    )
+    return names[:ellipsis_idx] + globbed_names + names[ellipsis_idx + 1 :]
+def resolve_ellipsis(names, tensor_names, fn_name):
+    """
+    Expands ... inside `names` to be equal to a list of names from `tensor_names`.
+    """
+    ellipsis_idx = single_ellipsis_index(names, fn_name)
+    if ellipsis_idx is None:
+        return names
+    return replace_ellipsis_by_position(ellipsis_idx, names, tensor_names)
+def update_names_with_list(tensor, names, inplace):
+    # Special case for tensor.rename(None)
+    if len(names) == 1 and names[0] is None:
+        return tensor._update_names(None, inplace)
+    return tensor._update_names(
+        resolve_ellipsis(names, tensor.names, namer_api_name(inplace)), inplace
+    )
+def update_names_with_mapping(tensor, rename_map, inplace):
+    dim_map = build_dim_map(tensor)
+    for old_dim in rename_map.keys():
+        new_dim = rename_map[old_dim]
+        if old_dim in dim_map.keys():
+            dim_map[old_dim] = new_dim
+        else:
+            raise RuntimeError(
+                f"{namer_api_name(inplace)}: Tried to rename dim '{old_dim}' to dim "
+                f"{new_dim} in Tensor[{tensor.names}] but dim '{old_dim}' does not exist"
+            )
+    return tensor._update_names(tuple(dim_map.values()), inplace)
+def update_names(tensor, names, rename_map, inplace):
+    """There are two usages:
+    tensor.rename(*names) returns a view on tensor with named dims `names`.
+    `names` must be of length `tensor.dim()`; otherwise, if '...' is in `names`,
+    then it is expanded greedily to be equal to the corresponding names from
+    `tensor.names`.
+    For example,
+    ```
+    >>> # xdoctest: +SKIP
+    >>> x = torch.empty(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
+    >>> x.rename('...', 'height', 'width').names
+    ('N', 'C', 'height', 'width')
+    >>> # xdoctest: +SKIP
+    >>> x.rename('batch', '...', 'width').names
+    ('batch', 'C', 'H', 'width')
+    ```
+    tensor.rename(**rename_map) returns a view on tensor that has rename dims
+        as specified in the mapping `rename_map`.
+    For example,
+    ```
+    >>> # xdoctest: +SKIP
+    >>> x = torch.empty(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
+    >>> x.rename(W='width', H='height').names
+    ('N', 'C', 'height', 'width')
+    ```
+    Finally, tensor.rename has an in-place version called tensor.rename_.
+    """
+    has_names = len(names) > 0
+    has_rename_pairs = bool(rename_map)
+    if has_names and has_rename_pairs:
+        raise RuntimeError(
+            f"{namer_api_name(inplace)}: This function takes either positional "
+            f"args or keyword args, but not both. Use tensor.{namer_api_name(inplace)}(*names) "
+            f"to name dims and tensor.{namer_api_name(inplace)}(**rename_map) to rename "
+            "dims."
+        )
+    # Special case for tensor.rename(*[]), which is valid for a 0 dim tensor.
+    if not has_names and not has_rename_pairs:
+        return update_names_with_list(tensor, names, inplace)
+    if has_names:
+        return update_names_with_list(tensor, names, inplace)
+    return update_names_with_mapping(tensor, rename_map, inplace)

.venv/lib/python3.11/site-packages/torch/_ops.py ADDED Viewed

	@@ -0,0 +1,1355 @@

+# mypy: allow-untyped-defs
+import abc
+import contextlib
+import ctypes
+import importlib
+import inspect
+import sys
+import types
+from typing import Any, Callable, Dict, List, Set, Type, Union
+import torch
+import torch.utils._pytree as pytree
+from torch import _utils_internal
+from torch._C import _dispatch_is_included_in_alias as is_included_in_alias, DispatchKey
+from torch._functorch.pyfunctorch import dispatch_functorch
+from torch.utils._python_dispatch import TorchDispatchMode
+# Query `hasattr` only once.
+_SET_GLOBAL_FLAGS = hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags")
+@contextlib.contextmanager
+def dl_open_guard():
+    """
+    Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a
+    shared library to load custom operators.
+    """
+    if not _SET_GLOBAL_FLAGS:
+        yield
+        return
+    old_flags = sys.getdlopenflags()
+    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
+    try:
+        yield
+    finally:
+        sys.setdlopenflags(old_flags)
+class OperatorBase:
+    """
+    Base class for OpOverload (which represents C++ ATen operators) and HigherOrderOperator
+    (which represents Python-only operators that are unrepresentable in TorchScript).
+    It owns the Python-side registration tables (``py_kernels``,
+    ``python_key_table``, ``functorch_table``) and the ``_dispatch_cache``.
+    """
+    def __init__(self):
+        # The dispatch cache precomputes a mapping of dispatch key that the
+        # dispatcher wants to dispatch to, to an actual implementation of the
+        # dispatch key.  Confusingly, the actual implementation could *also* be a
+        # dispatch key, but in this case, this refers to the C++ kernel that
+        # was registered to some dispatch key.  Aliases are permitted in the
+        # latter but not the former; for example, you might lookup the
+        # entry for AutogradCPU, and this maps you to the Autograd key for
+        # the generic autograd kernel that works for all devices.  Since this
+        # is the Python dispatcher, you can also put an arbitrary Python
+        # callable to call instead.  This handler gets precisely the
+        # args/kwargs that the operator was __call__'ed with.
+        # NB: This name is hard-coded in torch/csrc/autograd/python_variable.cpp
+        # for use with OpOverload; cache lookup is done entirely from C++
+        # for speed.
+        # TODO: The cache is NOT currently used by HigherOrderOperator, but it should!
+        self._dispatch_cache: Dict[
+            DispatchKey, Union[DispatchKey, Callable[..., Any]]
+        ] = {}
+        # This table allows you to override the behavior of a particular
+        # dispatch key to call a custom Python function, rather than the
+        # ordinary C++ configured behavior.  This is the raison d'etre of
+        # Python dispatcher: to let you program the dispatcher from Python
+        # in case you need something unusual, and don't want to clobber
+        # the existing registrations using the Python operator registration
+        # API.
+        self.py_kernels: Dict[DispatchKey, Callable[..., Any]] = {}
+        # This table allows you to override the behavior of a particular
+        # operator for a particular TorchDispatchMode.  In practice,
+        # we are using this mostly for ProxyTensorMode.  Modes can be
+        # thought of as an open world extension of dispatch keys, so it
+        # makes sense that you should be able to register them, the same
+        # way you can register dispatch keys.
+        self.python_key_table: Dict[
+            Union[Type[TorchDispatchMode], Type[torch.Tensor]], Callable[..., Any]
+        ] = {}
+        # This table allows you to override the behavior of functorch
+        # transformations.  NB: this currently only does something for
+        # HigherOrderOperator
+        self.functorch_table = {}
+    def __call__(self, *args, **kwargs):
+        """Invoke the operator; must be provided by subclasses."""
+        raise NotImplementedError
+    def has_kernel_for_dispatch_key(self, k):
+        """Return True if a Python kernel is registered in ``py_kernels`` for ``k``."""
+        return k in self.py_kernels
+    def has_kernel_for_any_dispatch_key(self, ks):
+        """Return True if some non-alias key with a registered Python kernel
+        is contained in the key set ``ks``."""
+        for k in self.py_kernels:
+            if not torch._C._dispatch_is_alias_key(k) and ks.has(k):
+                return True
+        return False
+    def py_impl(self, k):
+        """Return a decorator that registers a Python implementation for ``k``.
+        ``k`` selects the table the decorated function is stored in:
+        - a ``TorchDispatchMode`` or ``torch.Tensor`` subclass -> ``python_key_table``
+        - a functorch ``TransformType`` -> ``functorch_table``
+        - a ``DispatchKey`` (other than ``Python``) -> ``py_kernels``
+        Registering the same mode/subclass or transform twice trips an
+        assertion; registering the same ``DispatchKey`` twice raises
+        ``RuntimeError``. The decorator returns the function unchanged.
+        """
+        def inner(fn):
+            if inspect.isclass(k) and (
+                issubclass(k, TorchDispatchMode) or issubclass(k, torch.Tensor)
+            ):
+                assert k not in self.python_key_table
+                # TODO(voz): Should we replace setting DispatchKey.Python entirely with setting mode keys?
+                self.python_key_table[k] = fn
+                # Cached resolutions may be stale after a new registration.
+                self._dispatch_cache.clear()
+                return fn
+            if isinstance(k, torch._C._functorch.TransformType):
+                assert k not in self.functorch_table
+                self.functorch_table[k] = fn
+                return fn
+            assert isinstance(k, DispatchKey)
+            assert (
+                k != DispatchKey.Python
+            ), "Please register a mode for the torch._C.DispatchKey.Python key instead."
+            if k in self.py_kernels:
+                raise RuntimeError(
+                    f"Trying to override a python impl for {k} on operator {self.name()}"
+                )
+            self.py_kernels[k] = fn
+            # Cached resolutions may be stale after a new registration.
+            self._dispatch_cache.clear()
+            return fn
+        return inner
+    # Registers an implementation to all **3** variants of functionalization that we have:
+    # - DispatchKey.Functionalize
+    # - functorch.TransformType.Functionalize
+    # - FunctionalTensorMode
+    # Example:
+    #   @py_functionalize_impl
+    #   def functionalize_rule(ctx, inner_f, *args):
+    #       args_unwrapped = ctx.unwrap_tensors(args)
+    #       with ctx.redispatch_to_next():
+    #           out = ctx.functionalize(inner_f)(*args_unwrapped)
+    #           return ctx.wrap_tensors(out)
+    def py_functionalize_impl(self, fn):
+        """Register ``fn`` as the functionalization rule for this operator.
+        ``fn`` is wrapped three times, once per functionalization flavor, so
+        that it always receives an API/context object as its first argument,
+        and each wrapper is registered through ``py_impl``. Returns ``fn``.
+        """
+        # Imported lazily, inside the method, rather than at module import time.
+        from torch._subclasses.functional_tensor import (
+            CppFunctionalizeAPI as _CppFunctionalizeAPI,
+            FunctorchFunctionalizeAPI as _FunctorchFunctionalizeAPI,
+            PythonFunctionalizeAPI as _PythonFunctionalizeAPI,
+        )
+        # Construct our three flavors of functionalization,
+        # each of which have slightly different wrap/unwrap/redispatch policies
+        def functionalize_dk_fn(*args, **kwargs):
+            return fn(_CppFunctionalizeAPI(), *args, **kwargs)
+        def functionalize_dispatch_mode_fn(mode, *args, **kwargs):
+            return fn(_PythonFunctionalizeAPI(mode), *args, **kwargs)
+        def functionalize_functorch_fn(interpreter, *args, **kwargs):
+            return fn(_FunctorchFunctionalizeAPI(interpreter), *args, **kwargs)
+        self.py_impl(DispatchKey.Functionalize)(functionalize_dk_fn)
+        self.py_impl(torch._subclasses.functional_tensor.FunctionalTensorMode)(
+            functionalize_dispatch_mode_fn
+        )
+        self.py_impl(torch._C._functorch.TransformType.Functionalize)(
+            functionalize_functorch_fn
+        )
+        return fn
+    def name(self):
+        """Return the operator's name; must be provided by subclasses."""
+        raise NotImplementedError
+# Equivalent to computeDispatchTableEntryWithDebug
+def resolve_key(op: OperatorBase, k: DispatchKey):  # type: ignore[valid-type]
+    """Resolve dispatch key ``k`` to the key whose Python kernel on ``op`` should run.
+    Candidates are tried in a fixed priority order: a direct registration for
+    ``k``, then the alias keys below, then the backend fallback.
+    Args:
+        op: operator whose ``py_kernels`` registrations are consulted.
+        k: the dispatch key being dispatched to.
+    Returns:
+        The key to look up in ``op.py_kernels`` (``k`` itself when it is
+        registered directly or when a backend fallback exists for it).
+    Raises:
+        RuntimeError: if the CompositeImplicitAutograd candidate is ambiguous
+            for ``AutogradOther``.
+        NotImplementedError: if no candidate applies.
+    """
+    # 1. (Direct) operator registration
+    if op.has_kernel_for_dispatch_key(k):
+        return k
+    # 2.1 Use CompositeExplicitAutogradNonFunctional kernel if available
+    cand = DispatchKey.CompositeExplicitAutogradNonFunctional
+    if (
+        k == DispatchKey.Undefined or is_included_in_alias(k, cand)
+    ) and op.has_kernel_for_dispatch_key(cand):
+        return cand
+    # 2.2 Use CompositeExplicitAutograd kernel if available
+    cand = DispatchKey.CompositeExplicitAutograd
+    if (
+        k == DispatchKey.Undefined or is_included_in_alias(k, cand)
+    ) and op.has_kernel_for_dispatch_key(cand):
+        return cand
+    # True when a kernel exists for any backend key associated with k, or for
+    # CompositeExplicitAutograd; the implicit-autograd candidates below are
+    # only used when this is False.
+    has_backend_kernel = op.has_kernel_for_any_dispatch_key(
+        torch._C._dispatch_get_backend_keyset_from_autograd(k)
+    ) or op.has_kernel_for_dispatch_key(DispatchKey.CompositeExplicitAutograd)
+    # 2.3. Use CompositeImplicitAutograd kernel if available
+    # (the NestedTensor variant is tried first)
+    cand = DispatchKey.CompositeImplicitAutogradNestedTensor
+    if (
+        (k != DispatchKey.Undefined and is_included_in_alias(k, cand))
+        and op.has_kernel_for_dispatch_key(cand)
+        and not has_backend_kernel
+    ):
+        return cand
+    cand = DispatchKey.CompositeImplicitAutograd
+    if (
+        k == DispatchKey.Undefined or is_included_in_alias(k, cand)
+    ) and op.has_kernel_for_dispatch_key(cand):
+        if k == DispatchKey.AutogradOther and op.has_kernel_for_any_dispatch_key(
+            torch._C._dispatch_autogradother_backends
+        ):
+            raise RuntimeError("ambiguous autogradother kernel")
+        elif not has_backend_kernel:
+            return cand
+    # 2.4. For autograd backend keys, use kernel from DispatchKey::Autograd if available
+    cand = DispatchKey.Autograd
+    if is_included_in_alias(k, cand) and op.has_kernel_for_dispatch_key(cand):
+        return cand
+    # 2.5 Use kernel from DispatchKey::FuncTorchBatchedDecomposition if available
+    cand = DispatchKey.FuncTorchBatchedDecomposition
+    if is_included_in_alias(k, cand) and op.has_kernel_for_dispatch_key(cand):
+        return cand
+    # Backend fallback
+    if torch._C._dispatch_has_backend_fallback(k):
+        # The dispatch key itself will implicitly route to backend fallback.
+        # This is probably not great for the pure Python implementation.
+        return k
+    raise NotImplementedError(f"could not find kernel for {op} at dispatch key {k}")
+# Registry of HigherOrderOperator instances keyed by operator name; entries
+# are added by HigherOrderOperator.__init__.
+_higher_order_ops: Dict[str, "HigherOrderOperator"] = {}
+# Dispatch keys that HigherOrderOperator.__init__ marks as fallthrough (i.e.
+# removes from the operator's non_fallthrough_keys) by default.
+_HIGHER_ORDER_OP_DEFAULT_FALLTHROUGH_DISPATCH_KEYS = [
+    DispatchKey.PythonDispatcher,  # type: ignore[attr-defined]
+    DispatchKey.PythonTLSSnapshot,  # type: ignore[attr-defined]
+    DispatchKey.ADInplaceOrView,
+    DispatchKey.BackendSelect,
+    DispatchKey.AutocastCPU,  # type: ignore[attr-defined]
+    DispatchKey.AutocastCUDA,  # type: ignore[attr-defined]
+]
+class HigherOrderOperator(OperatorBase, abc.ABC):
+    # The HigherOrderOperator will appear as torch.ops.higher_order.{name}
+    #
+    # If you're creating a new HigherOrderOperator, please do not change the
+    # default. Adding operators to the global torch.ops namespace is a bad
+    # practice due to name collisions.
+    def __init__(self, name):
+        super().__init__()
+        if type(self) is HigherOrderOperator:
+            raise RuntimeError(
+                "Direct instantiation of HigherOrderOperator is not allowed. Please subclass it."
+            )
+        self._name = name
+        # Make _OPNamespace not scream, this whole name based association needs a good hard look
+        self.__name__ = name
+        _higher_order_ops[name] = self
+        self._ns = "higher_order"
+        self.__module__ = "torch.ops.higher_order"
+        self.non_fallthrough_keys = torch._C._dispatch_keyset_full()
+        for dispatch_key in _HIGHER_ORDER_OP_DEFAULT_FALLTHROUGH_DISPATCH_KEYS:
+            self.fallthrough(dispatch_key)
+        # [NOTE] We have to register pre-dispatch key implementation
+        # because sometimes HOP use aot-dispatch tracing to detect certaion
+        # mutations. This is problematic when we are functionalizing HOP
+        # during pre-dispatch because when the inner tracer starts, it will see
+        # that PreDispatch key is still active. In that case, we just redispatch
+        # it to next key. This is only safe to do when PreDispatch key stack has no
+        # active modes.
+    def py_impl(self, k):
+        if isinstance(k, DispatchKey) and not self.non_fallthrough_keys.has(k):
+            self.non_fallthrough_keys = self.non_fallthrough_keys.add(k)
+        return super().py_impl(k)
+    @property
+    def namespace(self):
+        return self._ns
+    def fallthrough(self, dispatch_key):
+        self.non_fallthrough_keys = self.non_fallthrough_keys.remove(dispatch_key)
+    # Use positional-only argument to avoid naming collide with custom ops arguments
+    # that are named "self".
+    def dispatch(self, /, dispatch_key, *args, **kwargs):
+        from torch.utils._python_dispatch import _get_current_dispatch_mode
+        if dispatch_key in self._dispatch_cache:
+            kernel = self._dispatch_cache[dispatch_key]
+            assert not isinstance(kernel, DispatchKey)
+            return kernel(*args, **kwargs)
+        if dispatch_key == DispatchKey.FuncTorchDynamicLayerFrontMode:
+            return dispatch_functorch(self, args, kwargs)
+        if dispatch_key == DispatchKey.Python:
+            # Keep the following 1:1 with handle_torch_function_no_python_arg_parser
+            # in torch/csrc/utils/python_arg_parser.cpp
+            overloaded_args_list = []
+            def has_python_key(tensor):
+                return torch._C._dispatch_keys(tensor).has("Python")
+            def check_overloaded(arg):
+                if isinstance(arg, torch.Tensor) and has_python_key(arg):
+                    overloaded_args_list.append(arg)
+            for arg in (*args, *kwargs.values()):
+                check_overloaded(arg)
+                if isinstance(arg, (list, tuple)):
+                    for a in arg:
+                        check_overloaded(a)
+            overloaded_args = tuple(overloaded_args_list)
+            overloaded_types = tuple(type(arg) for arg in overloaded_args)
+            # Step 1: dispatch on any user TorchDispatchModes
+            from torch.utils._python_dispatch import _pop_mode_temporarily
+            curr_mode = _get_current_dispatch_mode()
+            if curr_mode is not None:
+                if type(curr_mode) in self.python_key_table:
+                    handler = self.python_key_table[type(curr_mode)]
+                    with _pop_mode_temporarily() as mode:
+                        # "natural" calling convention: (mode, *args, **kwargs)
+                        # TODO(rzou): we should support torch_dispatch calling convention too.
+                        result = handler(mode, *args, **kwargs)
+                else:
+                    raise NotImplementedError(
+                        f"There was no rule registered for HOP {self._name} and mode {curr_mode}. "
+                        f"We recommend filing an issue."
+                    )
+                if result is not NotImplemented:
+                    return result
+            # Step 2: dispatch on any subclasses
+            for arg in overloaded_args:
+                subclass_type = type(arg)
+                if (
+                    subclass_type.__torch_dispatch__
+                    == torch._C._disabled_torch_dispatch_impl
+                ):
+                    continue
+                if subclass_type in self.python_key_table:
+                    handler = self.python_key_table[subclass_type]
+                    # "natural" calling convention: (*args, **kwargs)
+                    # TODO(rzou): we should support torch_dispatch calling convention too.
+                    result = handler(*args, **kwargs)
+                else:
+                    raise NotImplementedError(
+                        f"There was no rule registered for HOP {self._name} and subclass {subclass_type}. "
+                        f"We recommend filing an issue."
+                    )
+                if result is not NotImplemented:
+                    return result
+            # All handlers returned NotImplemented
+            raise TypeError(
+                f"Multiple dispatch failed for {self._name}. There was no registered that "
+                f"did not return NotImplemented. Use HOP.py_impl to register some. "
+                f"Tried mode: {curr_mode}) and subclasses: "
+                f"{[type(a) for a in overloaded_args]}"
+            )
+        functionality_key = torch._C._to_functionality_key(dispatch_key)  # type: ignore[attr-defined]
+        if functionality_key == DispatchKey.PreDispatch:
+            from torch.utils._python_dispatch import _pop_mode_temporarily
+            # The check for Python in the exclude set is so we properly respect `with no_dispatch()`
+            # calls inside of a mode.
+            if (
+                _len_torch_dispatch_stack_pre_dispatch() > 0
+            ) and not torch._C._dispatch_tls_is_dispatch_key_excluded(
+                DispatchKey.Python
+            ):
+                curr_mode = _get_current_dispatch_mode_pre_dispatch()
+                assert (
+                    curr_mode is not None
+                ), "Illegal invocation of dispatch on torch._C.DispatchKey.PreDispatch without a mode."
+                assert (
+                    type(curr_mode) in self.python_key_table
+                ), f"Current active mode {curr_mode} not registered"
+                handler = self.python_key_table[type(curr_mode)]
+                with _pop_mode_temporarily(functionality_key) as mode:
+                    return handler(mode, *args, **kwargs)
+        final_key = resolve_key(self, dispatch_key)
+        # This can current fail due to backend fallbacks.  You just have to
+        # register them by hand for HigherOrderOperator.
+        if final_key not in self.py_kernels:
+            raise NotImplementedError(
+                f"could not find kernel for HigherOrderOperator {self._name} "
+                f"at dispatch key {final_key} (resolved from {dispatch_key})"
+            )
+        # [NOTE] We shouldn't cache PreDispatch kernel here because depending
+        # on what modes are active, predispatch behaviour is different.
+        # Also we do same thing for normal ops:
+        # See Note [Not Caching Per-Dispatch-Key Mode Handlers]
+        if dispatch_key != DispatchKey.PreDispatch:
+            self._dispatch_cache[dispatch_key] = self.py_kernels[final_key]
+        kernel = self.py_kernels[final_key]
+        # It's illegal to register DispatchKey to py_kernels, since there's no
+        # C++ kernel to call into
+        assert not isinstance(kernel, DispatchKey)
+        return kernel(*args, **kwargs)
+    @abc.abstractmethod
+    def __call__(self, /, *args, **kwargs):
+        # Dynamo already traces the body of HigherOrderOp beforehand when it
+        # so no need to trace into it.
+        from torch._dynamo import disable
+        @disable
+        def wrapper():
+            flat_args = _to_flat_tuple(args, kwargs)
+            if torch.overrides.has_torch_function(flat_args):
+                return torch.overrides.handle_torch_function(
+                    self, flat_args, *args, **kwargs
+                )
+            dispatch_key_set = _compute_keyset(args, kwargs, self.non_fallthrough_keys)
+            return self.dispatch(
+                dispatch_key_set.highestPriorityTypeId(), *args, **kwargs
+            )
+        return wrapper()
+    def __str__(self):
+        """Return the operator's name, as reported by ``self.name()``."""
+        return f"{self.name()}"
+    def name(self):
+        """Return the name stored in ``self._name``."""
+        return self._name
+def _to_flat_tuple(args, kwargs):
+    """Flatten positional and keyword arguments into their pytree leaves.
+    Delegates to ``pytree.arg_tree_leaves``; the container type of the result
+    is whatever that helper returns (NOTE(review): confirm it is a tuple, as
+    the function name suggests).
+    """
+    return pytree.arg_tree_leaves(*args, **kwargs)
+def _compute_keyset(args, kwargs, non_fallthrough_keys):
+    """Compute the dispatch key set for a call from its tensor arguments.
+    Collects the tensors found in ``args``/``kwargs`` and passes them to
+    ``key_extractor`` with ``non_fallthrough_keys`` as the key mask.
+    """
+    tensors = _get_tensors(args, kwargs)
+    return key_extractor(tensors, non_fallthrough_keys)
+def _get_tensors(args, kwargs):
+    flat_all = _to_flat_tuple(args, kwargs)
+    tensor_args = [t for t in flat_all if isinstance(t, torch.Tensor)]
+    return tuple(tensor_args)
+# Note - this should maintain identical impl to the C++ dispatcher key extraction logic
+# at ATen/core/dispatch/DispatchKeyExtractor.h
+def key_extractor(tensors, key_mask):
+    key_set = torch._C._dispatch_tls_local_include_set()
+    for tensor in tensors:
+        key_set = key_set | torch._C._dispatch_keys(tensor)
+    key_set = key_set - torch._C._dispatch_tls_local_exclude_set()
+    key_set = key_set & key_mask
+    return key_set
+# Mode stack for PreDispatchKey
+# it should always have three keys with
+# priority given to FunctionalTensorMode and
+# then ProxyTorchDispatchMode. It means that
+# slot 0 belongs to ProxyTorchDispatchMode and
+# slot 1 belongs to FunctionalTensorMode.
+#
+# SchemaCheckMode is separate from the other 2,
+# and is only valid when the stack is empty.
+# SchemaCheckMode is for testing purposes, and
+# is meant to run in eager mode on concrete inputs,
+# checking for incorrect schemas in regards to
+# aliasing or mutating ops.
+class _ModeStackStateForPreDispatch:
+    def __init__(self):
+        self.__infra_modes = [None, None]
+        self._schema_check_mode = None
+    def set(self, index, mode):
+        assert index < len(self.__infra_modes)
+        self.__infra_modes[index] = mode
+    def get(self, index):
+        assert index < len(self.__infra_modes)
+        return self.__infra_modes[index]
+    def count(self):
+        return len([i for i in self.__infra_modes if i is not None]) + int(
+            self._schema_check_mode is not None
+        )
+# Module-level singleton holding the PreDispatch mode slots; it is handed out
+# by mode_stack_state_for_pre_dispatch().
+_mode_stack_state_for_pre_dispatch = _ModeStackStateForPreDispatch()
+def unset_mode_pre_dispatch(mode_key, schema_check=False):
+    current_mode_stack_pre_dispatch = mode_stack_state_for_pre_dispatch()
+    assert mode_key is None or mode_key in (
+        torch._C._TorchDispatchModeKey.PROXY,
+        torch._C._TorchDispatchModeKey.FUNCTIONAL,
+    )
+    if schema_check:
+        assert mode_key is None
+    def _unset_mode():
+        if mode_key == torch._C._TorchDispatchModeKey.PROXY:
+            current_mode = current_mode_stack_pre_dispatch.get(0)
+            mode_stack_state_for_pre_dispatch().set(0, None)
+            return current_mode
+        elif mode_key == torch._C._TorchDispatchModeKey.FUNCTIONAL:
+            current_mode = current_mode_stack_pre_dispatch.get(1)
+            mode_stack_state_for_pre_dispatch().set(1, None)
+            return current_mode
+        else:
+            current_mode = mode_stack_state_for_pre_dispatch()._schema_check_mode
+            mode_stack_state_for_pre_dispatch()._schema_check_mode = None
+            return current_mode
+    current_mode = _unset_mode()
+    new_pre_dispatch_len = _len_torch_dispatch_stack_pre_dispatch()
+    # When we are unsetting a mode, we need to check if there is
+    # active mode left on the PreDispatch key. If there is nothing
+    # active, we need to remove PreDispatch key from local dispatch include
+    # set.
+    if new_pre_dispatch_len == 0:
+        torch._C._dispatch_tls_set_dispatch_key_included(DispatchKey.PreDispatch, False)
+    return current_mode
+def _set_mode_pre_dispatch(mode):
+    from torch._subclasses.functional_tensor import FunctionalTensorMode
+    from torch._subclasses.schema_check_mode import SchemaCheckMode
+    from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode
+    assert isinstance(
+        mode,
+        (
+            FunctionalTensorMode,
+            ProxyTorchDispatchMode,
+            SchemaCheckMode,
+        ),
+    )
+    previous_mode_stack_len = _len_torch_dispatch_stack_pre_dispatch()
+    if isinstance(mode, SchemaCheckMode):
+        current_mode = mode_stack_state_for_pre_dispatch()._schema_check_mode
+        if previous_mode_stack_len > 0:
+            raise AssertionError(
+                "SchemaCheckMode for pre-dispatch must be used exclusively, found other modes on the stack"
+            )
+        mode_stack_state_for_pre_dispatch()._schema_check_mode = mode
+    elif isinstance(mode, FunctionalTensorMode):
+        current_mode = mode_stack_state_for_pre_dispatch().get(1)
+        assert current_mode is None
+        mode_stack_state_for_pre_dispatch().set(1, mode)
+    else:
+        current_mode = mode_stack_state_for_pre_dispatch().get(0)
+        assert current_mode is None
+        mode_stack_state_for_pre_dispatch().set(0, mode)
+    # When we are setting a mode, we need to check if there is
+    # active mode left on the PreDispatch key. If there was nothing
+    # active before setting this mode, it means that PreDispatch key
+    # was turned off. So we need to turn it on again.
+    if previous_mode_stack_len == 0:
+        torch._C._dispatch_tls_set_dispatch_key_included(DispatchKey.PreDispatch, True)
+def _pop_mode_from_pre_dispatch():
+    mode_stack = mode_stack_state_for_pre_dispatch()
+    pre_dispatch_len = _len_torch_dispatch_stack_pre_dispatch()
+    if pre_dispatch_len == 0:
+        raise AssertionError("Trying to pop empty mode stack")
+    if mode_stack._schema_check_mode is not None:
+        return unset_mode_pre_dispatch(None, schema_check=True)
+    if mode_stack.get(1) is not None:
+        return unset_mode_pre_dispatch(torch._C._TorchDispatchModeKey.FUNCTIONAL)
+    if mode_stack.get(0) is not None:
+        return unset_mode_pre_dispatch(torch._C._TorchDispatchModeKey.PROXY)
+def _len_torch_dispatch_stack_pre_dispatch():
+    """Return the number of modes currently held in the PreDispatch mode state."""
+    return mode_stack_state_for_pre_dispatch().count()
+def _get_dispatch_mode_pre_dispatch(mode_key):
+    assert mode_key in (
+        torch._C._TorchDispatchModeKey.PROXY,
+        torch._C._TorchDispatchModeKey.FUNCTIONAL,
+    )
+    if mode_key == torch._C._TorchDispatchModeKey.PROXY:
+        return mode_stack_state_for_pre_dispatch().get(0)
+    else:
+        return mode_stack_state_for_pre_dispatch().get(1)
+def _get_current_dispatch_mode_pre_dispatch():
+    if mode_stack_state_for_pre_dispatch()._schema_check_mode is not None:
+        return mode_stack_state_for_pre_dispatch()._schema_check_mode
+    else:
+        stack_len = mode_stack_state_for_pre_dispatch().count()
+        if stack_len == 2:
+            return mode_stack_state_for_pre_dispatch().get(1)
+        if stack_len == 1:
+            return (
+                mode_stack_state_for_pre_dispatch().get(1)
+                if mode_stack_state_for_pre_dispatch().get(1) is not None
+                else mode_stack_state_for_pre_dispatch().get(0)
+            )
+    return None
+def mode_stack_state_for_pre_dispatch():
+    """Return the module-level ``_mode_stack_state_for_pre_dispatch`` object."""
+    # The name is only read here; the `global` declaration is not strictly needed.
+    global _mode_stack_state_for_pre_dispatch
+    return _mode_stack_state_for_pre_dispatch
+# Registry of the OpOverload objects passed to add_cached_op(); emptied by
+# reset_cached_ops() and exposed by get_cached_ops().
+cached_ops: Set["OpOverload"] = set()
+def add_cached_op(op_overload):
+    global cached_ops
+    cached_ops.add(op_overload)
+def reset_cached_ops():
+    global cached_ops
+    cached_ops.clear()
+def get_cached_ops():
+    global cached_ops
+    return cached_ops
+# Each OpOverload object contains a pointer to a specific operator overload and a pointer to the parent `OpOverloadPacket` object.
+# You can obtain an OpOverload object through attribute query on OpOverloadPacket.
+class OpOverload(OperatorBase):
+    """One specific overload of an operator, owned by an ``OpOverloadPacket``.
+    Bundles the callable for the overload (``op``), a variant that takes an
+    explicit dispatch key first (``op_dk``), the overload's schema and tags,
+    and the per-dispatch-key handler lookup used by the Python dispatcher
+    (``_get_dispatch``).
+    """
+    def __init__(self, overloadpacket, op, op_dk, schema, tags):
+        # overloadpacket: parent packet; its __module__ is copied onto this
+        #     object and onto `op`.
+        # op: callable invoked by __call__.
+        # op_dk: callable invoked as op_dk(dispatch_key, *args, **kwargs).
+        # schema: provides .name ("namespace::opname"), .overload_name and
+        #     .arguments.
+        # tags: stored as-is and exposed through the `tags` property.
+        super().__init__()
+        self._op = op
+        self._op_dk = op_dk
+        self._schema = schema
+        self._overloadpacket = overloadpacket
+        self._tags = tags
+        # An empty overload name in the schema is presented as "default".
+        self._overloadname = (
+            "default" if schema.overload_name == "" else schema.overload_name
+        )
+        # _name is the schema name, with ".<overload_name>" appended when the
+        # overload name is non-empty.
+        self._name = self._schema.name
+        if schema.overload_name:
+            self._name += "." + schema.overload_name
+        self.__name__ = f"{self._schema.name.split('::')[1]}.{self._overloadname}"
+        self.__module__ = overloadpacket.__module__
+        op.__module__ = overloadpacket.__module__
+        self.__qualname__ = self._name
+        self.__annotations__ = {}
+        # Only compute the OperatorHandle when we need it. Not all OpOverloads have
+        # OperatorHandles (the TorchScript ones don't...)
+        self._lazy_handle = None
+        # If the OpOverload was constructed from a Library.def in Python.
+        self._defined_in_python = self.__qualname__ in torch.library._defs
+        # Logic replicated from aten/src/ATen/native/MathBitsFallback.h
+        # is_write stays None when no argument carries alias info; otherwise it
+        # is the OR of is_write over all aliased arguments.
+        is_write = None
+        for a in self._schema.arguments:
+            if a.alias_info is None:
+                continue
+            if is_write is None:
+                is_write = a.alias_info.is_write
+            else:
+                # We will conservatively call mixed mutable/non-mutable
+                # aliased inputs as NOT a view
+                is_write = a.alias_info.is_write or is_write
+        # A view: at least one aliased argument, and none of them written to.
+        self.is_view = is_write is not None and not is_write
+    @property
+    def _namespace(self):
+        """The part of the schema name before ``::``."""
+        return self._schema.name.split("::")[0]
+    @property
+    def _opname(self):
+        """The part of the schema name after ``::``."""
+        return self._schema.name.split("::")[1]
+    @property
+    def _handle(self):
+        """The operator handle for this overload, looked up on first access and then cached."""
+        if self._lazy_handle is None:
+            self._lazy_handle = torch._C._dispatch_find_schema_or_throw(
+                self._schema.name, self._schema.overload_name
+            )
+        return self._lazy_handle
+    # it's a no-op since OpOverload object is immutable and must be unique for a given op overload.
+    def __deepcopy__(self, memo=None):
+        return self
+    def __repr__(self):
+        return "<OpOverload(op='{}.{}', overload='{}')>".format(
+            *self._schema.name.split("::"), self._overloadname
+        )
+    # Use positional-only argument to avoid naming collision with aten ops arguments
+    # that are named "self". This way, all the aten ops can be called by kwargs.
+    def __call__(self, /, *args, **kwargs):
+        """Call the underlying ``op`` with the given arguments."""
+        return self._op(*args, **kwargs)
+    # Use positional-only argument to avoid naming collision with aten ops arguments
+    # that are named "self". This way, all the aten ops can be called by kwargs.
+    def redispatch(self, /, keyset, *args, **kwargs):
+        """Call ``redispatch_boxed`` on the operator handle with ``keyset`` and the arguments."""
+        return self._handle.redispatch_boxed(keyset, *args, **kwargs)
+    def __hash__(self):
+        # Hash follows the wrapped op callable.
+        return hash(self._op)
+    # `my_namespace.my_op_name.overload_name`
+    def __str__(self):
+        return "{}.{}.{}".format(*self._schema.name.split("::"), self._overloadname)
+    def has_kernel_for_dispatch_key(self, k):
+        """True if the base class or the torch._C dispatcher query reports a kernel for ``k``."""
+        return super().has_kernel_for_dispatch_key(
+            k
+        ) or torch._C._dispatch_has_kernel_for_dispatch_key(self.name(), k)
+    def has_kernel_for_any_dispatch_key(self, ks):
+        """True if the torch._C dispatcher query or the base class reports a kernel for any key in ``ks``."""
+        return torch._C._dispatch_has_kernel_for_any_dispatch_key(
+            self.name(), ks
+        ) or super().has_kernel_for_any_dispatch_key(ks)
+    @property
+    def namespace(self):
+        """The part of the schema name before ``::`` (same value as ``_namespace``)."""
+        return self._schema.name.split("::")[0]
+    def _can_decompose(self):
+        """True if a CompositeImplicitAutograd kernel exists in ``py_kernels`` or in the dispatcher."""
+        dk = DispatchKey.CompositeImplicitAutograd
+        return dk in self.py_kernels or torch._C._dispatch_has_kernel_for_dispatch_key(
+            self.name(), dk
+        )
+    def decompose(self, *args, **kwargs):
+        """Run the CompositeImplicitAutograd kernel on the arguments.
+        A kernel registered in ``py_kernels`` takes precedence over the one
+        known to the dispatcher.  Returns (does not raise) ``NotImplemented``
+        when neither exists.
+        """
+        dk = DispatchKey.CompositeImplicitAutograd
+        if dk in self.py_kernels:
+            # NB: This branch is not too necessary anymore, because we can
+            # apply Python CompositeImplicitAutograd *before* tracing
+            # using Python dispatcher (also taking advantage of the autograd
+            # formula).  But it's included for completeness
+            return self.py_kernels[dk](*args, **kwargs)
+        elif torch._C._dispatch_has_kernel_for_dispatch_key(self.name(), dk):
+            return self._op_dk(dk, *args, **kwargs)
+        else:
+            return NotImplemented
+    # Remove a dispatch key from the dispatch cache.  This will force it to get
+    # recomputed the next time.  Does nothing if the key is not in the cache.
+    # WARNING: if you register a dispatch key to py_kernels of an OpOverload,
+    # calling _uncache_dispatch on that key is NOT sufficient to apply your change,
+    # because a single registration may affect MULTIPLE dispatch keys (e.g.,
+    # registering Autograd affects AutogradCPU).  _uncache_dispatch is to be used
+    # only if you are specifically modifying how _get_dispatch handles a
+    # particular input 'key'.
+    def _uncache_dispatch(self, key):
+        self._dispatch_cache.pop(key, None)
+    # This implements the pre-computation logic for the Python dispatcher.
+    def _get_dispatch(self, key):
+        """Compute (and usually cache) what should handle dispatch key ``key``.
+        Returns either a callable to invoke with the op's arguments, or a
+        ``DispatchKey`` when no Python-side handler applies.  Results are
+        stored in ``self._dispatch_cache`` except for PreDispatch, whose
+        handling depends on the modes active at call time.
+        """
+        # This is only called upon a cache miss
+        assert key not in self._dispatch_cache, f"{self} {key}"
+        if key == DispatchKey.Python:
+            # Nothing registered per mode type and not a torchbind op: cache
+            # and return the key itself.
+            if not isinstance(self, TorchBindOpOverload) and not self.python_key_table:
+                self._dispatch_cache[key] = key
+                add_cached_op(self)
+                return key
+            def handler(*args, **kwargs):
+                from torch.utils._python_dispatch import _get_current_dispatch_mode
+                # TODO: We also need to handle tensor subclasses here
+                # TODO(voz): We should walk all the nodes here / turn it into a list, topmode is ok for now.
+                curr_mode = type(_get_current_dispatch_mode())
+                # NOTE(review): `curr_mode` is a type, so it is never None (it
+                # is NoneType when no mode is active) and this assert cannot
+                # fire; in that case the table lookup below simply misses.
+                assert (
+                    curr_mode is not None
+                ), "Illegal invocation of dispatch on torch._C.DispatchKey.Python without a mode."
+                if curr_mode not in self.python_key_table:
+                    if isinstance(self, TorchBindOpOverload):
+                        with torch.utils._python_dispatch._pop_mode_temporarily() as mode:
+                            return torch._library.utils.handle_dispatch_mode(
+                                mode, self, *args, **kwargs
+                            )
+                    else:
+                        return self._op_dk(key, *args, **kwargs)
+                # A handler is registered for this mode type: call it with the
+                # mode temporarily popped.
+                with torch.utils._python_dispatch._pop_mode_temporarily() as mode:
+                    return self.python_key_table[curr_mode](mode, *args, **kwargs)
+            self._dispatch_cache[key] = handler
+            add_cached_op(self)
+            return handler
+        functionality_key = torch._C._to_functionality_key(key)  # type: ignore[attr-defined]
+        if functionality_key == DispatchKey.PreDispatch:
+            curr_stack_len = _len_torch_dispatch_stack_pre_dispatch()
+            # The check for Python in the exclude set is so we properly respect `with no_dispatch()`
+            # calls inside of a mode.
+            if (
+                curr_stack_len > 0
+                and not torch._C._dispatch_tls_is_dispatch_key_excluded(
+                    DispatchKey.Python
+                )
+            ):
+                def handler(*args, **kwargs):
+                    # Pops the top PreDispatch mode for the duration of the
+                    # call and re-installs it afterwards, even on error.
+                    @contextlib.contextmanager
+                    def _temporarily_pop_modes_from_pre_dispatch():
+                        top_mode = _pop_mode_from_pre_dispatch()
+                        try:
+                            yield top_mode
+                        finally:
+                            _set_mode_pre_dispatch(top_mode)
+                    with _temporarily_pop_modes_from_pre_dispatch() as curr_mode:
+                        return torch._library.utils.handle_dispatch_mode(
+                            curr_mode, self, *args, **kwargs
+                        )
+                # Note [Not Caching Per-Dispatch-Key Mode Handlers]
+                # Note that we're not caching this handler.  There isn't really a point, since the slow bit
+                # is the handler itself (in python).
+                # Also, not caching means that we don't have to reset the cache when any existing
+                # modes go out of scope (which in of itself takes time to loop through all operators).
+                return handler
+        final_key = resolve_key(self, key)
+        # See Note [Not Caching Per-Dispatch-Key Mode Handlers]
+        cache_result = key != DispatchKey.PreDispatch
+        # TODO: We could potentially have lots of debugging wrappers against
+        # dispatch keys; design some general registration mechanism instead of
+        # having if statement for each of them
+        if key == DispatchKey.Functionalize:
+            import torch._dispatch.python as pydispatch
+            if pydispatch.CROSSREF_FUNCTIONALIZE:
+                handler = pydispatch.make_crossref_functionalize(self, final_key)
+                if cache_result:
+                    self._dispatch_cache[key] = handler
+                    add_cached_op(self)
+                return handler
+        # Python kernel registered for the resolved key if there is one,
+        # otherwise the resolved key itself.
+        r = self.py_kernels.get(final_key, final_key)
+        if cache_result:
+            self._dispatch_cache[key] = r
+            add_cached_op(self)
+        return r
+    def name(self):
+        """Return the schema name, with ``.<overload_name>`` appended for non-default overloads."""
+        return self._name
+    @property
+    def overloadpacket(self):
+        """The parent ``OpOverloadPacket`` passed to the constructor."""
+        return self._overloadpacket
+    @property
+    def op(self):
+        """The underlying callable passed to the constructor."""
+        return self._op
+    @property
+    def tags(self):
+        """The tags passed to the constructor."""
+        return self._tags
+    # TODO: add more methods to expose information about input and output arguments
+# TorchBindOpOverload are those custom ops for which at least one overload's
+# schema contains a torch.ScriptObject (i.e. custom class) input.
+# A TorchBindOpOverload skips the C++ dispatcher and is dispatched purely in Python
+# when its inputs contain a FakeScriptObject, in a similar way to higher order ops.
+class TorchBindOpOverload(OpOverload):
+    def _fallthrough_keys(self) -> List[DispatchKey]:
+        # TODO: we should be calling the fallback for these, but a fallthrough is almost close
+        # enough to the fallback in most cases that we care about.
+        _DEFAULT_FALLTHROUGH_KEYS = [
+            DispatchKey.Autograd,
+            DispatchKey.AutogradCPU,
+            DispatchKey.AutogradCUDA,
+            DispatchKey.ADInplaceOrView,
+            DispatchKey.BackendSelect,
+            DispatchKey.PythonTLSSnapshot,
+            DispatchKey.PythonDispatcher,
+        ]
+        def _may_use_fallthrough_instead_of_fallback(key: DispatchKey):
+            if torch._C._dispatch_has_kernel_for_dispatch_key(self.name(), key):
+                return torch._C._dispatch_kernel_for_dispatch_key_is_fallthrough(
+                    self.name(), key
+                )
+            return (
+                key not in self.py_kernels
+                or self.py_kernels[key] is torch.library.fallthrough_kernel
+            )
+        return [
+            key
+            for key in _DEFAULT_FALLTHROUGH_KEYS
+            if _may_use_fallthrough_instead_of_fallback(key)
+        ]
+    @contextlib.contextmanager
+    def _register_as_effectful_op_temporarily(self):
+        from torch._higher_order_ops.effects import (
+            _EffectType,
+            _register_effectful_op,
+            SIDE_EFFECTS,
+        )
+        try:
+            if self not in SIDE_EFFECTS:
+                _register_effectful_op(self, _EffectType.ORDERED)
+            yield
+        finally:
+            if self in SIDE_EFFECTS:
+                del SIDE_EFFECTS[self]
+    # Use positional-only argument to avoid naming collision with aten ops arguments
+    # that are named "self". This way, all the aten ops can be called by kwargs.
+    def __call__(self, /, *args, **kwargs):
+        if _must_dispatch_in_python(args, kwargs):
+            # When any inputs are FakeScriptObject, we need to
+            # skip c++ dispatcher and dispatch in python through _get_dispatch of python_dispatcher
+            # because C++ dispatcher will check the schema and cannot recognize FakeScriptObject.
+            #
+            # Note:
+            # 1. We only register the torchbind op temporarily as effectful op because we only want
+            #    the effect token functionalization logic to be applied during tracing. Otherwise, the behavior
+            #    of the eagerly executing the op might change after tracing.
+            # 2. We don't want to register the op as effectful for all torchbind ops in ctor because this might
+            #    cause unexpected behavior for some autograd.profiler ops e.g. profiler._record_function_exit._RecordFunction.
+            with self._register_as_effectful_op_temporarily():
+                return self._dispatch_in_python(args, kwargs, self._fallthrough_keys())
+        return self._op(*args, **kwargs)
+    def _dispatch_in_python(self, args, kwargs, fallthrough_keys):
+        non_fallthrough_keys = torch._C._dispatch_keyset_full()
+        for key in fallthrough_keys:
+            non_fallthrough_keys = non_fallthrough_keys.remove(key)
+        dispatch_key_set = _compute_keyset(args, kwargs, non_fallthrough_keys)
+        dispatch_key = dispatch_key_set.highestPriorityTypeId()
+        handler = (
+            self._get_dispatch(dispatch_key)
+            if dispatch_key not in self._dispatch_cache
+            else self._dispatch_cache[dispatch_key]
+        )
+        if isinstance(handler, DispatchKey):
+            # fallthrough keys can be registered at runtime via torch.library.impl
+            # so need to add it to fallthrough_keys and re-dispatch.
+            if torch._C._dispatch_kernel_for_dispatch_key_is_fallthrough(
+                self.name(), dispatch_key
+            ):
+                return self._dispatch_in_python(
+                    args, kwargs, fallthrough_keys + [dispatch_key]
+                )
+            raise RuntimeError(
+                f"Torchbind op {self} received a FakeScriptObject input when dispatching {handler}."
+                f" but no python implementation is found."
+                f" Please file an issue on this when you encounter this error."
+                f" This error can happen when you export or compile the model."
+                f" It can still happpen even if a C++ implementation for {dispatch_key}. "
+                f" has been registered. That's because FakeScriptObject purely lives in python and cannot work "
+                f" with a C++ implementation."
+            )
+        assert isinstance(handler, Callable)  # type: ignore[arg-type]
+        return handler(*args, **kwargs)
+def _must_dispatch_in_python(args, kwargs):
+    return pytree.tree_any(
+        lambda obj: isinstance(
+            obj, torch._library.fake_class_registry.FakeScriptObject
+        ),
+        (args, kwargs),
+    )
+def _has_script_object_arg(schema: torch.FunctionSchema) -> bool:
+    return any(isinstance(arg.type, torch.ClassType) for arg in schema.arguments)
+# OpOverloadPacket class contains pointer to a base unresolved operator that doesn't correspond to a specific operator
+# You can obtain an OpOverload object through attribute query.
+class OpOverloadPacket:
+    def __init__(self, qualified_op_name, op_name, op, overload_names):
+        # These attributes are accessible on the object through the properties
+        # defined below but are immutable
+        self._qualified_op_name = qualified_op_name
+        self.__name__ = op_name
+        self._op = op
+        self._overload_names = overload_names
+        self._dir = []
+        self._has_torchbind_op_overload = any(
+            _has_script_object_arg(schema) for schema in self._schemas.values()
+        )
+    # it's a no-op since OpOverloadPacket object is immutable and must be unique for a given op.
+    def __deepcopy__(self, memo=None):
+        return self
+    def __repr__(self):
+        return "<OpOverloadPacket(op='{}.{}')>".format(
+            *self._qualified_op_name.split("::")
+        )
+    def __hash__(self):
+        return hash(self._op)
+    def __str__(self):
+        return "{}.{}".format(*self._qualified_op_name.split("::"))
+    @property
+    def op(self):
+        return self._op
+    @property
+    def _schemas(self):
+        return {
+            overload_name: torch._C._get_schema(self._qualified_op_name, overload_name)
+            for overload_name in self._overload_names
+        }
+    def __getattr__(self, key):
+        # It is not a valid op_name when __file__ is passed in
+        if key == "__file__":
+            return "torch.ops"
+        # ensure that query for dunder attributes that does not exist on
+        # opoverloadpacket but instead exists on the self._op object does not unnecessarily call
+        # `_get_operation_overload` (which is an expensive operation).
+        # This is done to prevent any potential slowdown. This list can be extended
+        # if there exists other attributes like `__name__` that only exist on self._op and not on the
+        # opoverloadpacket.
+        # This is ok since we are guaranteed that an overload name for an aten op can't start with '__'
+        try:
+            if key.startswith("__"):
+                return getattr(self._op, key)
+        except AttributeError:
+            # for consistency because it seems weird to
+            # throw an attribute error with a message containing
+            # an object name different from the one the attribute
+            # query was performed on.
+            raise AttributeError(
+                f"'{str(self)}' can't have an overload name beginning with '__' and the "
+                f"underlying op {str(self._op)} has no attribute {key} either."
+            ) from None
+        try:
+            # This is ok since we are guaranteed that an overload name for an aten op can't be 'default'
+            use_key = "" if key == "default" else key
+            # TODO: disallow access to overloads registered by JIT
+            op_dk_tags = torch._C._get_operation_overload(
+                self._qualified_op_name, use_key
+            )
+            if op_dk_tags is None:
+                raise AttributeError(
+                    f"The underlying op of '{str(self)}' has no overload name '{key}'"
+                )
+            op_, op_dk_, tags = op_dk_tags
+            schema = torch._C._get_schema(self._qualified_op_name, use_key)
+            overload = (
+                OpOverload(self, op_, op_dk_, schema, tags)
+                if not _has_script_object_arg(schema)
+                else TorchBindOpOverload(self, op_, op_dk_, schema, tags)
+            )
+            # cache the overload object
+            setattr(self, key, overload)
+            self._dir.append(key)
+            return overload
+        except RuntimeError:
+            raise AttributeError(
+                f"The underlying op of '{str(self)}' has no overload name '{key}'"
+            ) from None
+    def __iter__(self):
+        return iter(self._dir)
+    # Use positional-only argument to avoid naming collision with aten ops arguments
+    # that are named "self". This way, all the aten ops can be called by kwargs.
+    def __call__(self, /, *args, **kwargs):
+        # overloading __call__ to ensure torch.ops.foo.bar()
+        # is still callable from JIT
+        # We save the function ptr as the `op` attribute on
+        # OpOverloadPacket to access it here.
+        # Directly calling OverloadPacket goes into C++, which will check
+        # the schema and cause an error for torchbind op when inputs consist of FakeScriptObject so we
+        # intercept it here and call TorchBindOpverload instead.
+        if self._has_torchbind_op_overload and _must_dispatch_in_python(args, kwargs):
+            return _call_overload_packet_from_python(self, args, kwargs)
+        return self._op(*args, **(kwargs or {}))
+    # TODO: use this to make a __dir__
+    def overloads(self):
+        return [n if n else "default" for n in self._overload_names]
+# Note - this mirrors the logic of the cpp_function defined in jit/python/init.cpp
+# _jit_get_operations, which calls _get_operation_for_overload_or_packet.
+def _call_overload_packet_from_python(op: OpOverloadPacket, args, kwargs):
+    # Re-use the torch function handling logic in cpp
+    torch_function_called, ret = torch._C._maybe_call_torch_function_for_op_packet(
+        op, *args, **kwargs
+    )
+    if torch_function_called:
+        return ret
+    # The following mirrors getOpWithStack.
+    # In cpp, we do a schema matching for the arguments, and call ToIValue to
+    # to check whether the arguments are valid. But need to do similar things here
+    # and check the schema whether the FakeScriptObject is the corresponding fake class
+    # of the actual class used in schema.
+    exceptions = {}
+    found_op = None
+    for overload_name in op.overloads():
+        op_overload = getattr(op, overload_name)
+        try:
+            _ = torch._C._check_schema_allow_fake_script_object(
+                op_overload._schema, *args, **kwargs
+            )
+            found_op = op_overload
+            break
+        except RuntimeError as e:
+            exceptions[overload_name] = e
+    if found_op:
+        return found_op(*args, **kwargs)
+    err_msg = (
+        f"Fail to match any TorchBindOverload of {op} with following exceptions:\n"
+    )
+    for i, (key, msg) in enumerate(exceptions.items()):
+        err_msg += f"Overload name {key}:\n {msg}\n"
+    raise RuntimeError(err_msg)
+# Resolution of torch.fn is different from torch.ops.aten.fn
+# torch.fn uses the Python argparser, matches with the
+# appropriate schema, and calls into the unboxed version of the method
+# torch.ops.aten.fn resolution is done via the mechanism defined in JIT.
+# JIT creates a stack of all the overloads and then tries to match the
+# correct one at runtime and always calls into the boxed version of the method
+# Autograd codegen creates VariableType, TracerType,
+# inplace or view type and python bindings.
+# Aten codegen generates tensor methods for the tensor class.
+# _OpNamespace is a subclass of ModuleType because the torch script
+# allows attribute lookups on modules only. Since we want torch.ops.foo.bar()
+# to work from script, we need to ensure ops and foo are modules
+class _OpNamespace(types.ModuleType):
+    """
+    An op namespace to dynamically bind Operators into Python.
+    Say a user has created a custom Operator called "my_namespace::my_op". To
+    call this op, the user will write torch.ops.my_namespace.my_op(...).
+    At startup, this operation will not yet be bound into Python. Instead, the
+    following sequence of magic tricks will occur:
+    1. `torch.ops.my_namespace` will invoke the `__getattr__` magic method
+       on the `torch.ops` object, which will create a new `_OpNamespace`
+       object called `my_namespace` and set it as an attribute on the `ops`
+       object.
+    2. `torch.ops.my_namespace.my_op` will then invoke `__getattr__` on
+       the `my_namespace` object, which will retrieve the operation via
+       `torch.get_operation`, a function bound from C++, and then in a similar
+       fashion bind this new object onto the `my_namespace` object.
+    3. `torch.ops.my_namespace.my_op(...)` then calls this new operation
+        and subsequent accesses will incur no further lookup (the namespace and
+        operation will already exist).
+    """
+    def __init__(self, name):
+        super().__init__("torch.ops." + name)
+        self.name = name
+        self._dir = []
+    def __iter__(self):
+        return iter(self._dir)
+    def __getattr__(self, op_name):
+        # It is not a valid op_name when __file__ is passed in
+        if op_name == "__file__":
+            return "torch.ops"
+        elif op_name in ["__origin__", "__self__"]:
+            raise AttributeError(
+                f"Invalid attribute '{op_name}' for '_OpNamespace' '{self.name}'"
+            )
+        # Get the op `my_namespace::my_op` if available. This will also check
+        # for overloads and raise an exception if there are more than one.
+        namespace_name = self.name
+        qualified_op_name = f"{namespace_name}::{op_name}"
+        module_name = self.__module__ + "." + namespace_name
+        try:
+            op, overload_names = _get_packet(qualified_op_name, module_name)
+            if op is None:
+                raise AttributeError(
+                    f"'_OpNamespace' '{self.name}' object has no attribute '{op_name}'"
+                )
+        except RuntimeError as e:
+            # Turn this into AttributeError so getattr(obj, key, default)
+            # works (this is called by TorchScript with __origin__)
+            raise AttributeError(
+                f"'_OpNamespace' '{self.name}' object has no attribute '{op_name}'"
+            ) from e
+        op.__module__ = module_name
+        opoverloadpacket = OpOverloadPacket(
+            qualified_op_name, op_name, op, overload_names
+        )
+        opoverloadpacket.__module__ = self.__module__ + "." + namespace_name
+        # cache the opoverloadpacket to ensure that each op corresponds to
+        # a unique OpOverloadPacket object
+        setattr(self, op_name, opoverloadpacket)
+        self._dir.append(op_name)
+        return opoverloadpacket
+def _get_packet(qualname, op_module):
+    op, overload_names = torch._C._jit_get_operation(qualname)
+    if op is not None:
+        # let the script frontend know that op is identical to the builtin op
+        # with qualified_op_name
+        torch.jit._builtins._register_builtin(op, qualname)
+        op.__module__ = op_module
+    return op, overload_names
+def _refresh_packet(packet):
+    op, overload_names = _get_packet(packet._qualified_op_name, packet._op.__module__)
+    assert op is not None
+    packet._op = op
+    packet._overload_names = overload_names
+class _PyOpNamespace(_OpNamespace):
+    def __init__(self, name, ops):
+        super().__init__(name)
+        self._ops = ops
+    def __getattr__(self, name):
+        # Following _OpNamespace.__getattr__, we cache the op on the _PyOpNamespace object.
+        op = self._ops.get(name, None)
+        if op is None:
+            raise AttributeError(
+                f"'_PyOpNamespace' '{self.name}' object has no attribute '{name}'"
+            )
+        setattr(self, name, op)
+        return op
+class _Ops(types.ModuleType):
+    __file__ = "_ops.py"
+    def __init__(self):
+        super().__init__("torch.ops")
+        self.loaded_libraries = set()
+        self._higher_order_op_namespace = _PyOpNamespace(
+            "torch.ops.higher_order", _higher_order_ops
+        )
+        self._dir = []
+    def __getattr__(self, name):
+        # Check if the name is a HigherOrderOperator
+        if name == "higher_order":
+            return self._higher_order_op_namespace
+        # Here we are creating `torch.ops.my_namespace`
+        namespace = _OpNamespace(name)
+        setattr(self, name, namespace)
+        self._dir.append(name)
+        return namespace
+    def __iter__(self):
+        return iter(self._dir)
+    def import_module(self, module):
+        """
+        Imports a Python module that has torch.library registrations.
+        Generally, to extend PyTorch with custom operators, a user will
+        create a Python module whose import triggers registration of
+        the custom operators via a torch.ops.load_library call or a call
+        to one or more torch.library.* APIs.
+        It is unexpected for Python modules to have side effects, so some
+        linters and formatters will complain. Use this API to import Python
+        modules that contain these torch.library side effects.
+        Args:
+            module (str): The name of the Python module to import
+        """
+        importlib.import_module(module)
+    def load_library(self, path):
+        """
+        Loads a shared library from the given path into the current process.
+        The library being loaded may run global initialization code to register
+        custom operators with the PyTorch JIT runtime. This allows dynamically
+        loading custom operators. For this, you should compile your operator
+        and the static registration code into a shared library object, and then
+        call ``torch.ops.load_library('path/to/libcustom.so')`` to load the
+        shared object.
+        After the library is loaded, it is added to the
+        ``torch.ops.loaded_libraries`` attribute, a set that may be inspected
+        for the paths of all libraries loaded using this function.
+        Args:
+            path (str): A path to a shared library to load.
+        """
+        if torch._running_with_deploy():
+            return
+        path = _utils_internal.resolve_library_path(path)
+        with dl_open_guard():
+            # Import the shared library into the process, thus running its
+            # static (global) initialization code in order to register custom
+            # operators with the JIT.
+            ctypes.CDLL(path)
+        self.loaded_libraries.add(path)
+# The ops "namespace": the single module-level _Ops instance, created when
+# this module is imported. Attribute access on it (see _Ops.__getattr__)
+# creates op namespaces on demand.
+ops: _Ops = _Ops()

.venv/lib/python3.11/site-packages/torch/_python_dispatcher.py ADDED Viewed

	@@ -0,0 +1,182 @@

+# mypy: allow-untyped-defs
+import re
+import torch._C as C
+"""
+PythonDispatcher class is a thin python-binding to C++ dispatcher and it
+is designed to show how dispatcher precompute works. In particular,
+it shows for a certain op `foo`, what the computed dispatch table looks
+like after users register their kernels to certain dispatch keys.
+In the real C++ dispatcher we support many dispatch keys for different
+functionalities. For simplicity PythonDispatcher only supports dispatch
+keys for a single example of each use case. These use cases are listed below:
+- CPU/AutogradCPU: represents in-tree backends which we usually have dedicated inference &
+    autograd kernel in pytorch core library.
+    E.g. CPU, CUDA
+- FPGA/AutogradOther: represents in-tree backends which we usually have backend specific
+    inference kernels, but they share the same autograd kernel specified in AutogradOther.
+    E.g. FPGA, SparseCsrCPU
+- XLA/AutogradXLA: represents out-of-tree backends which we don't have either inference or autograd
+    kernel defined in pytorch core library. Backend owner is responsible for registering both
+    inference & autograd kernels in their extensions(e.g. torch-xla) for the operators they support.
+    E.g. XLA, XPU, MPS
+- CompositeExplicitAutograd: alias key mapped to inference kernels of all backends like CPU, CUDA, XLA etc.
+    Kernels registered to this key MUST work for inference for all backends.
+- Autograd: alias key mapped to autograd of all backends like AutogradCPU, AutogradXLA, AutogradOther.
+    Kernels registered to this key MUST work for autograd for all backends.
+- CompositeImplicitAutograd: alias key CompositeImplicitAutograd = CompositeExplicitAutograd + Autograd
+    Kernels registered to this key MUST work for both inference + autograd for all backends.
+Note we only allow registrations to alias keys inside pytorch core library. E.g
+you shouldn't register a CompositeImplicitAutograd or CompositeExplicitAutograd
+kernel from torch-xla extension, instead you should upstream the kernel into
+pytorch/pytorch repo so that it's available for all backends and continuously
+tested even without the extension.
+Usage:
+  dispatcher = PythonDispatcher()
+  dispatcher.register(["CPU", "XLA", "CompositeImplicitAutograd"])
+  print(dispatcher.dispatchTable()) # This tells you exactly which kernel is used for certain backend.
+  # For more debugging information
+  # print(dispatcher.keys())
+  # print(dispatcher.registrations())
+  # print(dispatcher.rawRegistrations())
+  # print(dispatcher.rawDispatchTable())
+PythonDispatcher calls the C++ dispatcher under the hood to precompute the dispatch table.
+This file only provides the simplified API for developers, relevant test code is located in
+test/test_dispatch.py
+"""
+class PythonDispatcher:
+    namespace = "__test__"
+    name = "foo"
+    # fmt: off
+    runtime_keys = [
+        "CPU", "AutogradCPU",
+        "FPGA", "AutogradOther",
+        "XLA", "AutogradXLA",
+        "Lazy", "AutogradLazy",
+    ]
+    # fmt: on
+    alias_keys = [
+        "CompositeExplicitAutograd",
+        "Autograd",
+        "CompositeImplicitAutograd",
+    ]
+    supported_keys = runtime_keys + alias_keys
+    def __init__(self) -> None:
+        C._dispatch_check_invariants(self.name)  # type: ignore[attr-defined]
+        self.ref = C._dispatch_library("FRAGMENT", self.namespace, "")
+        self.ref.def_("foo(Tensor x) -> Tensor")
+    """
+    Returns a list of dispatch keys supported by PythonDispatcher.
+    You can register kernels to these keys.
+    """
+    def keys(self):
+        return self.supported_keys
+    """
+    Register kernels to the target dispatchKeys.
+    dispatchKeys(list[str]): a list of dispatch keys that you want to register
+      your own kernel. Note that you don't need to write the kernel yourself in
+      this PythonDispatcher.E.g. for CPU key, a kernel(e.g fn_CPU for CPU) is
+      automatically generated and registered.
+    """
+    def register(self, dispatchKeys):
+        # Overriden is not supported and triggers a warning in C++ dispatcher.
+        if len(set(dispatchKeys)) != len(dispatchKeys):
+            raise RuntimeError(
+                f"Overriden is not allowed but found duplicates in {dispatchKeys}."
+            )
+        # We currently forbid this in codegen instead of C++ dispatcher.
+        if (
+            "CompositeImplicitAutograd" in dispatchKeys
+            and "CompositeExplicitAutograd" in dispatchKeys
+        ):
+            raise RuntimeError(
+                "Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed."
+            )
+        for key in dispatchKeys:
+            if key not in self.supported_keys:
+                raise RuntimeError(
+                    f"{key} is not supported, please select a dispatch key in {self.supported_keys}."
+                )
+            self.ref.impl_t_t("foo", dispatch=key, debug="fn_" + key)
+    """
+    Helper function to format (key, kernel).
+    """
+    def _format_line(self, key, kernel):
+        return f"{key:<15} {kernel}\n"
+    """
+    Helper function to print a table header.
+    """
+    def _format_header(self, header):
+        s = f"""
+{header}
+"""
+        s += self._format_line("key", "kernel")
+        s += "---------------------------\n"
+        return s
+    """
+    Returns raw output of all registration info for debugging only.
+    Use registrations() for a simplified version.
+    """
+    def rawRegistrations(self):
+        return C._dispatch_dump(f"{self.namespace}::{self.name}")  # type: ignore[attr-defined]
+    """
+    Returns raw output of computed dispatch table for debugging only.
+    Use dispatchTable() for a simplified version.
+    """
+    def rawDispatchTable(self):
+        return C._dispatch_dump_table(f"{self.namespace}::{self.name}")  # type: ignore[attr-defined]
+    """
+    Returns a table(str) including all the registrations from users.
+    Note this includes registrations to both runtime keys and alias keys.
+    """
+    def registrations(self):
+        output = self._format_header("Registered Kernels")
+        state = self.rawRegistrations()
+        state_entries = state.split("\n")
+        for line in state_entries:
+            first = line.split(":")[0]
+            if any(first.startswith(k) for k in self.supported_keys):
+                kernel = line.split("::")[0].split(" ")[1]
+                output += self._format_line(first, kernel)
+        return output
+    """
+    Returns the computed dispatch table(str). Note this only include
+    runtime keys, registrations to alias keys have been decoded to their
+    mapped runtime keys.
+    """
+    def dispatchTable(self):
+        output = self._format_header("Computed Dispatch Table")
+        table = self.rawDispatchTable()
+        table_entries = table.split("\n")
+        regex = re.compile(r"registered at .*FallbackKernel\.cpp.*(\[)")
+        for line in table_entries:
+            k = line.split(":")[0]
+            if k in self.runtime_keys:
+                entry = regex.sub("[", line)
+                output += self._format_line(k, entry.split(": ")[1])
+        return output