diff --git a/.gitattributes b/.gitattributes
index a37724e09bb850f312f841f5d1e266c16c808bcf..f5441aff30b541916513f3d17b0033aa85c4b75c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -65,3 +65,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/_
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a443b39d4be005f8c3ad9094c13a29466c6a71e
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a030680b7909bc80325a6270a4b211203b3c619
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..609c5d27448bb56474e0a72b0159841d38767667
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b55afbcb629a106f796cc22a60b620437bb906d
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df66e2da2e7869aff4432a9449a0b0b2dfdeefa1
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b60bc4ab48d083293bed4e77e188c08c045bb8a0
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d7ff900f1090db39e1a63c8885cdc1f1fed025f
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f41bf071a7d39372c126cbf888df13a0c52c0d05
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a0e11cb648de2879023d09b10b11e67f7b4647f
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4654e2b846a39714020429aed0cee99f75fa844d
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h
new file mode 100644
index 0000000000000000000000000000000000000000..9b769f5d56d3533279b61b398a3e81cab2035e68
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h
@@ -0,0 +1,1077 @@
+
+ /* Copyright 2010-2014 NVIDIA Corporation.  All rights reserved.
+  *
+  * NOTICE TO LICENSEE:
+  *
+  * The source code and/or documentation ("Licensed Deliverables") are
+  * subject to NVIDIA intellectual property rights under U.S. and
+  * international Copyright laws.
+  *
+  * The Licensed Deliverables contained herein are PROPRIETARY and
+  * CONFIDENTIAL to NVIDIA and are being provided under the terms and
+  * conditions of a form of NVIDIA software license agreement by and
+  * between NVIDIA and Licensee ("License Agreement") or electronically
+  * accepted by Licensee.  Notwithstanding any terms or conditions to
+  * the contrary in the License Agreement, reproduction or disclosure
+  * of the Licensed Deliverables to any third party without the express
+  * written consent of NVIDIA is prohibited.
+  *
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE
+  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+  * OF THESE LICENSED DELIVERABLES.
+  *
+  * U.S. Government End Users.  These Licensed Deliverables are a
+  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+  * 1995), consisting of "commercial computer software" and "commercial
+  * computer software documentation" as such terms are used in 48
+  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
+  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+  * U.S. Government End Users acquire the Licensed Deliverables with
+  * only those rights set forth herein.
+  *
+  * Any use of the Licensed Deliverables in individual and commercial
+  * software must include, in the user documentation and internal
+  * comments to the code, the above Disclaimer and U.S. Government End
+  * Users Notice.
+  */
+
+#if !defined(CURAND_H_)
+#define CURAND_H_
+
+/**
+ * \defgroup HOST Host API
+ *
+ * @{
+ */
+#ifndef __CUDACC_RTC__
+#include <cuda_runtime.h>
+#endif
+
+#ifndef CURANDAPI
+#ifdef _WIN32
+#define CURANDAPI __stdcall
+#else
+#define CURANDAPI
+#endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+#define CURAND_VER_MAJOR 10
+#define CURAND_VER_MINOR 3
+#define CURAND_VER_PATCH 0
+#define CURAND_VER_BUILD 86
+#define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \
+                        CURAND_VER_MINOR *  100 + \
+                        CURAND_VER_PATCH)
+/* CURAND Host API datatypes */
+
+/**
+ * @{
+ */
+
+/**
+ * CURAND function call status types
+ */
+enum curandStatus {
+    CURAND_STATUS_SUCCESS = 0, ///< No errors
+    CURAND_STATUS_VERSION_MISMATCH = 100, ///< Header file and linked library version do not match
+    CURAND_STATUS_NOT_INITIALIZED = 101, ///< Generator not initialized
+    CURAND_STATUS_ALLOCATION_FAILED = 102, ///< Memory allocation failed
+    CURAND_STATUS_TYPE_ERROR = 103, ///< Generator is wrong type
+    CURAND_STATUS_OUT_OF_RANGE = 104, ///< Argument out of range
+    CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105, ///< Length requested is not a multple of dimension
+    CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106, ///< GPU does not have double precision required by MRG32k3a
+    CURAND_STATUS_LAUNCH_FAILURE = 201, ///< Kernel launch failure
+    CURAND_STATUS_PREEXISTING_FAILURE = 202, ///< Preexisting failure on library entry
+    CURAND_STATUS_INITIALIZATION_FAILED = 203, ///< Initialization of CUDA failed
+    CURAND_STATUS_ARCH_MISMATCH = 204, ///< Architecture mismatch, GPU does not support requested feature
+    CURAND_STATUS_INTERNAL_ERROR = 999 ///< Internal library error
+};
+
+/*
+ * CURAND function call status types
+*/
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandStatus curandStatus_t;
+/** \endcond */
+
+/**
+ * CURAND generator types
+ */
+enum curandRngType {
+    CURAND_RNG_TEST = 0,
+    CURAND_RNG_PSEUDO_DEFAULT = 100, ///< Default pseudorandom generator
+    CURAND_RNG_PSEUDO_XORWOW = 101, ///< XORWOW pseudorandom generator
+    CURAND_RNG_PSEUDO_MRG32K3A = 121, ///< MRG32k3a pseudorandom generator
+    CURAND_RNG_PSEUDO_MTGP32 = 141, ///< Mersenne Twister MTGP32 pseudorandom generator
+    CURAND_RNG_PSEUDO_MT19937 = 142, ///< Mersenne Twister MT19937 pseudorandom generator
+    CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161, ///< PHILOX-4x32-10 pseudorandom generator
+    CURAND_RNG_QUASI_DEFAULT = 200, ///< Default quasirandom generator
+    CURAND_RNG_QUASI_SOBOL32 = 201, ///< Sobol32 quasirandom generator
+    CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202,  ///< Scrambled Sobol32 quasirandom generator
+    CURAND_RNG_QUASI_SOBOL64 = 203, ///< Sobol64 quasirandom generator
+    CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204  ///< Scrambled Sobol64 quasirandom generator
+};
+
+/*
+ * CURAND generator types
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandRngType curandRngType_t;
+/** \endcond */
+
+/**
+ * CURAND ordering of results in memory
+ */
+enum curandOrdering {
+    CURAND_ORDERING_PSEUDO_BEST = 100, ///< Best ordering for pseudorandom results
+    CURAND_ORDERING_PSEUDO_DEFAULT = 101, ///< Specific default thread sequence for pseudorandom results, same as CURAND_ORDERING_PSEUDO_BEST
+    CURAND_ORDERING_PSEUDO_SEEDED = 102, ///< Specific seeding pattern for fast lower quality pseudorandom results
+    CURAND_ORDERING_PSEUDO_LEGACY = 103, ///< Specific legacy sequence for pseudorandom results, guaranteed to remain the same for all cuRAND release
+    CURAND_ORDERING_PSEUDO_DYNAMIC = 104, ///< Specific ordering adjusted to the device it is being executed on, provides the best performance
+    CURAND_ORDERING_QUASI_DEFAULT = 201 ///< Specific n-dimensional ordering for quasirandom results
+};
+
+/*
+ * CURAND ordering of results in memory
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandOrdering curandOrdering_t;
+/** \endcond */
+
+/**
+ * CURAND choice of direction vector set
+ */
+enum curandDirectionVectorSet {
+    CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
+    CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
+    CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103, ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
+    CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
+};
+
+/*
+ * CURAND choice of direction vector set
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandDirectionVectorSet curandDirectionVectorSet_t;
+/** \endcond */
+
+/**
+ * CURAND array of 32-bit direction vectors
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef unsigned int curandDirectionVectors32_t[32];
+/** \endcond */
+
+ /**
+ * CURAND array of 64-bit direction vectors
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef unsigned long long curandDirectionVectors64_t[64];
+/** \endcond **/
+
+/**
+ * CURAND generator (opaque)
+ */
+struct curandGenerator_st;
+
+/**
+ * CURAND generator
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef struct curandGenerator_st *curandGenerator_t;
+/** \endcond */
+
+/**
+ * CURAND distribution
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef double curandDistribution_st;
+typedef curandDistribution_st *curandDistribution_t;
+typedef struct curandDistributionShift_st *curandDistributionShift_t;
+/** \endcond */
+/**
+ * CURAND distribution M2
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t;
+typedef struct curandHistogramM2_st *curandHistogramM2_t;
+typedef unsigned int curandHistogramM2K_st;
+typedef curandHistogramM2K_st *curandHistogramM2K_t;
+typedef curandDistribution_st curandHistogramM2V_st;
+typedef curandHistogramM2V_st *curandHistogramM2V_t;
+
+typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;
+/** \endcond */
+
+/*
+ * CURAND METHOD
+ */
+/** \cond UNHIDE_ENUMS */
+enum curandMethod {
+    CURAND_CHOOSE_BEST = 0, // choose best depends on args
+    CURAND_ITR = 1,
+    CURAND_KNUTH = 2,
+    CURAND_HITR = 3,
+    CURAND_M1 = 4,
+    CURAND_M2 = 5,
+    CURAND_BINARY_SEARCH = 6,
+    CURAND_DISCRETE_GAUSS = 7,
+    CURAND_REJECTION = 8,
+    CURAND_DEVICE_API = 9,
+    CURAND_FAST_REJECTION = 10,
+    CURAND_3RD = 11,
+    CURAND_DEFINITION = 12,
+    CURAND_POISSON = 13
+};
+
+typedef enum curandMethod curandMethod_t;
+/** \endcond */
+
+
+#ifndef __CUDACC_RTC__
+
+/**
+ * @}
+ */
+
+/**
+ * \brief Create new random number generator.
+ *
+ * Creates a new random number generator of type \p rng_type
+ * and returns it in \p *generator.
+ *
+ * Legal values for \p rng_type are:
+ * - CURAND_RNG_PSEUDO_DEFAULT
+ * - CURAND_RNG_PSEUDO_XORWOW
+ * - CURAND_RNG_PSEUDO_MRG32K3A
+ * - CURAND_RNG_PSEUDO_MTGP32
+ * - CURAND_RNG_PSEUDO_MT19937
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
+ * - CURAND_RNG_QUASI_DEFAULT
+ * - CURAND_RNG_QUASI_SOBOL32
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
+ * - CURAND_RNG_QUASI_SOBOL64
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
+ *
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
+ * is CURAND_RNG_PSEUDO_XORWOW.  \n
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBBLED_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Pointer to generator
+ * \param rng_type - Type of generator to create
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED, if memory could not be allocated \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
+ *   dynamically linked library version \n
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
+ *
+ */
+curandStatus_t CURANDAPI
+curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type);
+
+/**
+ * \brief Create new host CPU random number generator.
+ *
+ * Creates a new host CPU random number generator of type \p rng_type
+ * and returns it in \p *generator.
+ *
+ * Legal values for \p rng_type are:
+ * - CURAND_RNG_PSEUDO_DEFAULT
+ * - CURAND_RNG_PSEUDO_XORWOW
+ * - CURAND_RNG_PSEUDO_MRG32K3A
+ * - CURAND_RNG_PSEUDO_MTGP32
+ * - CURAND_RNG_PSEUDO_MT19937
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
+ * - CURAND_RNG_QUASI_DEFAULT
+ * - CURAND_RNG_QUASI_SOBOL32
+ *
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
+ * is CURAND_RNG_PSEUDO_XORWOW.  \n
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Pointer to generator
+ * \param rng_type - Type of generator to create
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
+ *   dynamically linked library version \n
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
+ */
+curandStatus_t CURANDAPI
+curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type);
+
+/**
+ * \brief Destroy an existing generator.
+ *
+ * Destroy an existing generator and free all memory associated with its state.
+ *
+ * \param generator - Generator to destroy
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if generator was destroyed successfully \n
+ */
+curandStatus_t CURANDAPI
+curandDestroyGenerator(curandGenerator_t generator);
+
+/**
+ * \brief Return the version number of the library.
+ *
+ * Return in \p *version the version number of the dynamically linked CURAND
+ * library.  The format is the same as CUDART_VERSION from the CUDA Runtime.
+ * The only supported configuration is CURAND version equal to CUDA Runtime
+ * version.
+ *
+ * \param version - CURAND library version
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the version number was successfully returned \n
+ */
+curandStatus_t CURANDAPI
+curandGetVersion(int *version);
+
+/**
+* \brief Return the value of the curand property.
+*
+* Return in \p *value the number for the property described by \p type of the
+* dynamically linked CURAND library.
+*
+* \param type - CUDA library property
+* \param value - integer value for the requested property
+*
+* \return
+* - CURAND_STATUS_SUCCESS if the property value was successfully returned \n
+* - CURAND_STATUS_OUT_OF_RANGE if the property type is not recognized \n
+*/
+curandStatus_t CURANDAPI
+curandGetProperty(libraryPropertyType type, int *value);
+
+
+/**
+ * \brief Set the current stream for CURAND kernel launches.
+ *
+ * Set the current stream for CURAND kernel launches.  All library functions
+ * will use this stream until set again.
+ *
+ * \param generator - Generator to modify
+ * \param stream - Stream to use or ::NULL for null stream
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if stream was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetStream(curandGenerator_t generator, cudaStream_t stream);
+
+/**
+ * \brief Set the seed value of the pseudo-random number generator.
+ *
+ * Set the seed value of the pseudorandom number generator.
+ * All values of seed are valid.  Different seeds will produce different sequences.
+ * Different seeds will often not be statistically correlated with each other,
+ * but some pairs of seed values may generate sequences which are statistically correlated.
+ *
+ * \param generator - Generator to modify
+ * \param seed - Seed value
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a pseudorandom number generator \n
+ * - CURAND_STATUS_SUCCESS if generator seed was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed);
+
+/**
+ * \brief Set the absolute offset of the pseudo or quasirandom number generator.
+ *
+ * Set the absolute offset of the pseudo or quasirandom number generator.
+ *
+ * All values of offset are valid.  The offset position is absolute, not
+ * relative to the current position in the sequence.
+ *
+ * \param generator - Generator to modify
+ * \param offset - Absolute offset position
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if generator offset was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset);
+
+/**
+ * \brief Set the ordering of results of the pseudo or quasirandom number generator.
+ *
+ * Set the ordering of results of the pseudo or quasirandom number generator.
+ *
+ * Legal values of \p order for pseudorandom generators are:
+ * - CURAND_ORDERING_PSEUDO_DEFAULT
+ * - CURAND_ORDERING_PSEUDO_BEST
+ * - CURAND_ORDERING_PSEUDO_SEEDED
+ * - CURAND_ORDERING_PSEUDO_LEGACY
+ *
+ * Legal values of \p order for quasirandom generators are:
+ * - CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Generator to modify
+ * \param order - Ordering of results
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_OUT_OF_RANGE if the ordering is not valid \n
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order);
+
+/**
+ * \brief Set the number of dimensions.
+ *
+ * Set the number of dimensions to be generated by the quasirandom number
+ * generator.
+ *
+ * Legal values for \p num_dimensions are 1 to 20000.
+ *
+ * \param generator - Generator to modify
+ * \param num_dimensions - Number of dimensions
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_OUT_OF_RANGE if num_dimensions is not valid \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a quasirandom number generator \n
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions);
+
+/**
+ * \brief Generate 32-bit pseudo or quasirandom numbers.
+ *
+ * Use \p generator to generate \p num 32-bit results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit values with every bit random.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of random 32-bit values to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is a 64 bit quasirandom generator.
+ * (use ::curandGenerateLongLong() with 64 bit quasirandom generators)
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num);
+
+/**
+ * \brief Generate 64-bit quasirandom numbers.
+ *
+ * Use \p generator to generate \p num 64-bit results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit values with every bit random.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of random 64-bit values to generate
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a 64 bit quasirandom generator\n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num);
+
+/**
+ * \brief Generate uniformly distributed floats.
+ *
+ * Use \p generator to generate \p num float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values between \p 0.0f and \p 1.0f,
+ * excluding \p 0.0f and including \p 1.0f.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of floats to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num);
+
+/**
+ * \brief Generate uniformly distributed doubles.
+ *
+ * Use \p generator to generate \p num double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit double precision floating point values between
+ * \p 0.0 and \p 1.0, excluding \p 0.0 and including \p 1.0.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of doubles to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num);
+
+/**
+ * \brief Generate normally distributed doubles.
+ *
+ * Use \p generator to generate \p n float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values with mean \p mean and standard
+ * deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of floats to generate
+ * \param mean - Mean of normal distribution
+ * \param stddev - Standard deviation of normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateNormal(curandGenerator_t generator, float *outputPtr,
+                     size_t n, float mean, float stddev);
+
+/**
+ * \brief Generate normally distributed doubles.
+ *
+ * Use \p generator to generate \p n double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit floating point values with mean \p mean and standard
+ * deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of doubles to generate
+ * \param mean - Mean of normal distribution
+ * \param stddev - Standard deviation of normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr,
+                     size_t n, double mean, double stddev);
+
+/**
+ * \brief Generate log-normally distributed floats.
+ *
+ * Use \p generator to generate \p n float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values with log-normal distribution based on
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ * The normally distributed results are transformed into log-normal distribution.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of floats to generate
+ * \param mean - Mean of associated normal distribution
+ * \param stddev - Standard deviation of associated normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr,
+                     size_t n, float mean, float stddev);
+
+/**
+ * \brief Generate log-normally distributed doubles.
+ *
+ * Use \p generator to generate \p n double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit floating point values with log-normal distribution based on
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ * The normally distributed results are transformed into log-normal distribution.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of doubles to generate
+ * \param mean - Mean of normal distribution
+ * \param stddev - Standard deviation of normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr,
+                     size_t n, double mean, double stddev);
+
+/**
+ * \brief Construct the histogram array for a Poisson distribution.
+ *
+ * Construct the histogram array for the Poisson distribution with lambda \p lambda.
+ * For lambda greater than 2000, an approximation with a normal distribution is used.
+ *
+ * \param lambda - lambda for the Poisson distribution
+ *
+ *
+ * \param discrete_distribution - pointer to the histogram in device memory
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the distribution pointer was null \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
+ * - CURAND_STATUS_SUCCESS if the histogram was generated successfully \n
+ */
+
+curandStatus_t CURANDAPI
+curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution);
+
+
+
+/**
+ * \brief Destroy the histogram array for a discrete distribution (e.g. Poisson).
+ *
+ * Destroy the histogram array for a discrete distribution created by curandCreatePoissonDistribution.
+ *
+ * \param discrete_distribution - pointer to device memory where the histogram is stored
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the histogram was never created \n
+ * - CURAND_STATUS_SUCCESS if the histogram was destroyed successfully \n
+ */
+curandStatus_t CURANDAPI
+curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution);
+
+
+/**
+ * \brief Generate Poisson-distributed unsigned ints.
+ *
+ * Use \p generator to generate \p n unsigned int results into device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and must be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit unsigned int point values with Poisson distribution, with lambda \p lambda.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of unsigned ints to generate
+ * \param lambda - lambda for the Poisson distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension\n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU or sm does not support double precision \n
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+
+curandStatus_t CURANDAPI
+curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr,
+                     size_t n, double lambda);
+// just for internal usage
+curandStatus_t CURANDAPI
+curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr,
+                     size_t n, double lambda, curandMethod_t method);
+
+
+curandStatus_t CURANDAPI
+curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr,
+                       size_t num, unsigned int n, double p);
+// just for internal usage
+curandStatus_t CURANDAPI
+curandGenerateBinomialMethod(curandGenerator_t generator,
+                             unsigned int *outputPtr,
+                             size_t num, unsigned int n, double p,
+                             curandMethod_t method);
+
+
+/**
+ * \brief Setup starting states.
+ *
+ * Generate the starting state of the generator.  This function is
+ * automatically called by generation functions such as
+ * ::curandGenerate() and ::curandGenerateUniform().
+ * It can be called manually for performance testing reasons to separate
+ * timings for starting state generation and random number generation.
+ *
+ * \param generator - Generator to update
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_SUCCESS if the seeds were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateSeeds(curandGenerator_t generator);
+
+/**
+ * \brief Get direction vectors for 32-bit quasirandom number generation.
+ *
+ * Get a pointer to an array of direction vectors that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of direction vectors in host memory.
+ *
+ * The array contains vectors for many dimensions.  Each dimension
+ * has 32 vectors.  Each individual vector is an unsigned int.
+ *
+ * Legal values for \p set are:
+ * - CURAND_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
+ *
+ * \param vectors - Address of pointer in which to return direction vectors
+ * \param set - Which set of direction vectors to use
+ *
+ * \return
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set);
+
+/**
+ * \brief Get scramble constants for 32-bit scrambled Sobol' .
+ *
+ * Get a pointer to an array of scramble constants that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of unsinged ints in host memory.
+ *
+ * The array contains constants for many dimensions.  Each dimension
+ * has a single unsigned int constant.
+ *
+ * \param constants - Address of pointer in which to return scramble constants
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetScrambleConstants32(unsigned int * * constants);
+
+/**
+ * \brief Get direction vectors for 64-bit quasirandom number generation.
+ *
+ * Get a pointer to an array of direction vectors that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of direction vectors in host memory.
+ *
+ * The array contains vectors for many dimensions.  Each dimension
+ * has 64 vectors.  Each individual vector is an unsigned long long.
+ *
+ * Legal values for \p set are:
+ * - CURAND_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
+ *
+ * \param vectors - Address of pointer in which to return direction vectors
+ * \param set - Which set of direction vectors to use
+ *
+ * \return
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set);
+
+/**
+ * \brief Get scramble constants for 64-bit scrambled Sobol' .
+ *
+ * Get a pointer to an array of scramble constants that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of unsinged long longs in host memory.
+ *
+ * The array contains constants for many dimensions.  Each dimension
+ * has a single unsigned long long constant.
+ *
+ * \param constants - Address of pointer in which to return scramble constants
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetScrambleConstants64(unsigned long long * * constants);
+
+/** @} */
+
+#endif // __CUDACC_RTC__
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+
+#endif /* !defined(CURAND_H_) */
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt
@@ -0,0 +1,1568 @@
+End User License Agreement
+--------------------------
+
+
+Preface
+-------
+
+The Software License Agreement in Chapter 1 and the Supplement
+in Chapter 2 contain license terms and conditions that govern
+the use of NVIDIA software. By accepting this agreement, you
+agree to comply with all the terms and conditions applicable
+to the product(s) included herein.
+
+
+NVIDIA Driver
+
+
+Description
+
+This package contains the operating system driver and
+fundamental system software components for NVIDIA GPUs.
+
+
+NVIDIA CUDA Toolkit
+
+
+Description
+
+The NVIDIA CUDA Toolkit provides command-line and graphical
+tools for building, debugging and optimizing the performance
+of applications accelerated by NVIDIA GPUs, runtime and math
+libraries, and documentation including programming guides,
+user manuals, and API references.
+
+
+Default Install Location of CUDA Toolkit
+
+Windows platform:
+
+%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#
+
+
+NVIDIA CUDA Samples
+
+
+Description
+
+This package includes over 100+ CUDA examples that demonstrate
+various CUDA programming principles, and efficient CUDA
+implementation of algorithms in specific application domains.
+
+
+Default Install Location of CUDA Samples
+
+Windows platform:
+
+%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#/samples
+
+and
+
+$HOME/NVIDIA_CUDA-#.#_Samples
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#/samples
+
+
+NVIDIA Nsight Visual Studio Edition (Windows only)
+
+
+Description
+
+NVIDIA Nsight Development Platform, Visual Studio Edition is a
+development environment integrated into Microsoft Visual
+Studio that provides tools for debugging, profiling, analyzing
+and optimizing your GPU computing and graphics applications.
+
+
+Default Install Location of Nsight Visual Studio Edition
+
+Windows platform:
+
+%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
+
+
+1. License Agreement for NVIDIA Software Development Kits
+---------------------------------------------------------
+
+
+Release Date: July 26, 2018
+---------------------------
+
+
+Important NoticeRead before downloading, installing,
+copying or using the licensed software:
+-------------------------------------------------------
+
+This license agreement, including exhibits attached
+("Agreement”) is a legal agreement between you and NVIDIA
+Corporation ("NVIDIA") and governs your use of a NVIDIA
+software development kit (“SDK”).
+
+Each SDK has its own set of software and materials, but here
+is a description of the types of items that may be included in
+a SDK: source code, header files, APIs, data sets and assets
+(examples include images, textures, models, scenes, videos,
+native API input/output files), binary software, sample code,
+libraries, utility programs, programming code and
+documentation.
+
+This Agreement can be accepted only by an adult of legal age
+of majority in the country in which the SDK is used.
+
+If you are entering into this Agreement on behalf of a company
+or other legal entity, you represent that you have the legal
+authority to bind the entity to this Agreement, in which case
+“you” will mean the entity you represent.
+
+If you don’t have the required age or authority to accept
+this Agreement, or if you don’t accept all the terms and
+conditions of this Agreement, do not download, install or use
+the SDK.
+
+You agree to use the SDK only for purposes that are permitted
+by (a) this Agreement, and (b) any applicable law, regulation
+or generally accepted practices or guidelines in the relevant
+jurisdictions.
+
+
+1.1. License
+
+
+1.1.1. License Grant
+
+Subject to the terms of this Agreement, NVIDIA hereby grants
+you a non-exclusive, non-transferable license, without the
+right to sublicense (except as expressly provided in this
+Agreement) to:
+
+  1. Install and use the SDK,
+
+  2. Modify and create derivative works of sample source code
+    delivered in the SDK, and
+
+  3. Distribute those portions of the SDK that are identified
+    in this Agreement as distributable, as incorporated in
+    object code format into a software application that meets
+    the distribution requirements indicated in this Agreement.
+
+
+1.1.2. Distribution Requirements
+
+These are the distribution requirements for you to exercise
+the distribution grant:
+
+  1. Your application must have material additional
+    functionality, beyond the included portions of the SDK.
+
+  2. The distributable portions of the SDK shall only be
+    accessed by your application.
+
+  3. The following notice shall be included in modifications
+    and derivative works of sample source code distributed:
+    “This software contains source code provided by NVIDIA
+    Corporation.”
+
+  4. Unless a developer tool is identified in this Agreement
+    as distributable, it is delivered for your internal use
+    only.
+
+  5. The terms under which you distribute your application
+    must be consistent with the terms of this Agreement,
+    including (without limitation) terms relating to the
+    license grant and license restrictions and protection of
+    NVIDIA’s intellectual property rights. Additionally, you
+    agree that you will protect the privacy, security and
+    legal rights of your application users.
+
+  6. You agree to notify NVIDIA in writing of any known or
+    suspected distribution or use of the SDK not in compliance
+    with the requirements of this Agreement, and to enforce
+    the terms of your agreements with respect to distributed
+    SDK.
+
+
+1.1.3. Authorized Users
+
+You may allow employees and contractors of your entity or of
+your subsidiary(ies) to access and use the SDK from your
+secure network to perform work on your behalf.
+
+If you are an academic institution you may allow users
+enrolled or employed by the academic institution to access and
+use the SDK from your secure network.
+
+You are responsible for the compliance with the terms of this
+Agreement by your authorized users. If you become aware that
+your authorized users didn’t follow the terms of this
+Agreement, you agree to take reasonable steps to resolve the
+non-compliance and prevent new occurrences.
+
+
+1.1.4. Pre-Release SDK
+
+The SDK versions identified as alpha, beta, preview or
+otherwise as pre-release, may not be fully functional, may
+contain errors or design flaws, and may have reduced or
+different security, privacy, accessibility, availability, and
+reliability standards relative to commercial versions of
+NVIDIA software and materials. Use of a pre-release SDK may
+result in unexpected results, loss of data, project delays or
+other unpredictable damage or loss.
+
+You may use a pre-release SDK at your own risk, understanding
+that pre-release SDKs are not intended for use in production
+or business-critical systems.
+
+NVIDIA may choose not to make available a commercial version
+of any pre-release SDK. NVIDIA may also choose to abandon
+development and terminate the availability of a pre-release
+SDK at any time without liability.
+
+
+1.1.5. Updates
+
+NVIDIA may, at its option, make available patches, workarounds
+or other updates to this SDK. Unless the updates are provided
+with their separate governing terms, they are deemed part of
+the SDK licensed to you as provided in this Agreement. You
+agree that the form and content of the SDK that NVIDIA
+provides may change without prior notice to you. While NVIDIA
+generally maintains compatibility between versions, NVIDIA may
+in some cases make changes that introduce incompatibilities in
+future versions of the SDK.
+
+
+1.1.6. Third Party Licenses
+
+The SDK may come bundled with, or otherwise include or be
+distributed with, third party software licensed by a NVIDIA
+supplier and/or open source software provided under an open
+source license. Use of third party software is subject to the
+third-party license terms, or in the absence of third party
+terms, the terms of this Agreement. Copyright to third party
+software is held by the copyright holders indicated in the
+third-party software or license.
+
+
+1.1.7. Reservation of Rights
+
+NVIDIA reserves all rights, title, and interest in and to the
+SDK, not expressly granted to you under this Agreement.
+
+
+1.2. Limitations
+
+The following license limitations apply to your use of the
+SDK:
+
+  1. You may not reverse engineer, decompile or disassemble,
+    or remove copyright or other proprietary notices from any
+    portion of the SDK or copies of the SDK.
+
+  2. Except as expressly provided in this Agreement, you may
+    not copy, sell, rent, sublicense, transfer, distribute,
+    modify, or create derivative works of any portion of the
+    SDK. For clarity, you may not distribute or sublicense the
+    SDK as a stand-alone product.
+
+  3. Unless you have an agreement with NVIDIA for this
+    purpose, you may not indicate that an application created
+    with the SDK is sponsored or endorsed by NVIDIA.
+
+  4. You may not bypass, disable, or circumvent any
+    encryption, security, digital rights management or
+    authentication mechanism in the SDK.
+
+  5. You may not use the SDK in any manner that would cause it
+    to become subject to an open source software license. As
+    examples, licenses that require as a condition of use,
+    modification, and/or distribution that the SDK be:
+
+      a. Disclosed or distributed in source code form;
+
+      b. Licensed for the purpose of making derivative works;
+        or
+
+      c. Redistributable at no charge.
+
+  6. Unless you have an agreement with NVIDIA for this
+    purpose, you may not use the SDK with any system or
+    application where the use or failure of the system or
+    application can reasonably be expected to threaten or
+    result in personal injury, death, or catastrophic loss.
+    Examples include use in avionics, navigation, military,
+    medical, life support or other life critical applications.
+    NVIDIA does not design, test or manufacture the SDK for
+    these critical uses and NVIDIA shall not be liable to you
+    or any third party, in whole or in part, for any claims or
+    damages arising from such uses.
+
+  7. You agree to defend, indemnify and hold harmless NVIDIA
+    and its affiliates, and their respective employees,
+    contractors, agents, officers and directors, from and
+    against any and all claims, damages, obligations, losses,
+    liabilities, costs or debt, fines, restitutions and
+    expenses (including but not limited to attorney’s fees
+    and costs incident to establishing the right of
+    indemnification) arising out of or related to your use of
+    the SDK outside of the scope of this Agreement, or not in
+    compliance with its terms.
+
+
+1.3. Ownership
+
+  1.  NVIDIA or its licensors hold all rights, title and
+    interest in and to the SDK and its modifications and
+    derivative works, including their respective intellectual
+    property rights, subject to your rights described in this
+    section. This SDK may include software and materials from
+    NVIDIA’s licensors, and these licensors are intended
+    third party beneficiaries that may enforce this Agreement
+    with respect to their intellectual property rights.
+
+  2.  You hold all rights, title and interest in and to your
+    applications and your derivative works of the sample
+    source code delivered in the SDK, including their
+    respective intellectual property rights, subject to
+    NVIDIA’s rights described in this section.
+
+  3. You may, but don’t have to, provide to NVIDIA
+    suggestions, feature requests or other feedback regarding
+    the SDK, including possible enhancements or modifications
+    to the SDK. For any feedback that you voluntarily provide,
+    you hereby grant NVIDIA and its affiliates a perpetual,
+    non-exclusive, worldwide, irrevocable license to use,
+    reproduce, modify, license, sublicense (through multiple
+    tiers of sublicensees), and distribute (through multiple
+    tiers of distributors) it without the payment of any
+    royalties or fees to you. NVIDIA will use feedback at its
+    choice. NVIDIA is constantly looking for ways to improve
+    its products, so you may send feedback to NVIDIA through
+    the developer portal at https://developer.nvidia.com.
+
+
+1.4. No Warranties
+
+THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
+FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
+ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
+OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
+BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
+ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
+WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
+DEALING OR COURSE OF TRADE.
+
+
+1.5. Limitation of Liability
+
+TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
+AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
+PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
+OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
+PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
+WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
+WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
+OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
+PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
+LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
+TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
+AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
+NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
+LIMIT.
+
+These exclusions and limitations of liability shall apply
+regardless if NVIDIA or its affiliates have been advised of
+the possibility of such damages, and regardless of whether a
+remedy fails its essential purpose. These exclusions and
+limitations of liability form an essential basis of the
+bargain between the parties, and, absent any of these
+exclusions or limitations of liability, the provisions of this
+Agreement, including, without limitation, the economic terms,
+would be substantially different.
+
+
+1.6. Termination
+
+  1. This Agreement will continue to apply until terminated by
+    either you or NVIDIA as described below.
+
+  2. If you want to terminate this Agreement, you may do so by
+    stopping to use the SDK.
+
+  3. NVIDIA may, at any time, terminate this Agreement if:
+
+      a. (i) you fail to comply with any term of this
+        Agreement and the non-compliance is not fixed within
+        thirty (30) days following notice from NVIDIA (or
+        immediately if you violate NVIDIA’s intellectual
+        property rights);
+
+      b. (ii) you commence or participate in any legal
+        proceeding against NVIDIA with respect to the SDK; or
+
+      c. (iii) NVIDIA decides to no longer provide the SDK in
+        a country or, in NVIDIA’s sole discretion, the
+        continued use of it is no longer commercially viable.
+
+  4. Upon any termination of this Agreement, you agree to
+    promptly discontinue use of the SDK and destroy all copies
+    in your possession or control. Your prior distributions in
+    accordance with this Agreement are not affected by the
+    termination of this Agreement. Upon written request, you
+    will certify in writing that you have complied with your
+    commitments under this section. Upon any termination of
+    this Agreement all provisions survive except for the
+    license grant provisions.
+
+
+1.7. General
+
+If you wish to assign this Agreement or your rights and
+obligations, including by merger, consolidation, dissolution
+or operation of law, contact NVIDIA to ask for permission. Any
+attempted assignment not approved by NVIDIA in writing shall
+be void and of no effect. NVIDIA may assign, delegate or
+transfer this Agreement and its rights and obligations, and if
+to a non-affiliate you will be notified.
+
+You agree to cooperate with NVIDIA and provide reasonably
+requested information to verify your compliance with this
+Agreement.
+
+This Agreement will be governed in all respects by the laws of
+the United States and of the State of Delaware as those laws
+are applied to contracts entered into and performed entirely
+within Delaware by Delaware residents, without regard to the
+conflicts of laws principles. The United Nations Convention on
+Contracts for the International Sale of Goods is specifically
+disclaimed. You agree to all terms of this Agreement in the
+English language.
+
+The state or federal courts residing in Santa Clara County,
+California shall have exclusive jurisdiction over any dispute
+or claim arising out of this Agreement. Notwithstanding this,
+you agree that NVIDIA shall still be allowed to apply for
+injunctive remedies or an equivalent type of urgent legal
+relief in any jurisdiction.
+
+If any court of competent jurisdiction determines that any
+provision of this Agreement is illegal, invalid or
+unenforceable, such provision will be construed as limited to
+the extent necessary to be consistent with and fully
+enforceable under the law and the remaining provisions will
+remain in full force and effect. Unless otherwise specified,
+remedies are cumulative.
+
+Each party acknowledges and agrees that the other is an
+independent contractor in the performance of this Agreement.
+
+The SDK has been developed entirely at private expense and is
+“commercial items” consisting of “commercial computer
+software” and “commercial computer software
+documentation” provided with RESTRICTED RIGHTS. Use,
+duplication or disclosure by the U.S. Government or a U.S.
+Government subcontractor is subject to the restrictions in
+this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
+in subparagraphs (c)(1) and (2) of the Commercial Computer
+Software - Restricted Rights clause at FAR 52.227-19, as
+applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
+Expressway, Santa Clara, CA 95051.
+
+The SDK is subject to United States export laws and
+regulations. You agree that you will not ship, transfer or
+export the SDK into any country, or use the SDK in any manner,
+prohibited by the United States Bureau of Industry and
+Security or economic sanctions regulations administered by the
+U.S. Department of Treasury’s Office of Foreign Assets
+Control (OFAC), or any applicable export laws, restrictions or
+regulations. These laws include restrictions on destinations,
+end users and end use. By accepting this Agreement, you
+confirm that you are not a resident or citizen of any country
+currently embargoed by the U.S. and that you are not otherwise
+prohibited from receiving the SDK.
+
+Any notice delivered by NVIDIA to you under this Agreement
+will be delivered via mail, email or fax. You agree that any
+notices that NVIDIA sends you electronically will satisfy any
+legal communication requirements. Please direct your legal
+notices or other correspondence to NVIDIA Corporation, 2788
+San Tomas Expressway, Santa Clara, California 95051, United
+States of America, Attention: Legal Department.
+
+This Agreement and any exhibits incorporated into this
+Agreement constitute the entire agreement of the parties with
+respect to the subject matter of this Agreement and supersede
+all prior negotiations or documentation exchanged between the
+parties relating to this SDK license. Any additional and/or
+conflicting terms on documents issued by you are null, void,
+and invalid. Any amendment or waiver under this Agreement
+shall be in writing and signed by representatives of both
+parties.
+
+
+2. CUDA Toolkit Supplement to Software License Agreement for
+NVIDIA Software Development Kits
+------------------------------------------------------------
+
+
+Release date: August 16, 2018
+-----------------------------
+
+The terms in this supplement govern your use of the NVIDIA
+CUDA Toolkit SDK under the terms of your license agreement
+(“Agreement”) as modified by this supplement. Capitalized
+terms used but not defined below have the meaning assigned to
+them in the Agreement.
+
+This supplement is an exhibit to the Agreement and is
+incorporated as an integral part of the Agreement. In the
+event of conflict between the terms in this supplement and the
+terms in the Agreement, the terms in this supplement govern.
+
+
+2.1. License Scope
+
+The SDK is licensed for you to develop applications only for
+use in systems with NVIDIA GPUs.
+
+
+2.2. Distribution
+
+The portions of the SDK that are distributable under the
+Agreement are listed in Attachment A.
+
+
+2.3. Operating Systems
+
+Those portions of the SDK designed exclusively for use on the
+Linux or FreeBSD operating systems, or other operating systems
+derived from the source code to these operating systems, may
+be copied and redistributed for use in accordance with this
+Agreement, provided that the object code files are not
+modified in any way (except for unzipping of compressed
+files).
+
+
+2.4. Audio and Video Encoders and Decoders
+
+You acknowledge and agree that it is your sole responsibility
+to obtain any additional third-party licenses required to
+make, have made, use, have used, sell, import, and offer for
+sale your products or services that include or incorporate any
+third-party software and content relating to audio and/or
+video encoders and decoders from, including but not limited
+to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
+MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
+under this Agreement any necessary patent or other rights with
+respect to any audio and/or video encoders and decoders.
+
+
+2.5. Licensing
+
+If the distribution terms in this Agreement are not suitable
+for your organization, or for any questions regarding this
+Agreement, please contact NVIDIA at
+nvidia-compute-license-questions@nvidia.com.
+
+
+2.6. Attachment A
+
+The following portions of the SDK are distributable under the
+Agreement:
+
+Component
+
+CUDA Runtime
+
+Windows
+
+cudart.dll, cudart_static.lib, cudadevrt.lib
+
+Mac OSX
+
+libcudart.dylib, libcudart_static.a, libcudadevrt.a
+
+Linux
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Android
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Component
+
+CUDA FFT Library
+
+Windows
+
+cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
+
+Mac OSX
+
+libcufft.dylib, libcufft_static.a, libcufftw.dylib,
+libcufftw_static.a
+
+Linux
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Android
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Component
+
+CUDA BLAS Library
+
+Windows
+
+cublas.dll, cublasLt.dll
+
+Mac OSX
+
+libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
+libcublasLt_static.a
+
+Linux
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Android
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Component
+
+NVIDIA "Drop-in" BLAS Library
+
+Windows
+
+nvblas.dll
+
+Mac OSX
+
+libnvblas.dylib
+
+Linux
+
+libnvblas.so
+
+Component
+
+CUDA Sparse Matrix Library
+
+Windows
+
+cusparse.dll, cusparse.lib
+
+Mac OSX
+
+libcusparse.dylib, libcusparse_static.a
+
+Linux
+
+libcusparse.so, libcusparse_static.a
+
+Android
+
+libcusparse.so, libcusparse_static.a
+
+Component
+
+CUDA Linear Solver Library
+
+Windows
+
+cusolver.dll, cusolver.lib
+
+Mac OSX
+
+libcusolver.dylib, libcusolver_static.a
+
+Linux
+
+libcusolver.so, libcusolver_static.a
+
+Android
+
+libcusolver.so, libcusolver_static.a
+
+Component
+
+CUDA Random Number Generation Library
+
+Windows
+
+curand.dll, curand.lib
+
+Mac OSX
+
+libcurand.dylib, libcurand_static.a
+
+Linux
+
+libcurand.so, libcurand_static.a
+
+Android
+
+libcurand.so, libcurand_static.a
+
+Component
+
+CUDA Accelerated Graph Library
+
+Component
+
+NVIDIA Performance Primitives Library
+
+Windows
+
+nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
+nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
+nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
+nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
+nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
+
+Mac OSX
+
+libnppc.dylib, libnppc_static.a, libnppial.dylib,
+libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
+libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
+libnppidei_static.a, libnppif.dylib, libnppif_static.a,
+libnppig.dylib, libnppig_static.a, libnppim.dylib,
+libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
+libnpps.dylib, libnpps_static.a
+
+Linux
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Android
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Component
+
+NVIDIA JPEG Library
+
+Linux
+
+libnvjpeg.so, libnvjpeg_static.a
+
+Component
+
+Internal common library required for statically linking to
+cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
+
+Mac OSX
+
+libculibos.a
+
+Linux
+
+libculibos.a
+
+Component
+
+NVIDIA Runtime Compilation Library and Header
+
+All
+
+nvrtc.h
+
+Windows
+
+nvrtc.dll, nvrtc-builtins.dll
+
+Mac OSX
+
+libnvrtc.dylib, libnvrtc-builtins.dylib
+
+Linux
+
+libnvrtc.so, libnvrtc-builtins.so
+
+Component
+
+NVIDIA Optimizing Compiler Library
+
+Windows
+
+nvvm.dll
+
+Mac OSX
+
+libnvvm.dylib
+
+Linux
+
+libnvvm.so
+
+Component
+
+NVIDIA Common Device Math Functions Library
+
+Windows
+
+libdevice.10.bc
+
+Mac OSX
+
+libdevice.10.bc
+
+Linux
+
+libdevice.10.bc
+
+Component
+
+CUDA Occupancy Calculation Header Library
+
+All
+
+cuda_occupancy.h
+
+Component
+
+CUDA Half Precision Headers
+
+All
+
+cuda_fp16.h, cuda_fp16.hpp
+
+Component
+
+CUDA Profiling Tools Interface (CUPTI) Library
+
+Windows
+
+cupti.dll
+
+Mac OSX
+
+libcupti.dylib
+
+Linux
+
+libcupti.so
+
+Component
+
+NVIDIA Tools Extension Library
+
+Windows
+
+nvToolsExt.dll, nvToolsExt.lib
+
+Mac OSX
+
+libnvToolsExt.dylib
+
+Linux
+
+libnvToolsExt.so
+
+Component
+
+NVIDIA CUDA Driver Libraries
+
+Linux
+
+libcuda.so, libnvidia-fatbinaryloader.so,
+libnvidia-ptxjitcompiler.so
+
+The NVIDIA CUDA Driver Libraries are only distributable in
+applications that meet this criteria:
+
+  1. The application was developed starting from a NVIDIA CUDA
+    container obtained from Docker Hub or the NVIDIA GPU
+    Cloud, and
+
+  2. The resulting application is packaged as a Docker
+    container and distributed to users on Docker Hub or the
+    NVIDIA GPU Cloud only.
+
+
+2.7. Attachment B
+
+
+Additional Licensing Obligations
+
+The following third party components included in the SOFTWARE
+are licensed to Licensee pursuant to the following terms and
+conditions:
+
+  1. Licensee's use of the GDB third party component is
+    subject to the terms and conditions of GNU GPL v3:
+
+    This product includes copyrighted third-party software licensed
+    under the terms of the GNU General Public License v3 ("GPL v3").
+    All third-party software packages are copyright by their respective
+    authors. GPL v3 terms and conditions are hereby incorporated into
+    the Agreement by this reference:     http://www.gnu.org/licenses/gpl.txt
+
+    Consistent with these licensing requirements, the software
+    listed below is provided under the terms of the specified
+    open source software licenses. To obtain source code for
+    software provided under licenses that require
+    redistribution of source code, including the GNU General
+    Public License (GPL) and GNU Lesser General Public License
+    (LGPL), contact oss-requests@nvidia.com. This offer is
+    valid for a period of three (3) years from the date of the
+    distribution of this product by NVIDIA CORPORATION.
+
+    Component          License
+    CUDA-GDB           GPL v3
+
+  2. Licensee represents and warrants that any and all third
+    party licensing and/or royalty payment obligations in
+    connection with Licensee's use of the H.264 video codecs
+    are solely the responsibility of Licensee.
+
+  3. Licensee's use of the Thrust library is subject to the
+    terms and conditions of the Apache License Version 2.0.
+    All third-party software packages are copyright by their
+    respective authors. Apache License Version 2.0 terms and
+    conditions are hereby incorporated into the Agreement by
+    this reference.
+    http://www.apache.org/licenses/LICENSE-2.0.html
+
+    In addition, Licensee acknowledges the following notice:
+    Thrust includes source code from the Boost Iterator,
+    Tuple, System, and Random Number libraries.
+
+    Boost Software License - Version 1.0 - August 17th, 2003
+    . . . .
+
+    Permission is hereby granted, free of charge, to any person or
+    organization obtaining a copy of the software and accompanying
+    documentation covered by this license (the "Software") to use,
+    reproduce, display, distribute, execute, and transmit the Software,
+    and to prepare derivative works of the Software, and to permit
+    third-parties to whom the Software is furnished to do so, all
+    subject to the following:
+
+    The copyright notices in the Software and this entire statement,
+    including the above license grant, this restriction and the following
+    disclaimer, must be included in all copies of the Software, in whole
+    or in part, and all derivative works of the Software, unless such
+    copies or derivative works are solely in the form of machine-executable
+    object code generated by a source language processor.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+    NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+    ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
+    OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+    OTHER DEALINGS IN THE SOFTWARE.
+
+  4. Licensee's use of the LLVM third party component is
+    subject to the following terms and conditions:
+
+    ======================================================
+    LLVM Release License
+    ======================================================
+    University of Illinois/NCSA
+    Open Source License
+
+    Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+    All rights reserved.
+
+    Developed by:
+
+        LLVM Team
+
+        University of Illinois at Urbana-Champaign
+
+        http://llvm.org
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal with the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    *  Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimers.
+
+    *  Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimers in the
+       documentation and/or other materials provided with the distribution.
+
+    *  Neither the names of the LLVM Team, University of Illinois at Urbana-
+       Champaign, nor the names of its contributors may be used to endorse or
+       promote products derived from this Software without specific prior
+       written permission.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+    THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS WITH THE SOFTWARE.
+
+  5. Licensee's use (e.g. nvprof) of the PCRE third party
+    component is subject to the following terms and
+    conditions:
+
+    ------------
+    PCRE LICENCE
+    ------------
+    PCRE is a library of functions to support regular expressions whose syntax
+    and semantics are as close as possible to those of the Perl 5 language.
+    Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
+    specified below. The documentation for PCRE, supplied in the "doc"
+    directory, is distributed under the same terms as the software itself. The
+    basic library functions are written in C and are freestanding. Also
+    included in the distribution is a set of C++ wrapper functions, and a just-
+    in-time compiler that can be used to optimize pattern matching. These are
+    both optional features that can be omitted when the library is built.
+
+    THE BASIC LIBRARY FUNCTIONS
+    ---------------------------
+    Written by:       Philip Hazel
+    Email local part: ph10
+    Email domain:     cam.ac.uk
+    University of Cambridge Computing Service,
+    Cambridge, England.
+    Copyright (c) 1997-2012 University of Cambridge
+    All rights reserved.
+
+    PCRE JUST-IN-TIME COMPILATION SUPPORT
+    -------------------------------------
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Emain domain:     freemail.hu
+    Copyright(c) 2010-2012 Zoltan Herczeg
+    All rights reserved.
+
+    STACK-LESS JUST-IN-TIME COMPILER
+    --------------------------------
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Emain domain:     freemail.hu
+    Copyright(c) 2009-2012 Zoltan Herczeg
+    All rights reserved.
+
+    THE C++ WRAPPER FUNCTIONS
+    -------------------------
+    Contributed by:   Google Inc.
+    Copyright (c) 2007-2012, Google Inc.
+    All rights reserved.
+
+    THE "BSD" LICENCE
+    -----------------
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+      * Redistributions of source code must retain the above copyright notice,
+        this list of conditions and the following disclaimer.
+
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+      * Neither the name of the University of Cambridge nor the name of Google
+        Inc. nor the names of their contributors may be used to endorse or
+        promote products derived from this software without specific prior
+        written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  6. Some of the cuBLAS library routines were written by or
+    derived from code written by Vasily Volkov and are subject
+    to the Modified Berkeley Software Distribution License as
+    follows:
+
+    Copyright (c) 2007-2009, Regents of the University of California
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the University of California, Berkeley nor
+          the names of its contributors may be used to endorse or promote
+          products derived from this software without specific prior
+          written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  7. Some of the cuBLAS library routines were written by or
+    derived from code written by Davide Barbieri and are
+    subject to the Modified Berkeley Software Distribution
+    License as follows:
+
+    Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * The name of the author may not be used to endorse or promote
+          products derived from this software without specific prior
+          written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  8. Some of the cuBLAS library routines were derived from
+    code developed by the University of Tennessee and are
+    subject to the Modified Berkeley Software Distribution
+    License as follows:
+
+    Copyright (c) 2010 The University of Tennessee.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer listed in this license in the documentation and/or
+          other materials provided with the distribution.
+        * Neither the name of the copyright holders nor the names of its
+          contributors may be used to endorse or promote products derived
+          from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  9. Some of the cuBLAS library routines were written by or
+    derived from code written by Jonathan Hogg and are subject
+    to the Modified Berkeley Software Distribution License as
+    follows:
+
+    Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the STFC nor the names of its contributors
+          may be used to endorse or promote products derived from this
+          software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
+    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+    OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+    IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  10. Some of the cuBLAS library routines were written by or
+    derived from code written by Ahmad M. Abdelfattah, David
+    Keyes, and Hatem Ltaief, and are subject to the Apache
+    License, Version 2.0, as follows:
+
+     -- (C) Copyright 2013 King Abdullah University of Science and Technology
+      Authors:
+      Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
+      David Keyes (david.keyes@kaust.edu.sa)
+      Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
+
+      Redistribution  and  use  in  source and binary forms, with or without
+      modification,  are  permitted  provided  that the following conditions
+      are met:
+
+      * Redistributions  of  source  code  must  retain  the above copyright
+        notice,  this  list  of  conditions  and  the  following  disclaimer.
+      * Redistributions  in  binary  form must reproduce the above copyright
+        notice,  this list of conditions and the following disclaimer in the
+        documentation  and/or other materials provided with the distribution.
+      * Neither  the  name of the King Abdullah University of Science and
+        Technology nor the names of its contributors may be used to endorse
+        or promote products derived from this software without specific prior
+        written permission.
+
+      THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+      ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+      LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+      A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+      HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+      SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
+      LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+      DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+      THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+      (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+      OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
+
+  11. Some of the cuSPARSE library routines were written by or
+    derived from code written by Li-Wen Chang and are subject
+    to the NCSA Open Source License as follows:
+
+    Copyright (c) 2012, University of Illinois.
+
+    All rights reserved.
+
+    Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal with the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimers in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the names of IMPACT Group, University of Illinois, nor
+          the names of its contributors may be used to endorse or promote
+          products derived from this Software without specific prior
+          written permission.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+    IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+    SOFTWARE.
+
+  12. Some of the cuRAND library routines were written by or
+    derived from code written by Mutsuo Saito and Makoto
+    Matsumoto and are subject to the following license:
+
+    Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+    University. All rights reserved.
+
+    Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+    University and University of Tokyo.  All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the Hiroshima University nor the names of
+          its contributors may be used to endorse or promote products
+          derived from this software without specific prior written
+          permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  13. Some of the cuRAND library routines were derived from
+    code developed by D. E. Shaw Research and are subject to
+    the following license:
+
+    Copyright 2010-2011, D. E. Shaw Research.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions, and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions, and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of D. E. Shaw Research nor the names of its
+          contributors may be used to endorse or promote products derived
+          from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  14. Some of the Math library routines were written by or
+    derived from code developed by Norbert Juffa and are
+    subject to the following license:
+
+    Copyright (c) 2015-2017, Norbert Juffa
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  15. Licensee's use of the lz4 third party component is
+    subject to the following terms and conditions:
+
+    Copyright (C) 2011-2013, Yann Collet.
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  16. The NPP library uses code from the Boost Math Toolkit,
+    and is subject to the following license:
+
+    Boost Software License - Version 1.0 - August 17th, 2003
+    . . . .
+
+    Permission is hereby granted, free of charge, to any person or
+    organization obtaining a copy of the software and accompanying
+    documentation covered by this license (the "Software") to use,
+    reproduce, display, distribute, execute, and transmit the Software,
+    and to prepare derivative works of the Software, and to permit
+    third-parties to whom the Software is furnished to do so, all
+    subject to the following:
+
+    The copyright notices in the Software and this entire statement,
+    including the above license grant, this restriction and the following
+    disclaimer, must be included in all copies of the Software, in whole
+    or in part, and all derivative works of the Software, unless such
+    copies or derivative works are solely in the form of machine-executable
+    object code generated by a source language processor.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+    NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+    ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
+    OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+    OTHER DEALINGS IN THE SOFTWARE.
+
+  17. Portions of the Nsight Eclipse Edition is subject to the
+    following license:
+
+    The Eclipse Foundation makes available all content in this plug-in
+    ("Content"). Unless otherwise indicated below, the Content is provided
+    to you under the terms and conditions of the Eclipse Public License
+    Version 1.0 ("EPL"). A copy of the EPL is available at http://
+    www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
+    will mean the Content.
+
+    If you did not receive this Content directly from the Eclipse
+    Foundation, the Content is being redistributed by another party
+    ("Redistributor") and different terms and conditions may apply to your
+    use of any object code in the Content. Check the Redistributor's
+    license that was provided with the Content. If no such license exists,
+    contact the Redistributor. Unless otherwise indicated below, the terms
+    and conditions of the EPL still apply to any source code in the
+    Content and such source code may be obtained at http://www.eclipse.org.
+
+  18. Some of the cuBLAS library routines uses code from
+    OpenAI, which is subject to the following license:
+
+    License URL
+    https://github.com/openai/openai-gemm/blob/master/LICENSE
+
+    License Text
+    The MIT License
+
+    Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+
+  19. Licensee's use of the Visual Studio Setup Configuration
+    Samples is subject to the following license:
+
+    The MIT License (MIT)
+    Copyright (C) Microsoft Corporation. All rights reserved.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without restriction,
+    including without limitation the rights to use, copy, modify, merge,
+    publish, distribute, sublicense, and/or sell copies of the Software,
+    and to permit persons to whom the Software is furnished to do so,
+    subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+  20. Licensee's use of linmath.h header for CPU functions for
+    GL vector/matrix operations from lunarG is subject to the
+    Apache License Version 2.0.
+
+  21. The DX12-CUDA sample uses the d3dx12.h header, which is
+    subject to the MIT license .
+
+-----------------
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt
@@ -0,0 +1,1568 @@
+End User License Agreement
+--------------------------
+
+
+Preface
+-------
+
+The Software License Agreement in Chapter 1 and the Supplement
+in Chapter 2 contain license terms and conditions that govern
+the use of NVIDIA software. By accepting this agreement, you
+agree to comply with all the terms and conditions applicable
+to the product(s) included herein.
+
+
+NVIDIA Driver
+
+
+Description
+
+This package contains the operating system driver and
+fundamental system software components for NVIDIA GPUs.
+
+
+NVIDIA CUDA Toolkit
+
+
+Description
+
+The NVIDIA CUDA Toolkit provides command-line and graphical
+tools for building, debugging and optimizing the performance
+of applications accelerated by NVIDIA GPUs, runtime and math
+libraries, and documentation including programming guides,
+user manuals, and API references.
+
+
+Default Install Location of CUDA Toolkit
+
+Windows platform:
+
+%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#
+
+
+NVIDIA CUDA Samples
+
+
+Description
+
+This package includes over 100+ CUDA examples that demonstrate
+various CUDA programming principles, and efficient CUDA
+implementation of algorithms in specific application domains.
+
+
+Default Install Location of CUDA Samples
+
+Windows platform:
+
+%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#/samples
+
+and
+
+$HOME/NVIDIA_CUDA-#.#_Samples
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#/samples
+
+
+NVIDIA Nsight Visual Studio Edition (Windows only)
+
+
+Description
+
+NVIDIA Nsight Development Platform, Visual Studio Edition is a
+development environment integrated into Microsoft Visual
+Studio that provides tools for debugging, profiling, analyzing
+and optimizing your GPU computing and graphics applications.
+
+
+Default Install Location of Nsight Visual Studio Edition
+
+Windows platform:
+
+%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
+
+
+1. License Agreement for NVIDIA Software Development Kits
+---------------------------------------------------------
+
+
+Release Date: July 26, 2018
+---------------------------
+
+
+Important NoticeRead before downloading, installing,
+copying or using the licensed software:
+-------------------------------------------------------
+
+This license agreement, including exhibits attached
+("Agreement”) is a legal agreement between you and NVIDIA
+Corporation ("NVIDIA") and governs your use of a NVIDIA
+software development kit (“SDK”).
+
+Each SDK has its own set of software and materials, but here
+is a description of the types of items that may be included in
+a SDK: source code, header files, APIs, data sets and assets
+(examples include images, textures, models, scenes, videos,
+native API input/output files), binary software, sample code,
+libraries, utility programs, programming code and
+documentation.
+
+This Agreement can be accepted only by an adult of legal age
+of majority in the country in which the SDK is used.
+
+If you are entering into this Agreement on behalf of a company
+or other legal entity, you represent that you have the legal
+authority to bind the entity to this Agreement, in which case
+“you” will mean the entity you represent.
+
+If you don’t have the required age or authority to accept
+this Agreement, or if you don’t accept all the terms and
+conditions of this Agreement, do not download, install or use
+the SDK.
+
+You agree to use the SDK only for purposes that are permitted
+by (a) this Agreement, and (b) any applicable law, regulation
+or generally accepted practices or guidelines in the relevant
+jurisdictions.
+
+
+1.1. License
+
+
+1.1.1. License Grant
+
+Subject to the terms of this Agreement, NVIDIA hereby grants
+you a non-exclusive, non-transferable license, without the
+right to sublicense (except as expressly provided in this
+Agreement) to:
+
+  1. Install and use the SDK,
+
+  2. Modify and create derivative works of sample source code
+    delivered in the SDK, and
+
+  3. Distribute those portions of the SDK that are identified
+    in this Agreement as distributable, as incorporated in
+    object code format into a software application that meets
+    the distribution requirements indicated in this Agreement.
+
+
+1.1.2. Distribution Requirements
+
+These are the distribution requirements for you to exercise
+the distribution grant:
+
+  1. Your application must have material additional
+    functionality, beyond the included portions of the SDK.
+
+  2. The distributable portions of the SDK shall only be
+    accessed by your application.
+
+  3. The following notice shall be included in modifications
+    and derivative works of sample source code distributed:
+    “This software contains source code provided by NVIDIA
+    Corporation.”
+
+  4. Unless a developer tool is identified in this Agreement
+    as distributable, it is delivered for your internal use
+    only.
+
+  5. The terms under which you distribute your application
+    must be consistent with the terms of this Agreement,
+    including (without limitation) terms relating to the
+    license grant and license restrictions and protection of
+    NVIDIA’s intellectual property rights. Additionally, you
+    agree that you will protect the privacy, security and
+    legal rights of your application users.
+
+  6. You agree to notify NVIDIA in writing of any known or
+    suspected distribution or use of the SDK not in compliance
+    with the requirements of this Agreement, and to enforce
+    the terms of your agreements with respect to distributed
+    SDK.
+
+
+1.1.3. Authorized Users
+
+You may allow employees and contractors of your entity or of
+your subsidiary(ies) to access and use the SDK from your
+secure network to perform work on your behalf.
+
+If you are an academic institution you may allow users
+enrolled or employed by the academic institution to access and
+use the SDK from your secure network.
+
+You are responsible for the compliance with the terms of this
+Agreement by your authorized users. If you become aware that
+your authorized users didn’t follow the terms of this
+Agreement, you agree to take reasonable steps to resolve the
+non-compliance and prevent new occurrences.
+
+
+1.1.4. Pre-Release SDK
+
+The SDK versions identified as alpha, beta, preview or
+otherwise as pre-release, may not be fully functional, may
+contain errors or design flaws, and may have reduced or
+different security, privacy, accessibility, availability, and
+reliability standards relative to commercial versions of
+NVIDIA software and materials. Use of a pre-release SDK may
+result in unexpected results, loss of data, project delays or
+other unpredictable damage or loss.
+
+You may use a pre-release SDK at your own risk, understanding
+that pre-release SDKs are not intended for use in production
+or business-critical systems.
+
+NVIDIA may choose not to make available a commercial version
+of any pre-release SDK. NVIDIA may also choose to abandon
+development and terminate the availability of a pre-release
+SDK at any time without liability.
+
+
+1.1.5. Updates
+
+NVIDIA may, at its option, make available patches, workarounds
+or other updates to this SDK. Unless the updates are provided
+with their separate governing terms, they are deemed part of
+the SDK licensed to you as provided in this Agreement. You
+agree that the form and content of the SDK that NVIDIA
+provides may change without prior notice to you. While NVIDIA
+generally maintains compatibility between versions, NVIDIA may
+in some cases make changes that introduce incompatibilities in
+future versions of the SDK.
+
+
+1.1.6. Third Party Licenses
+
+The SDK may come bundled with, or otherwise include or be
+distributed with, third party software licensed by a NVIDIA
+supplier and/or open source software provided under an open
+source license. Use of third party software is subject to the
+third-party license terms, or in the absence of third party
+terms, the terms of this Agreement. Copyright to third party
+software is held by the copyright holders indicated in the
+third-party software or license.
+
+
+1.1.7. Reservation of Rights
+
+NVIDIA reserves all rights, title, and interest in and to the
+SDK, not expressly granted to you under this Agreement.
+
+
+1.2. Limitations
+
+The following license limitations apply to your use of the
+SDK:
+
+  1. You may not reverse engineer, decompile or disassemble,
+    or remove copyright or other proprietary notices from any
+    portion of the SDK or copies of the SDK.
+
+  2. Except as expressly provided in this Agreement, you may
+    not copy, sell, rent, sublicense, transfer, distribute,
+    modify, or create derivative works of any portion of the
+    SDK. For clarity, you may not distribute or sublicense the
+    SDK as a stand-alone product.
+
+  3. Unless you have an agreement with NVIDIA for this
+    purpose, you may not indicate that an application created
+    with the SDK is sponsored or endorsed by NVIDIA.
+
+  4. You may not bypass, disable, or circumvent any
+    encryption, security, digital rights management or
+    authentication mechanism in the SDK.
+
+  5. You may not use the SDK in any manner that would cause it
+    to become subject to an open source software license. As
+    examples, licenses that require as a condition of use,
+    modification, and/or distribution that the SDK be:
+
+      a. Disclosed or distributed in source code form;
+
+      b. Licensed for the purpose of making derivative works;
+        or
+
+      c. Redistributable at no charge.
+
+  6. Unless you have an agreement with NVIDIA for this
+    purpose, you may not use the SDK with any system or
+    application where the use or failure of the system or
+    application can reasonably be expected to threaten or
+    result in personal injury, death, or catastrophic loss.
+    Examples include use in avionics, navigation, military,
+    medical, life support or other life critical applications.
+    NVIDIA does not design, test or manufacture the SDK for
+    these critical uses and NVIDIA shall not be liable to you
+    or any third party, in whole or in part, for any claims or
+    damages arising from such uses.
+
+  7. You agree to defend, indemnify and hold harmless NVIDIA
+    and its affiliates, and their respective employees,
+    contractors, agents, officers and directors, from and
+    against any and all claims, damages, obligations, losses,
+    liabilities, costs or debt, fines, restitutions and
+    expenses (including but not limited to attorney’s fees
+    and costs incident to establishing the right of
+    indemnification) arising out of or related to your use of
+    the SDK outside of the scope of this Agreement, or not in
+    compliance with its terms.
+
+
+1.3. Ownership
+
+  1.  NVIDIA or its licensors hold all rights, title and
+    interest in and to the SDK and its modifications and
+    derivative works, including their respective intellectual
+    property rights, subject to your rights described in this
+    section. This SDK may include software and materials from
+    NVIDIA’s licensors, and these licensors are intended
+    third party beneficiaries that may enforce this Agreement
+    with respect to their intellectual property rights.
+
+  2.  You hold all rights, title and interest in and to your
+    applications and your derivative works of the sample
+    source code delivered in the SDK, including their
+    respective intellectual property rights, subject to
+    NVIDIA’s rights described in this section.
+
+  3. You may, but don’t have to, provide to NVIDIA
+    suggestions, feature requests or other feedback regarding
+    the SDK, including possible enhancements or modifications
+    to the SDK. For any feedback that you voluntarily provide,
+    you hereby grant NVIDIA and its affiliates a perpetual,
+    non-exclusive, worldwide, irrevocable license to use,
+    reproduce, modify, license, sublicense (through multiple
+    tiers of sublicensees), and distribute (through multiple
+    tiers of distributors) it without the payment of any
+    royalties or fees to you. NVIDIA will use feedback at its
+    choice. NVIDIA is constantly looking for ways to improve
+    its products, so you may send feedback to NVIDIA through
+    the developer portal at https://developer.nvidia.com.
+
+
+1.4. No Warranties
+
+THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
+FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
+ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
+OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
+BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
+ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
+WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
+DEALING OR COURSE OF TRADE.
+
+
+1.5. Limitation of Liability
+
+TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
+AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
+PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
+OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
+PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
+WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
+WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
+OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
+PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
+LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
+TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
+AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
+NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
+LIMIT.
+
+These exclusions and limitations of liability shall apply
+regardless if NVIDIA or its affiliates have been advised of
+the possibility of such damages, and regardless of whether a
+remedy fails its essential purpose. These exclusions and
+limitations of liability form an essential basis of the
+bargain between the parties, and, absent any of these
+exclusions or limitations of liability, the provisions of this
+Agreement, including, without limitation, the economic terms,
+would be substantially different.
+
+
+1.6. Termination
+
+  1. This Agreement will continue to apply until terminated by
+    either you or NVIDIA as described below.
+
+  2. If you want to terminate this Agreement, you may do so by
+    stopping to use the SDK.
+
+  3. NVIDIA may, at any time, terminate this Agreement if:
+
+      a. (i) you fail to comply with any term of this
+        Agreement and the non-compliance is not fixed within
+        thirty (30) days following notice from NVIDIA (or
+        immediately if you violate NVIDIA’s intellectual
+        property rights);
+
+      b. (ii) you commence or participate in any legal
+        proceeding against NVIDIA with respect to the SDK; or
+
+      c. (iii) NVIDIA decides to no longer provide the SDK in
+        a country or, in NVIDIA’s sole discretion, the
+        continued use of it is no longer commercially viable.
+
+  4. Upon any termination of this Agreement, you agree to
+    promptly discontinue use of the SDK and destroy all copies
+    in your possession or control. Your prior distributions in
+    accordance with this Agreement are not affected by the
+    termination of this Agreement. Upon written request, you
+    will certify in writing that you have complied with your
+    commitments under this section. Upon any termination of
+    this Agreement all provisions survive except for the
+    license grant provisions.
+
+
+1.7. General
+
+If you wish to assign this Agreement or your rights and
+obligations, including by merger, consolidation, dissolution
+or operation of law, contact NVIDIA to ask for permission. Any
+attempted assignment not approved by NVIDIA in writing shall
+be void and of no effect. NVIDIA may assign, delegate or
+transfer this Agreement and its rights and obligations, and if
+to a non-affiliate you will be notified.
+
+You agree to cooperate with NVIDIA and provide reasonably
+requested information to verify your compliance with this
+Agreement.
+
+This Agreement will be governed in all respects by the laws of
+the United States and of the State of Delaware as those laws
+are applied to contracts entered into and performed entirely
+within Delaware by Delaware residents, without regard to the
+conflicts of laws principles. The United Nations Convention on
+Contracts for the International Sale of Goods is specifically
+disclaimed. You agree to all terms of this Agreement in the
+English language.
+
+The state or federal courts residing in Santa Clara County,
+California shall have exclusive jurisdiction over any dispute
+or claim arising out of this Agreement. Notwithstanding this,
+you agree that NVIDIA shall still be allowed to apply for
+injunctive remedies or an equivalent type of urgent legal
+relief in any jurisdiction.
+
+If any court of competent jurisdiction determines that any
+provision of this Agreement is illegal, invalid or
+unenforceable, such provision will be construed as limited to
+the extent necessary to be consistent with and fully
+enforceable under the law and the remaining provisions will
+remain in full force and effect. Unless otherwise specified,
+remedies are cumulative.
+
+Each party acknowledges and agrees that the other is an
+independent contractor in the performance of this Agreement.
+
+The SDK has been developed entirely at private expense and is
+“commercial items” consisting of “commercial computer
+software” and “commercial computer software
+documentation” provided with RESTRICTED RIGHTS. Use,
+duplication or disclosure by the U.S. Government or a U.S.
+Government subcontractor is subject to the restrictions in
+this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
+in subparagraphs (c)(1) and (2) of the Commercial Computer
+Software - Restricted Rights clause at FAR 52.227-19, as
+applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
+Expressway, Santa Clara, CA 95051.
+
+The SDK is subject to United States export laws and
+regulations. You agree that you will not ship, transfer or
+export the SDK into any country, or use the SDK in any manner,
+prohibited by the United States Bureau of Industry and
+Security or economic sanctions regulations administered by the
+U.S. Department of Treasury’s Office of Foreign Assets
+Control (OFAC), or any applicable export laws, restrictions or
+regulations. These laws include restrictions on destinations,
+end users and end use. By accepting this Agreement, you
+confirm that you are not a resident or citizen of any country
+currently embargoed by the U.S. and that you are not otherwise
+prohibited from receiving the SDK.
+
+Any notice delivered by NVIDIA to you under this Agreement
+will be delivered via mail, email or fax. You agree that any
+notices that NVIDIA sends you electronically will satisfy any
+legal communication requirements. Please direct your legal
+notices or other correspondence to NVIDIA Corporation, 2788
+San Tomas Expressway, Santa Clara, California 95051, United
+States of America, Attention: Legal Department.
+
+This Agreement and any exhibits incorporated into this
+Agreement constitute the entire agreement of the parties with
+respect to the subject matter of this Agreement and supersede
+all prior negotiations or documentation exchanged between the
+parties relating to this SDK license. Any additional and/or
+conflicting terms on documents issued by you are null, void,
+and invalid. Any amendment or waiver under this Agreement
+shall be in writing and signed by representatives of both
+parties.
+
+
+2. CUDA Toolkit Supplement to Software License Agreement for
+NVIDIA Software Development Kits
+------------------------------------------------------------
+
+
+Release date: August 16, 2018
+-----------------------------
+
+The terms in this supplement govern your use of the NVIDIA
+CUDA Toolkit SDK under the terms of your license agreement
+(“Agreement”) as modified by this supplement. Capitalized
+terms used but not defined below have the meaning assigned to
+them in the Agreement.
+
+This supplement is an exhibit to the Agreement and is
+incorporated as an integral part of the Agreement. In the
+event of conflict between the terms in this supplement and the
+terms in the Agreement, the terms in this supplement govern.
+
+
+2.1. License Scope
+
+The SDK is licensed for you to develop applications only for
+use in systems with NVIDIA GPUs.
+
+
+2.2. Distribution
+
+The portions of the SDK that are distributable under the
+Agreement are listed in Attachment A.
+
+
+2.3. Operating Systems
+
+Those portions of the SDK designed exclusively for use on the
+Linux or FreeBSD operating systems, or other operating systems
+derived from the source code to these operating systems, may
+be copied and redistributed for use in accordance with this
+Agreement, provided that the object code files are not
+modified in any way (except for unzipping of compressed
+files).
+
+
+2.4. Audio and Video Encoders and Decoders
+
+You acknowledge and agree that it is your sole responsibility
+to obtain any additional third-party licenses required to
+make, have made, use, have used, sell, import, and offer for
+sale your products or services that include or incorporate any
+third-party software and content relating to audio and/or
+video encoders and decoders from, including but not limited
+to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
+MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
+under this Agreement any necessary patent or other rights with
+respect to any audio and/or video encoders and decoders.
+
+
+2.5. Licensing
+
+If the distribution terms in this Agreement are not suitable
+for your organization, or for any questions regarding this
+Agreement, please contact NVIDIA at
+nvidia-compute-license-questions@nvidia.com.
+
+
+2.6. Attachment A
+
+The following portions of the SDK are distributable under the
+Agreement:
+
+Component
+
+CUDA Runtime
+
+Windows
+
+cudart.dll, cudart_static.lib, cudadevrt.lib
+
+Mac OSX
+
+libcudart.dylib, libcudart_static.a, libcudadevrt.a
+
+Linux
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Android
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Component
+
+CUDA FFT Library
+
+Windows
+
+cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
+
+Mac OSX
+
+libcufft.dylib, libcufft_static.a, libcufftw.dylib,
+libcufftw_static.a
+
+Linux
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Android
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Component
+
+CUDA BLAS Library
+
+Windows
+
+cublas.dll, cublasLt.dll
+
+Mac OSX
+
+libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
+libcublasLt_static.a
+
+Linux
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Android
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Component
+
+NVIDIA "Drop-in" BLAS Library
+
+Windows
+
+nvblas.dll
+
+Mac OSX
+
+libnvblas.dylib
+
+Linux
+
+libnvblas.so
+
+Component
+
+CUDA Sparse Matrix Library
+
+Windows
+
+cusparse.dll, cusparse.lib
+
+Mac OSX
+
+libcusparse.dylib, libcusparse_static.a
+
+Linux
+
+libcusparse.so, libcusparse_static.a
+
+Android
+
+libcusparse.so, libcusparse_static.a
+
+Component
+
+CUDA Linear Solver Library
+
+Windows
+
+cusolver.dll, cusolver.lib
+
+Mac OSX
+
+libcusolver.dylib, libcusolver_static.a
+
+Linux
+
+libcusolver.so, libcusolver_static.a
+
+Android
+
+libcusolver.so, libcusolver_static.a
+
+Component
+
+CUDA Random Number Generation Library
+
+Windows
+
+curand.dll, curand.lib
+
+Mac OSX
+
+libcurand.dylib, libcurand_static.a
+
+Linux
+
+libcurand.so, libcurand_static.a
+
+Android
+
+libcurand.so, libcurand_static.a
+
+Component
+
+CUDA Accelerated Graph Library
+
+Component
+
+NVIDIA Performance Primitives Library
+
+Windows
+
+nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
+nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
+nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
+nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
+nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
+
+Mac OSX
+
+libnppc.dylib, libnppc_static.a, libnppial.dylib,
+libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
+libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
+libnppidei_static.a, libnppif.dylib, libnppif_static.a,
+libnppig.dylib, libnppig_static.a, libnppim.dylib,
+libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
+libnpps.dylib, libnpps_static.a
+
+Linux
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Android
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Component
+
+NVIDIA JPEG Library
+
+Linux
+
+libnvjpeg.so, libnvjpeg_static.a
+
+Component
+
+Internal common library required for statically linking to
+cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
+
+Mac OSX
+
+libculibos.a
+
+Linux
+
+libculibos.a
+
+Component
+
+NVIDIA Runtime Compilation Library and Header
+
+All
+
+nvrtc.h
+
+Windows
+
+nvrtc.dll, nvrtc-builtins.dll
+
+Mac OSX
+
+libnvrtc.dylib, libnvrtc-builtins.dylib
+
+Linux
+
+libnvrtc.so, libnvrtc-builtins.so
+
+Component
+
+NVIDIA Optimizing Compiler Library
+
+Windows
+
+nvvm.dll
+
+Mac OSX
+
+libnvvm.dylib
+
+Linux
+
+libnvvm.so
+
+Component
+
+NVIDIA Common Device Math Functions Library
+
+Windows
+
+libdevice.10.bc
+
+Mac OSX
+
+libdevice.10.bc
+
+Linux
+
+libdevice.10.bc
+
+Component
+
+CUDA Occupancy Calculation Header Library
+
+All
+
+cuda_occupancy.h
+
+Component
+
+CUDA Half Precision Headers
+
+All
+
+cuda_fp16.h, cuda_fp16.hpp
+
+Component
+
+CUDA Profiling Tools Interface (CUPTI) Library
+
+Windows
+
+cupti.dll
+
+Mac OSX
+
+libcupti.dylib
+
+Linux
+
+libcupti.so
+
+Component
+
+NVIDIA Tools Extension Library
+
+Windows
+
+nvToolsExt.dll, nvToolsExt.lib
+
+Mac OSX
+
+libnvToolsExt.dylib
+
+Linux
+
+libnvToolsExt.so
+
+Component
+
+NVIDIA CUDA Driver Libraries
+
+Linux
+
+libcuda.so, libnvidia-fatbinaryloader.so,
+libnvidia-ptxjitcompiler.so
+
+The NVIDIA CUDA Driver Libraries are only distributable in
+applications that meet this criteria:
+
+  1. The application was developed starting from a NVIDIA CUDA
+    container obtained from Docker Hub or the NVIDIA GPU
+    Cloud, and
+
+  2. The resulting application is packaged as a Docker
+    container and distributed to users on Docker Hub or the
+    NVIDIA GPU Cloud only.
+
+
+2.7. Attachment B
+
+
+Additional Licensing Obligations
+
+The following third party components included in the SOFTWARE
+are licensed to Licensee pursuant to the following terms and
+conditions:
+
+  1. Licensee's use of the GDB third party component is
+    subject to the terms and conditions of GNU GPL v3:
+
+    This product includes copyrighted third-party software licensed
+    under the terms of the GNU General Public License v3 ("GPL v3").
+    All third-party software packages are copyright by their respective
+    authors. GPL v3 terms and conditions are hereby incorporated into
+    the Agreement by this reference:     http://www.gnu.org/licenses/gpl.txt
+
+    Consistent with these licensing requirements, the software
+    listed below is provided under the terms of the specified
+    open source software licenses. To obtain source code for
+    software provided under licenses that require
+    redistribution of source code, including the GNU General
+    Public License (GPL) and GNU Lesser General Public License
+    (LGPL), contact oss-requests@nvidia.com. This offer is
+    valid for a period of three (3) years from the date of the
+    distribution of this product by NVIDIA CORPORATION.
+
+    Component          License
+    CUDA-GDB           GPL v3
+
+  2. Licensee represents and warrants that any and all third
+    party licensing and/or royalty payment obligations in
+    connection with Licensee's use of the H.264 video codecs
+    are solely the responsibility of Licensee.
+
+  3. Licensee's use of the Thrust library is subject to the
+    terms and conditions of the Apache License Version 2.0.
+    All third-party software packages are copyright by their
+    respective authors. Apache License Version 2.0 terms and
+    conditions are hereby incorporated into the Agreement by
+    this reference.
+    http://www.apache.org/licenses/LICENSE-2.0.html
+
+    In addition, Licensee acknowledges the following notice:
+    Thrust includes source code from the Boost Iterator,
+    Tuple, System, and Random Number libraries.
+
+    Boost Software License - Version 1.0 - August 17th, 2003
+    . . . .
+
+    Permission is hereby granted, free of charge, to any person or
+    organization obtaining a copy of the software and accompanying
+    documentation covered by this license (the "Software") to use,
+    reproduce, display, distribute, execute, and transmit the Software,
+    and to prepare derivative works of the Software, and to permit
+    third-parties to whom the Software is furnished to do so, all
+    subject to the following:
+
+    The copyright notices in the Software and this entire statement,
+    including the above license grant, this restriction and the following
+    disclaimer, must be included in all copies of the Software, in whole
+    or in part, and all derivative works of the Software, unless such
+    copies or derivative works are solely in the form of machine-executable
+    object code generated by a source language processor.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+    NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+    ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
+    OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+    OTHER DEALINGS IN THE SOFTWARE.
+
+  4. Licensee's use of the LLVM third party component is
+    subject to the following terms and conditions:
+
+    ======================================================
+    LLVM Release License
+    ======================================================
+    University of Illinois/NCSA
+    Open Source License
+
+    Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+    All rights reserved.
+
+    Developed by:
+
+        LLVM Team
+
+        University of Illinois at Urbana-Champaign
+
+        http://llvm.org
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal with the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    *  Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimers.
+
+    *  Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimers in the
+       documentation and/or other materials provided with the distribution.
+
+    *  Neither the names of the LLVM Team, University of Illinois at Urbana-
+       Champaign, nor the names of its contributors may be used to endorse or
+       promote products derived from this Software without specific prior
+       written permission.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+    THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS WITH THE SOFTWARE.
+
+  5. Licensee's use (e.g. nvprof) of the PCRE third party
+    component is subject to the following terms and
+    conditions:
+
+    ------------
+    PCRE LICENCE
+    ------------
+    PCRE is a library of functions to support regular expressions whose syntax
+    and semantics are as close as possible to those of the Perl 5 language.
+    Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
+    specified below. The documentation for PCRE, supplied in the "doc"
+    directory, is distributed under the same terms as the software itself. The
+    basic library functions are written in C and are freestanding. Also
+    included in the distribution is a set of C++ wrapper functions, and a just-
+    in-time compiler that can be used to optimize pattern matching. These are
+    both optional features that can be omitted when the library is built.
+
+    THE BASIC LIBRARY FUNCTIONS
+    ---------------------------
+    Written by:       Philip Hazel
+    Email local part: ph10
+    Email domain:     cam.ac.uk
+    University of Cambridge Computing Service,
+    Cambridge, England.
+    Copyright (c) 1997-2012 University of Cambridge
+    All rights reserved.
+
+    PCRE JUST-IN-TIME COMPILATION SUPPORT
+    -------------------------------------
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Emain domain:     freemail.hu
+    Copyright(c) 2010-2012 Zoltan Herczeg
+    All rights reserved.
+
+    STACK-LESS JUST-IN-TIME COMPILER
+    --------------------------------
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Emain domain:     freemail.hu
+    Copyright(c) 2009-2012 Zoltan Herczeg
+    All rights reserved.
+
+    THE C++ WRAPPER FUNCTIONS
+    -------------------------
+    Contributed by:   Google Inc.
+    Copyright (c) 2007-2012, Google Inc.
+    All rights reserved.
+
+    THE "BSD" LICENCE
+    -----------------
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+      * Redistributions of source code must retain the above copyright notice,
+        this list of conditions and the following disclaimer.
+
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+      * Neither the name of the University of Cambridge nor the name of Google
+        Inc. nor the names of their contributors may be used to endorse or
+        promote products derived from this software without specific prior
+        written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  6. Some of the cuBLAS library routines were written by or
+    derived from code written by Vasily Volkov and are subject
+    to the Modified Berkeley Software Distribution License as
+    follows:
+
+    Copyright (c) 2007-2009, Regents of the University of California
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the University of California, Berkeley nor
+          the names of its contributors may be used to endorse or promote
+          products derived from this software without specific prior
+          written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  7. Some of the cuBLAS library routines were written by or
+    derived from code written by Davide Barbieri and are
+    subject to the Modified Berkeley Software Distribution
+    License as follows:
+
+    Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * The name of the author may not be used to endorse or promote
+          products derived from this software without specific prior
+          written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+  8. Some of the cuBLAS library routines were derived from
+    code developed by the University of Tennessee and are
+    subject to the Modified Berkeley Software Distribution
+    License as follows:
+
+    Copyright (c) 2010 The University of Tennessee.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer listed in this license in the documentation and/or
+          other materials provided with the distribution.
+        * Neither the name of the copyright holders nor the names of its
+          contributors may be used to endorse or promote products derived
+          from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  9. Some of the cuBLAS library routines were written by or
+    derived from code written by Jonathan Hogg and are subject
+    to the Modified Berkeley Software Distribution License as
+    follows:
+
+    Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the STFC nor the names of its contributors
+          may be used to endorse or promote products derived from this
+          software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
+    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+    OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+    IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  10. Some of the cuBLAS library routines were written by or
+    derived from code written by Ahmad M. Abdelfattah, David
+    Keyes, and Hatem Ltaief, and are subject to the Apache
+    License, Version 2.0, as follows:
+
+     -- (C) Copyright 2013 King Abdullah University of Science and Technology
+      Authors:
+      Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
+      David Keyes (david.keyes@kaust.edu.sa)
+      Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
+
+      Redistribution  and  use  in  source and binary forms, with or without
+      modification,  are  permitted  provided  that the following conditions
+      are met:
+
+      * Redistributions  of  source  code  must  retain  the above copyright
+        notice,  this  list  of  conditions  and  the  following  disclaimer.
+      * Redistributions  in  binary  form must reproduce the above copyright
+        notice,  this list of conditions and the following disclaimer in the
+        documentation  and/or other materials provided with the distribution.
+      * Neither  the  name of the King Abdullah University of Science and
+        Technology nor the names of its contributors may be used to endorse
+        or promote products derived from this software without specific prior
+        written permission.
+
+      THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+      ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+      LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+      A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+      HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+      SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
+      LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+      DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+      THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+      (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+      OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
+
+  11. Some of the cuSPARSE library routines were written by or
+    derived from code written by Li-Wen Chang and are subject
+    to the NCSA Open Source License as follows:
+
+    Copyright (c) 2012, University of Illinois.
+
+    All rights reserved.
+
+    Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal with the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimers in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the names of IMPACT Group, University of Illinois, nor
+          the names of its contributors may be used to endorse or promote
+          products derived from this Software without specific prior
+          written permission.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+    IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+    SOFTWARE.
+
+  12. Some of the cuRAND library routines were written by or
+    derived from code written by Mutsuo Saito and Makoto
+    Matsumoto and are subject to the following license:
+
+    Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+    University. All rights reserved.
+
+    Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+    University and University of Tokyo.  All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of the Hiroshima University nor the names of
+          its contributors may be used to endorse or promote products
+          derived from this software without specific prior written
+          permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  13. Some of the cuRAND library routines were derived from
+    code developed by D. E. Shaw Research and are subject to
+    the following license:
+
+    Copyright 2010-2011, D. E. Shaw Research.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions, and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+          copyright notice, this list of conditions, and the following
+          disclaimer in the documentation and/or other materials provided
+          with the distribution.
+        * Neither the name of D. E. Shaw Research nor the names of its
+          contributors may be used to endorse or promote products derived
+          from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  14. Some of the Math library routines were written by or
+    derived from code developed by Norbert Juffa and are
+    subject to the following license:
+
+    Copyright (c) 2015-2017, Norbert Juffa
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  15. Licensee's use of the lz4 third party component is
+    subject to the following terms and conditions:
+
+    Copyright (C) 2011-2013, Yann Collet.
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  16. The NPP library uses code from the Boost Math Toolkit,
+    and is subject to the following license:
+
+    Boost Software License - Version 1.0 - August 17th, 2003
+    . . . .
+
+    Permission is hereby granted, free of charge, to any person or
+    organization obtaining a copy of the software and accompanying
+    documentation covered by this license (the "Software") to use,
+    reproduce, display, distribute, execute, and transmit the Software,
+    and to prepare derivative works of the Software, and to permit
+    third-parties to whom the Software is furnished to do so, all
+    subject to the following:
+
+    The copyright notices in the Software and this entire statement,
+    including the above license grant, this restriction and the following
+    disclaimer, must be included in all copies of the Software, in whole
+    or in part, and all derivative works of the Software, unless such
+    copies or derivative works are solely in the form of machine-executable
+    object code generated by a source language processor.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+    NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+    ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
+    OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+    OTHER DEALINGS IN THE SOFTWARE.
+
+  17. Portions of the Nsight Eclipse Edition is subject to the
+    following license:
+
+    The Eclipse Foundation makes available all content in this plug-in
+    ("Content"). Unless otherwise indicated below, the Content is provided
+    to you under the terms and conditions of the Eclipse Public License
+    Version 1.0 ("EPL"). A copy of the EPL is available at http://
+    www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
+    will mean the Content.
+
+    If you did not receive this Content directly from the Eclipse
+    Foundation, the Content is being redistributed by another party
+    ("Redistributor") and different terms and conditions may apply to your
+    use of any object code in the Content. Check the Redistributor's
+    license that was provided with the Content. If no such license exists,
+    contact the Redistributor. Unless otherwise indicated below, the terms
+    and conditions of the EPL still apply to any source code in the
+    Content and such source code may be obtained at http://www.eclipse.org.
+
+  18. Some of the cuBLAS library routines uses code from
+    OpenAI, which is subject to the following license:
+
+    License URL
+    https://github.com/openai/openai-gemm/blob/master/LICENSE
+
+    License Text
+    The MIT License
+
+    Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+
+  19. Licensee's use of the Visual Studio Setup Configuration
+    Samples is subject to the following license:
+
+    The MIT License (MIT)
+    Copyright (C) Microsoft Corporation. All rights reserved.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without restriction,
+    including without limitation the rights to use, copy, modify, merge,
+    publish, distribute, sublicense, and/or sell copies of the Software,
+    and to permit persons to whom the Software is furnished to do so,
+    subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+  20. Licensee's use of linmath.h header for CPU functions for
+    GL vector/matrix operations from lunarG is subject to the
+    Apache License Version 2.0.
+
+  21. The DX12-CUDA sample uses the d3dx12.h header, which is
+    subject to the MIT license .
+
+-----------------
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe
new file mode 100644
index 0000000000000000000000000000000000000000..bc02472528a32bd70ebfa2b0eca26e34a83fa264
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5dc9884a8f458371550e09bd396e5418bf375820a31b9899f6499bf391c7b2e
+size 168448
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c01d5b08b6b44379b931d54d7fcf5221fdc9fde
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py
@@ -0,0 +1,4 @@
+from .distro import main
+
+if __name__ == "__main__":
+    main()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99da8b9dd120d37abe0113f75e0abcf982bf2f0e
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0e170be019dddbb3c1551e726bd81cb4f6c5a9f0
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13e2094b5da471e74c3b7108d1a948edd3680075
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..64c8f64d84fe4b846886be3e11fa8bf150e26f4e
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a75867d2dc5225c1bfc104d3985cc2c14663017
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py
new file mode 100644
index 0000000000000000000000000000000000000000..264d564dbda676b52f446c0d25433a15939a78a3
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py
@@ -0,0 +1,519 @@
+"""
+This module uses ctypes to bind a whole bunch of functions and constants from
+SecureTransport. The goal here is to provide the low-level API to
+SecureTransport. These are essentially the C-level functions and constants, and
+they're pretty gross to work with.
+
+This code is a bastardised version of the code found in Will Bond's oscrypto
+library. An enormous debt is owed to him for blazing this trail for us. For
+that reason, this code should be considered to be covered both by urllib3's
+license and by oscrypto's:
+
+    Copyright (c) 2015-2016 Will Bond <will@wbond.net>
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+"""
+from __future__ import absolute_import
+
+import platform
+from ctypes import (
+    CDLL,
+    CFUNCTYPE,
+    POINTER,
+    c_bool,
+    c_byte,
+    c_char_p,
+    c_int32,
+    c_long,
+    c_size_t,
+    c_uint32,
+    c_ulong,
+    c_void_p,
+)
+from ctypes.util import find_library
+
+from ...packages.six import raise_from
+
+if platform.system() != "Darwin":
+    raise ImportError("Only macOS is supported")
+
+version = platform.mac_ver()[0]
+version_info = tuple(map(int, version.split(".")))
+if version_info < (10, 8):
+    raise OSError(
+        "Only OS X 10.8 and newer are supported, not %s.%s"
+        % (version_info[0], version_info[1])
+    )
+
+
+def load_cdll(name, macos10_16_path):
+    """Loads a CDLL by name, falling back to known path on 10.16+"""
+    try:
+        # Big Sur is technically 11 but we use 10.16 due to the Big Sur
+        # beta being labeled as 10.16.
+        if version_info >= (10, 16):
+            path = macos10_16_path
+        else:
+            path = find_library(name)
+        if not path:
+            raise OSError  # Caught and reraised as 'ImportError'
+        return CDLL(path, use_errno=True)
+    except OSError:
+        raise_from(ImportError("The library %s failed to load" % name), None)
+
+
+Security = load_cdll(
+    "Security", "/System/Library/Frameworks/Security.framework/Security"
+)
+CoreFoundation = load_cdll(
+    "CoreFoundation",
+    "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation",
+)
+
+
+Boolean = c_bool
+CFIndex = c_long
+CFStringEncoding = c_uint32
+CFData = c_void_p
+CFString = c_void_p
+CFArray = c_void_p
+CFMutableArray = c_void_p
+CFDictionary = c_void_p
+CFError = c_void_p
+CFType = c_void_p
+CFTypeID = c_ulong
+
+CFTypeRef = POINTER(CFType)
+CFAllocatorRef = c_void_p
+
+OSStatus = c_int32
+
+CFDataRef = POINTER(CFData)
+CFStringRef = POINTER(CFString)
+CFArrayRef = POINTER(CFArray)
+CFMutableArrayRef = POINTER(CFMutableArray)
+CFDictionaryRef = POINTER(CFDictionary)
+CFArrayCallBacks = c_void_p
+CFDictionaryKeyCallBacks = c_void_p
+CFDictionaryValueCallBacks = c_void_p
+
+SecCertificateRef = POINTER(c_void_p)
+SecExternalFormat = c_uint32
+SecExternalItemType = c_uint32
+SecIdentityRef = POINTER(c_void_p)
+SecItemImportExportFlags = c_uint32
+SecItemImportExportKeyParameters = c_void_p
+SecKeychainRef = POINTER(c_void_p)
+SSLProtocol = c_uint32
+SSLCipherSuite = c_uint32
+SSLContextRef = POINTER(c_void_p)
+SecTrustRef = POINTER(c_void_p)
+SSLConnectionRef = c_uint32
+SecTrustResultType = c_uint32
+SecTrustOptionFlags = c_uint32
+SSLProtocolSide = c_uint32
+SSLConnectionType = c_uint32
+SSLSessionOption = c_uint32
+
+
+try:
+    Security.SecItemImport.argtypes = [
+        CFDataRef,
+        CFStringRef,
+        POINTER(SecExternalFormat),
+        POINTER(SecExternalItemType),
+        SecItemImportExportFlags,
+        POINTER(SecItemImportExportKeyParameters),
+        SecKeychainRef,
+        POINTER(CFArrayRef),
+    ]
+    Security.SecItemImport.restype = OSStatus
+
+    Security.SecCertificateGetTypeID.argtypes = []
+    Security.SecCertificateGetTypeID.restype = CFTypeID
+
+    Security.SecIdentityGetTypeID.argtypes = []
+    Security.SecIdentityGetTypeID.restype = CFTypeID
+
+    Security.SecKeyGetTypeID.argtypes = []
+    Security.SecKeyGetTypeID.restype = CFTypeID
+
+    Security.SecCertificateCreateWithData.argtypes = [CFAllocatorRef, CFDataRef]
+    Security.SecCertificateCreateWithData.restype = SecCertificateRef
+
+    Security.SecCertificateCopyData.argtypes = [SecCertificateRef]
+    Security.SecCertificateCopyData.restype = CFDataRef
+
+    Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p]
+    Security.SecCopyErrorMessageString.restype = CFStringRef
+
+    Security.SecIdentityCreateWithCertificate.argtypes = [
+        CFTypeRef,
+        SecCertificateRef,
+        POINTER(SecIdentityRef),
+    ]
+    Security.SecIdentityCreateWithCertificate.restype = OSStatus
+
+    Security.SecKeychainCreate.argtypes = [
+        c_char_p,
+        c_uint32,
+        c_void_p,
+        Boolean,
+        c_void_p,
+        POINTER(SecKeychainRef),
+    ]
+    Security.SecKeychainCreate.restype = OSStatus
+
+    Security.SecKeychainDelete.argtypes = [SecKeychainRef]
+    Security.SecKeychainDelete.restype = OSStatus
+
+    Security.SecPKCS12Import.argtypes = [
+        CFDataRef,
+        CFDictionaryRef,
+        POINTER(CFArrayRef),
+    ]
+    Security.SecPKCS12Import.restype = OSStatus
+
+    SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t))
+    SSLWriteFunc = CFUNCTYPE(
+        OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t)
+    )
+
+    Security.SSLSetIOFuncs.argtypes = [SSLContextRef, SSLReadFunc, SSLWriteFunc]
+    Security.SSLSetIOFuncs.restype = OSStatus
+
+    Security.SSLSetPeerID.argtypes = [SSLContextRef, c_char_p, c_size_t]
+    Security.SSLSetPeerID.restype = OSStatus
+
+    Security.SSLSetCertificate.argtypes = [SSLContextRef, CFArrayRef]
+    Security.SSLSetCertificate.restype = OSStatus
+
+    Security.SSLSetCertificateAuthorities.argtypes = [SSLContextRef, CFTypeRef, Boolean]
+    Security.SSLSetCertificateAuthorities.restype = OSStatus
+
+    Security.SSLSetConnection.argtypes = [SSLContextRef, SSLConnectionRef]
+    Security.SSLSetConnection.restype = OSStatus
+
+    Security.SSLSetPeerDomainName.argtypes = [SSLContextRef, c_char_p, c_size_t]
+    Security.SSLSetPeerDomainName.restype = OSStatus
+
+    Security.SSLHandshake.argtypes = [SSLContextRef]
+    Security.SSLHandshake.restype = OSStatus
+
+    Security.SSLRead.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
+    Security.SSLRead.restype = OSStatus
+
+    Security.SSLWrite.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
+    Security.SSLWrite.restype = OSStatus
+
+    Security.SSLClose.argtypes = [SSLContextRef]
+    Security.SSLClose.restype = OSStatus
+
+    Security.SSLGetNumberSupportedCiphers.argtypes = [SSLContextRef, POINTER(c_size_t)]
+    Security.SSLGetNumberSupportedCiphers.restype = OSStatus
+
+    Security.SSLGetSupportedCiphers.argtypes = [
+        SSLContextRef,
+        POINTER(SSLCipherSuite),
+        POINTER(c_size_t),
+    ]
+    Security.SSLGetSupportedCiphers.restype = OSStatus
+
+    Security.SSLSetEnabledCiphers.argtypes = [
+        SSLContextRef,
+        POINTER(SSLCipherSuite),
+        c_size_t,
+    ]
+    Security.SSLSetEnabledCiphers.restype = OSStatus
+
+    Security.SSLGetNumberEnabledCiphers.argtype = [SSLContextRef, POINTER(c_size_t)]
+    Security.SSLGetNumberEnabledCiphers.restype = OSStatus
+
+    Security.SSLGetEnabledCiphers.argtypes = [
+        SSLContextRef,
+        POINTER(SSLCipherSuite),
+        POINTER(c_size_t),
+    ]
+    Security.SSLGetEnabledCiphers.restype = OSStatus
+
+    Security.SSLGetNegotiatedCipher.argtypes = [SSLContextRef, POINTER(SSLCipherSuite)]
+    Security.SSLGetNegotiatedCipher.restype = OSStatus
+
+    Security.SSLGetNegotiatedProtocolVersion.argtypes = [
+        SSLContextRef,
+        POINTER(SSLProtocol),
+    ]
+    Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus
+
+    Security.SSLCopyPeerTrust.argtypes = [SSLContextRef, POINTER(SecTrustRef)]
+    Security.SSLCopyPeerTrust.restype = OSStatus
+
+    Security.SecTrustSetAnchorCertificates.argtypes = [SecTrustRef, CFArrayRef]
+    Security.SecTrustSetAnchorCertificates.restype = OSStatus
+
+    Security.SecTrustSetAnchorCertificatesOnly.argstypes = [SecTrustRef, Boolean]
+    Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus
+
+    Security.SecTrustEvaluate.argtypes = [SecTrustRef, POINTER(SecTrustResultType)]
+    Security.SecTrustEvaluate.restype = OSStatus
+
+    Security.SecTrustGetCertificateCount.argtypes = [SecTrustRef]
+    Security.SecTrustGetCertificateCount.restype = CFIndex
+
+    Security.SecTrustGetCertificateAtIndex.argtypes = [SecTrustRef, CFIndex]
+    Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef
+
+    Security.SSLCreateContext.argtypes = [
+        CFAllocatorRef,
+        SSLProtocolSide,
+        SSLConnectionType,
+    ]
+    Security.SSLCreateContext.restype = SSLContextRef
+
+    Security.SSLSetSessionOption.argtypes = [SSLContextRef, SSLSessionOption, Boolean]
+    Security.SSLSetSessionOption.restype = OSStatus
+
+    Security.SSLSetProtocolVersionMin.argtypes = [SSLContextRef, SSLProtocol]
+    Security.SSLSetProtocolVersionMin.restype = OSStatus
+
+    Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol]
+    Security.SSLSetProtocolVersionMax.restype = OSStatus
+
+    try:
+        Security.SSLSetALPNProtocols.argtypes = [SSLContextRef, CFArrayRef]
+        Security.SSLSetALPNProtocols.restype = OSStatus
+    except AttributeError:
+        # Supported only in 10.12+
+        pass
+
+    Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p]
+    Security.SecCopyErrorMessageString.restype = CFStringRef
+
+    Security.SSLReadFunc = SSLReadFunc
+    Security.SSLWriteFunc = SSLWriteFunc
+    Security.SSLContextRef = SSLContextRef
+    Security.SSLProtocol = SSLProtocol
+    Security.SSLCipherSuite = SSLCipherSuite
+    Security.SecIdentityRef = SecIdentityRef
+    Security.SecKeychainRef = SecKeychainRef
+    Security.SecTrustRef = SecTrustRef
+    Security.SecTrustResultType = SecTrustResultType
+    Security.SecExternalFormat = SecExternalFormat
+    Security.OSStatus = OSStatus
+
+    Security.kSecImportExportPassphrase = CFStringRef.in_dll(
+        Security, "kSecImportExportPassphrase"
+    )
+    Security.kSecImportItemIdentity = CFStringRef.in_dll(
+        Security, "kSecImportItemIdentity"
+    )
+
+    # CoreFoundation time!
+    CoreFoundation.CFRetain.argtypes = [CFTypeRef]
+    CoreFoundation.CFRetain.restype = CFTypeRef
+
+    CoreFoundation.CFRelease.argtypes = [CFTypeRef]
+    CoreFoundation.CFRelease.restype = None
+
+    CoreFoundation.CFGetTypeID.argtypes = [CFTypeRef]
+    CoreFoundation.CFGetTypeID.restype = CFTypeID
+
+    CoreFoundation.CFStringCreateWithCString.argtypes = [
+        CFAllocatorRef,
+        c_char_p,
+        CFStringEncoding,
+    ]
+    CoreFoundation.CFStringCreateWithCString.restype = CFStringRef
+
+    CoreFoundation.CFStringGetCStringPtr.argtypes = [CFStringRef, CFStringEncoding]
+    CoreFoundation.CFStringGetCStringPtr.restype = c_char_p
+
+    CoreFoundation.CFStringGetCString.argtypes = [
+        CFStringRef,
+        c_char_p,
+        CFIndex,
+        CFStringEncoding,
+    ]
+    CoreFoundation.CFStringGetCString.restype = c_bool
+
+    CoreFoundation.CFDataCreate.argtypes = [CFAllocatorRef, c_char_p, CFIndex]
+    CoreFoundation.CFDataCreate.restype = CFDataRef
+
+    CoreFoundation.CFDataGetLength.argtypes = [CFDataRef]
+    CoreFoundation.CFDataGetLength.restype = CFIndex
+
+    CoreFoundation.CFDataGetBytePtr.argtypes = [CFDataRef]
+    CoreFoundation.CFDataGetBytePtr.restype = c_void_p
+
+    CoreFoundation.CFDictionaryCreate.argtypes = [
+        CFAllocatorRef,
+        POINTER(CFTypeRef),
+        POINTER(CFTypeRef),
+        CFIndex,
+        CFDictionaryKeyCallBacks,
+        CFDictionaryValueCallBacks,
+    ]
+    CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef
+
+    CoreFoundation.CFDictionaryGetValue.argtypes = [CFDictionaryRef, CFTypeRef]
+    CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef
+
+    CoreFoundation.CFArrayCreate.argtypes = [
+        CFAllocatorRef,
+        POINTER(CFTypeRef),
+        CFIndex,
+        CFArrayCallBacks,
+    ]
+    CoreFoundation.CFArrayCreate.restype = CFArrayRef
+
+    CoreFoundation.CFArrayCreateMutable.argtypes = [
+        CFAllocatorRef,
+        CFIndex,
+        CFArrayCallBacks,
+    ]
+    CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef
+
+    CoreFoundation.CFArrayAppendValue.argtypes = [CFMutableArrayRef, c_void_p]
+    CoreFoundation.CFArrayAppendValue.restype = None
+
+    CoreFoundation.CFArrayGetCount.argtypes = [CFArrayRef]
+    CoreFoundation.CFArrayGetCount.restype = CFIndex
+
+    CoreFoundation.CFArrayGetValueAtIndex.argtypes = [CFArrayRef, CFIndex]
+    CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p
+
+    CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll(
+        CoreFoundation, "kCFAllocatorDefault"
+    )
+    CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(
+        CoreFoundation, "kCFTypeArrayCallBacks"
+    )
+    CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll(
+        CoreFoundation, "kCFTypeDictionaryKeyCallBacks"
+    )
+    CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll(
+        CoreFoundation, "kCFTypeDictionaryValueCallBacks"
+    )
+
+    CoreFoundation.CFTypeRef = CFTypeRef
+    CoreFoundation.CFArrayRef = CFArrayRef
+    CoreFoundation.CFStringRef = CFStringRef
+    CoreFoundation.CFDictionaryRef = CFDictionaryRef
+
+except (AttributeError):
+    raise ImportError("Error initializing ctypes")
+
+
+class CFConst(object):
+    """
+    A class object that acts as essentially a namespace for CoreFoundation
+    constants.
+    """
+
+    kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
+
+
+class SecurityConst(object):
+    """
+    A class object that acts as essentially a namespace for Security constants.
+    """
+
+    kSSLSessionOptionBreakOnServerAuth = 0
+
+    kSSLProtocol2 = 1
+    kSSLProtocol3 = 2
+    kTLSProtocol1 = 4
+    kTLSProtocol11 = 7
+    kTLSProtocol12 = 8
+    # SecureTransport does not support TLS 1.3 even if there's a constant for it
+    kTLSProtocol13 = 10
+    kTLSProtocolMaxSupported = 999
+
+    kSSLClientSide = 1
+    kSSLStreamType = 0
+
+    kSecFormatPEMSequence = 10
+
+    kSecTrustResultInvalid = 0
+    kSecTrustResultProceed = 1
+    # This gap is present on purpose: this was kSecTrustResultConfirm, which
+    # is deprecated.
+    kSecTrustResultDeny = 3
+    kSecTrustResultUnspecified = 4
+    kSecTrustResultRecoverableTrustFailure = 5
+    kSecTrustResultFatalTrustFailure = 6
+    kSecTrustResultOtherError = 7
+
+    errSSLProtocol = -9800
+    errSSLWouldBlock = -9803
+    errSSLClosedGraceful = -9805
+    errSSLClosedNoNotify = -9816
+    errSSLClosedAbort = -9806
+
+    errSSLXCertChainInvalid = -9807
+    errSSLCrypto = -9809
+    errSSLInternal = -9810
+    errSSLCertExpired = -9814
+    errSSLCertNotYetValid = -9815
+    errSSLUnknownRootCert = -9812
+    errSSLNoRootCert = -9813
+    errSSLHostNameMismatch = -9843
+    errSSLPeerHandshakeFail = -9824
+    errSSLPeerUserCancelled = -9839
+    errSSLWeakPeerEphemeralDHKey = -9850
+    errSSLServerAuthCompleted = -9841
+    errSSLRecordOverflow = -9847
+
+    errSecVerifyFailed = -67808
+    errSecNoTrustSettings = -25263
+    errSecItemNotFound = -25300
+    errSecInvalidTrustSettings = -25262
+
+    # Cipher suites. We only pick the ones our default cipher string allows.
+    # Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
+    TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
+    TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
+    TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
+    TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
+    TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
+    TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
+    TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
+    TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
+    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
+    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
+    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
+    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
+    TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
+    TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
+    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
+    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
+    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
+    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
+    TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
+    TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
+    TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
+    TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
+    TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
+    TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
+    TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
+    TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
+    TLS_AES_128_GCM_SHA256 = 0x1301
+    TLS_AES_256_GCM_SHA384 = 0x1302
+    TLS_AES_128_CCM_8_SHA256 = 0x1305
+    TLS_AES_128_CCM_SHA256 = 0x1304
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa0b245d279e96724d5610f93bc3b3c8c22ca032
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py
@@ -0,0 +1,397 @@
+"""
+Low-level helpers for the SecureTransport bindings.
+
+These are Python functions that are not directly related to the high-level APIs
+but are necessary to get them to work. They include a whole bunch of low-level
+CoreFoundation messing about and memory management. The concerns in this module
+are almost entirely about trying to avoid memory leaks and providing
+appropriate and useful assistance to the higher-level code.
+"""
+import base64
+import ctypes
+import itertools
+import os
+import re
+import ssl
+import struct
+import tempfile
+
+from .bindings import CFConst, CoreFoundation, Security
+
+# This regular expression is used to grab PEM data out of a PEM bundle.
+_PEM_CERTS_RE = re.compile(
+    b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL
+)
+
+
+def _cf_data_from_bytes(bytestring):
+    """
+    Given a bytestring, create a CFData object from it. This CFData object must
+    be CFReleased by the caller.
+    """
+    return CoreFoundation.CFDataCreate(
+        CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring)
+    )
+
+
+def _cf_dictionary_from_tuples(tuples):
+    """
+    Given a list of Python tuples, create an associated CFDictionary.
+    """
+    dictionary_size = len(tuples)
+
+    # We need to get the dictionary keys and values out in the same order.
+    keys = (t[0] for t in tuples)
+    values = (t[1] for t in tuples)
+    cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys)
+    cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values)
+
+    return CoreFoundation.CFDictionaryCreate(
+        CoreFoundation.kCFAllocatorDefault,
+        cf_keys,
+        cf_values,
+        dictionary_size,
+        CoreFoundation.kCFTypeDictionaryKeyCallBacks,
+        CoreFoundation.kCFTypeDictionaryValueCallBacks,
+    )
+
+
+def _cfstr(py_bstr):
+    """
+    Given a Python binary data, create a CFString.
+    The string must be CFReleased by the caller.
+    """
+    c_str = ctypes.c_char_p(py_bstr)
+    cf_str = CoreFoundation.CFStringCreateWithCString(
+        CoreFoundation.kCFAllocatorDefault,
+        c_str,
+        CFConst.kCFStringEncodingUTF8,
+    )
+    return cf_str
+
+
+def _create_cfstring_array(lst):
+    """
+    Given a list of Python binary data, create an associated CFMutableArray.
+    The array must be CFReleased by the caller.
+
+    Raises an ssl.SSLError on failure.
+    """
+    cf_arr = None
+    try:
+        cf_arr = CoreFoundation.CFArrayCreateMutable(
+            CoreFoundation.kCFAllocatorDefault,
+            0,
+            ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
+        )
+        if not cf_arr:
+            raise MemoryError("Unable to allocate memory!")
+        for item in lst:
+            cf_str = _cfstr(item)
+            if not cf_str:
+                raise MemoryError("Unable to allocate memory!")
+            try:
+                CoreFoundation.CFArrayAppendValue(cf_arr, cf_str)
+            finally:
+                CoreFoundation.CFRelease(cf_str)
+    except BaseException as e:
+        if cf_arr:
+            CoreFoundation.CFRelease(cf_arr)
+        raise ssl.SSLError("Unable to allocate array: %s" % (e,))
+    return cf_arr
+
+
+def _cf_string_to_unicode(value):
+    """
+    Creates a Unicode string from a CFString object. Used entirely for error
+    reporting.
+
+    Yes, it annoys me quite a lot that this function is this complex.
+    """
+    value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p))
+
+    string = CoreFoundation.CFStringGetCStringPtr(
+        value_as_void_p, CFConst.kCFStringEncodingUTF8
+    )
+    if string is None:
+        buffer = ctypes.create_string_buffer(1024)
+        result = CoreFoundation.CFStringGetCString(
+            value_as_void_p, buffer, 1024, CFConst.kCFStringEncodingUTF8
+        )
+        if not result:
+            raise OSError("Error copying C string from CFStringRef")
+        string = buffer.value
+    if string is not None:
+        string = string.decode("utf-8")
+    return string
+
+
+def _assert_no_error(error, exception_class=None):
+    """
+    Checks the return code and throws an exception if there is an error to
+    report
+    """
+    if error == 0:
+        return
+
+    cf_error_string = Security.SecCopyErrorMessageString(error, None)
+    output = _cf_string_to_unicode(cf_error_string)
+    CoreFoundation.CFRelease(cf_error_string)
+
+    if output is None or output == u"":
+        output = u"OSStatus %s" % error
+
+    if exception_class is None:
+        exception_class = ssl.SSLError
+
+    raise exception_class(output)
+
+
+def _cert_array_from_pem(pem_bundle):
+    """
+    Given a bundle of certs in PEM format, turns them into a CFArray of certs
+    that can be used to validate a cert chain.
+    """
+    # Normalize the PEM bundle's line endings.
+    pem_bundle = pem_bundle.replace(b"\r\n", b"\n")
+
+    der_certs = [
+        base64.b64decode(match.group(1)) for match in _PEM_CERTS_RE.finditer(pem_bundle)
+    ]
+    if not der_certs:
+        raise ssl.SSLError("No root certificates specified")
+
+    cert_array = CoreFoundation.CFArrayCreateMutable(
+        CoreFoundation.kCFAllocatorDefault,
+        0,
+        ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
+    )
+    if not cert_array:
+        raise ssl.SSLError("Unable to allocate memory!")
+
+    try:
+        for der_bytes in der_certs:
+            certdata = _cf_data_from_bytes(der_bytes)
+            if not certdata:
+                raise ssl.SSLError("Unable to allocate memory!")
+            cert = Security.SecCertificateCreateWithData(
+                CoreFoundation.kCFAllocatorDefault, certdata
+            )
+            CoreFoundation.CFRelease(certdata)
+            if not cert:
+                raise ssl.SSLError("Unable to build cert object!")
+
+            CoreFoundation.CFArrayAppendValue(cert_array, cert)
+            CoreFoundation.CFRelease(cert)
+    except Exception:
+        # We need to free the array before the exception bubbles further.
+        # We only want to do that if an error occurs: otherwise, the caller
+        # should free.
+        CoreFoundation.CFRelease(cert_array)
+        raise
+
+    return cert_array
+
+
+def _is_cert(item):
+    """
+    Returns True if a given CFTypeRef is a certificate.
+    """
+    expected = Security.SecCertificateGetTypeID()
+    return CoreFoundation.CFGetTypeID(item) == expected
+
+
+def _is_identity(item):
+    """
+    Returns True if a given CFTypeRef is an identity.
+    """
+    expected = Security.SecIdentityGetTypeID()
+    return CoreFoundation.CFGetTypeID(item) == expected
+
+
+def _temporary_keychain():
+    """
+    This function creates a temporary Mac keychain that we can use to work with
+    credentials. This keychain uses a one-time password and a temporary file to
+    store the data. We expect to have one keychain per socket. The returned
+    SecKeychainRef must be freed by the caller, including calling
+    SecKeychainDelete.
+
+    Returns a tuple of the SecKeychainRef and the path to the temporary
+    directory that contains it.
+    """
+    # Unfortunately, SecKeychainCreate requires a path to a keychain. This
+    # means we cannot use mkstemp to use a generic temporary file. Instead,
+    # we're going to create a temporary directory and a filename to use there.
+    # This filename will be 8 random bytes expanded into base64. We also need
+    # some random bytes to password-protect the keychain we're creating, so we
+    # ask for 40 random bytes.
+    random_bytes = os.urandom(40)
+    filename = base64.b16encode(random_bytes[:8]).decode("utf-8")
+    password = base64.b16encode(random_bytes[8:])  # Must be valid UTF-8
+    tempdirectory = tempfile.mkdtemp()
+
+    keychain_path = os.path.join(tempdirectory, filename).encode("utf-8")
+
+    # We now want to create the keychain itself.
+    keychain = Security.SecKeychainRef()
+    status = Security.SecKeychainCreate(
+        keychain_path, len(password), password, False, None, ctypes.byref(keychain)
+    )
+    _assert_no_error(status)
+
+    # Having created the keychain, we want to pass it off to the caller.
+    return keychain, tempdirectory
+
+
+def _load_items_from_file(keychain, path):
+    """
+    Given a single file, loads all the trust objects from it into arrays and
+    the keychain.
+    Returns a tuple of lists: the first list is a list of identities, the
+    second a list of certs.
+    """
+    certificates = []
+    identities = []
+    result_array = None
+
+    with open(path, "rb") as f:
+        raw_filedata = f.read()
+
+    try:
+        filedata = CoreFoundation.CFDataCreate(
+            CoreFoundation.kCFAllocatorDefault, raw_filedata, len(raw_filedata)
+        )
+        result_array = CoreFoundation.CFArrayRef()
+        result = Security.SecItemImport(
+            filedata,  # cert data
+            None,  # Filename, leaving it out for now
+            None,  # What the type of the file is, we don't care
+            None,  # what's in the file, we don't care
+            0,  # import flags
+            None,  # key params, can include passphrase in the future
+            keychain,  # The keychain to insert into
+            ctypes.byref(result_array),  # Results
+        )
+        _assert_no_error(result)
+
+        # A CFArray is not very useful to us as an intermediary
+        # representation, so we are going to extract the objects we want
+        # and then free the array. We don't need to keep hold of keys: the
+        # keychain already has them!
+        result_count = CoreFoundation.CFArrayGetCount(result_array)
+        for index in range(result_count):
+            item = CoreFoundation.CFArrayGetValueAtIndex(result_array, index)
+            item = ctypes.cast(item, CoreFoundation.CFTypeRef)
+
+            if _is_cert(item):
+                CoreFoundation.CFRetain(item)
+                certificates.append(item)
+            elif _is_identity(item):
+                CoreFoundation.CFRetain(item)
+                identities.append(item)
+    finally:
+        if result_array:
+            CoreFoundation.CFRelease(result_array)
+
+        CoreFoundation.CFRelease(filedata)
+
+    return (identities, certificates)
+
+
+def _load_client_cert_chain(keychain, *paths):
+    """
+    Load certificates and maybe keys from a number of files. Has the end goal
+    of returning a CFArray containing one SecIdentityRef, and then zero or more
+    SecCertificateRef objects, suitable for use as a client certificate trust
+    chain.
+    """
+    # Ok, the strategy.
+    #
+    # This relies on knowing that macOS will not give you a SecIdentityRef
+    # unless you have imported a key into a keychain. This is a somewhat
+    # artificial limitation of macOS (for example, it doesn't necessarily
+    # affect iOS), but there is nothing inside Security.framework that lets you
+    # get a SecIdentityRef without having a key in a keychain.
+    #
+    # So the policy here is we take all the files and iterate them in order.
+    # Each one will use SecItemImport to have one or more objects loaded from
+    # it. We will also point at a keychain that macOS can use to work with the
+    # private key.
+    #
+    # Once we have all the objects, we'll check what we actually have. If we
+    # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise,
+    # we'll take the first certificate (which we assume to be our leaf) and
+    # ask the keychain to give us a SecIdentityRef with that cert's associated
+    # key.
+    #
+    # We'll then return a CFArray containing the trust chain: one
+    # SecIdentityRef and then zero-or-more SecCertificateRef objects. The
+    # responsibility for freeing this CFArray will be with the caller. This
+    # CFArray must remain alive for the entire connection, so in practice it
+    # will be stored with a single SSLSocket, along with the reference to the
+    # keychain.
+    certificates = []
+    identities = []
+
+    # Filter out bad paths.
+    paths = (path for path in paths if path)
+
+    try:
+        for file_path in paths:
+            new_identities, new_certs = _load_items_from_file(keychain, file_path)
+            identities.extend(new_identities)
+            certificates.extend(new_certs)
+
+        # Ok, we have everything. The question is: do we have an identity? If
+        # not, we want to grab one from the first cert we have.
+        if not identities:
+            new_identity = Security.SecIdentityRef()
+            status = Security.SecIdentityCreateWithCertificate(
+                keychain, certificates[0], ctypes.byref(new_identity)
+            )
+            _assert_no_error(status)
+            identities.append(new_identity)
+
+            # We now want to release the original certificate, as we no longer
+            # need it.
+            CoreFoundation.CFRelease(certificates.pop(0))
+
+        # We now need to build a new CFArray that holds the trust chain.
+        trust_chain = CoreFoundation.CFArrayCreateMutable(
+            CoreFoundation.kCFAllocatorDefault,
+            0,
+            ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
+        )
+        for item in itertools.chain(identities, certificates):
+            # ArrayAppendValue does a CFRetain on the item. That's fine,
+            # because the finally block will release our other refs to them.
+            CoreFoundation.CFArrayAppendValue(trust_chain, item)
+
+        return trust_chain
+    finally:
+        for obj in itertools.chain(identities, certificates):
+            CoreFoundation.CFRelease(obj)
+
+
+TLS_PROTOCOL_VERSIONS = {
+    "SSLv2": (0, 2),
+    "SSLv3": (3, 0),
+    "TLSv1": (3, 1),
+    "TLSv1.1": (3, 2),
+    "TLSv1.2": (3, 3),
+}
+
+
+def _build_tls_unknown_ca_alert(version):
+    """
+    Builds a TLS alert record for an unknown CA.
+    """
+    ver_maj, ver_min = TLS_PROTOCOL_VERSIONS[version]
+    severity_fatal = 0x02
+    description_unknown_ca = 0x30
+    msg = struct.pack(">BB", severity_fatal, description_unknown_ca)
+    msg_len = len(msg)
+    record_type_alert = 0x15
+    record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg
+    return record
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py
new file mode 100644
index 0000000000000000000000000000000000000000..1717ee22cdf77849e2e273566c877f95311e691b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py
@@ -0,0 +1,314 @@
+"""
+This module provides a pool manager that uses Google App Engine's
+`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
+
+Example usage::
+
+    from pip._vendor.urllib3 import PoolManager
+    from pip._vendor.urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
+
+    if is_appengine_sandbox():
+        # AppEngineManager uses AppEngine's URLFetch API behind the scenes
+        http = AppEngineManager()
+    else:
+        # PoolManager uses a socket-level API behind the scenes
+        http = PoolManager()
+
+    r = http.request('GET', 'https://google.com/')
+
+There are `limitations <https://cloud.google.com/appengine/docs/python/\
+urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
+the best choice for your application. There are three options for using
+urllib3 on Google App Engine:
+
+1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
+   cost-effective in many circumstances as long as your usage is within the
+   limitations.
+2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
+   Sockets also have `limitations and restrictions
+   <https://cloud.google.com/appengine/docs/python/sockets/\
+   #limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
+   To use sockets, be sure to specify the following in your ``app.yaml``::
+
+        env_variables:
+            GAE_USE_SOCKETS_HTTPLIB : 'true'
+
+3. If you are using `App Engine Flexible
+<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
+:class:`PoolManager` without any configuration or special environment variables.
+"""
+
+from __future__ import absolute_import
+
+import io
+import logging
+import warnings
+
+from ..exceptions import (
+    HTTPError,
+    HTTPWarning,
+    MaxRetryError,
+    ProtocolError,
+    SSLError,
+    TimeoutError,
+)
+from ..packages.six.moves.urllib.parse import urljoin
+from ..request import RequestMethods
+from ..response import HTTPResponse
+from ..util.retry import Retry
+from ..util.timeout import Timeout
+from . import _appengine_environ
+
+try:
+    from google.appengine.api import urlfetch
+except ImportError:
+    urlfetch = None
+
+
+log = logging.getLogger(__name__)
+
+
+class AppEnginePlatformWarning(HTTPWarning):
+    pass
+
+
+class AppEnginePlatformError(HTTPError):
+    pass
+
+
+class AppEngineManager(RequestMethods):
+    """
+    Connection manager for Google App Engine sandbox applications.
+
+    This manager uses the URLFetch service directly instead of using the
+    emulated httplib, and is subject to URLFetch limitations as described in
+    the App Engine documentation `here
+    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
+
+    Notably it will raise an :class:`AppEnginePlatformError` if:
+        * URLFetch is not available.
+        * If you attempt to use this on App Engine Flexible, as full socket
+          support is available.
+        * If a request size is more than 10 megabytes.
+        * If a response size is more than 32 megabytes.
+        * If you use an unsupported request method such as OPTIONS.
+
+    Beyond those cases, it will raise normal urllib3 errors.
+    """
+
+    def __init__(
+        self,
+        headers=None,
+        retries=None,
+        validate_certificate=True,
+        urlfetch_retries=True,
+    ):
+        if not urlfetch:
+            raise AppEnginePlatformError(
+                "URLFetch is not available in this environment."
+            )
+
+        warnings.warn(
+            "urllib3 is using URLFetch on Google App Engine sandbox instead "
+            "of sockets. To use sockets directly instead of URLFetch see "
+            "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
+            AppEnginePlatformWarning,
+        )
+
+        RequestMethods.__init__(self, headers)
+        self.validate_certificate = validate_certificate
+        self.urlfetch_retries = urlfetch_retries
+
+        self.retries = retries or Retry.DEFAULT
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Return False to re-raise any potential exceptions
+        return False
+
+    def urlopen(
+        self,
+        method,
+        url,
+        body=None,
+        headers=None,
+        retries=None,
+        redirect=True,
+        timeout=Timeout.DEFAULT_TIMEOUT,
+        **response_kw
+    ):
+
+        retries = self._get_retries(retries, redirect)
+
+        try:
+            follow_redirects = redirect and retries.redirect != 0 and retries.total
+            response = urlfetch.fetch(
+                url,
+                payload=body,
+                method=method,
+                headers=headers or {},
+                allow_truncated=False,
+                follow_redirects=self.urlfetch_retries and follow_redirects,
+                deadline=self._get_absolute_timeout(timeout),
+                validate_certificate=self.validate_certificate,
+            )
+        except urlfetch.DeadlineExceededError as e:
+            raise TimeoutError(self, e)
+
+        except urlfetch.InvalidURLError as e:
+            if "too large" in str(e):
+                raise AppEnginePlatformError(
+                    "URLFetch request too large, URLFetch only "
+                    "supports requests up to 10mb in size.",
+                    e,
+                )
+            raise ProtocolError(e)
+
+        except urlfetch.DownloadError as e:
+            if "Too many redirects" in str(e):
+                raise MaxRetryError(self, url, reason=e)
+            raise ProtocolError(e)
+
+        except urlfetch.ResponseTooLargeError as e:
+            raise AppEnginePlatformError(
+                "URLFetch response too large, URLFetch only supports"
+                "responses up to 32mb in size.",
+                e,
+            )
+
+        except urlfetch.SSLCertificateError as e:
+            raise SSLError(e)
+
+        except urlfetch.InvalidMethodError as e:
+            raise AppEnginePlatformError(
+                "URLFetch does not support method: %s" % method, e
+            )
+
+        http_response = self._urlfetch_response_to_http_response(
+            response, retries=retries, **response_kw
+        )
+
+        # Handle redirect?
+        redirect_location = redirect and http_response.get_redirect_location()
+        if redirect_location:
+            # Check for redirect response
+            if self.urlfetch_retries and retries.raise_on_redirect:
+                raise MaxRetryError(self, url, "too many redirects")
+            else:
+                if http_response.status == 303:
+                    method = "GET"
+
+                try:
+                    retries = retries.increment(
+                        method, url, response=http_response, _pool=self
+                    )
+                except MaxRetryError:
+                    if retries.raise_on_redirect:
+                        raise MaxRetryError(self, url, "too many redirects")
+                    return http_response
+
+                retries.sleep_for_retry(http_response)
+                log.debug("Redirecting %s -> %s", url, redirect_location)
+                redirect_url = urljoin(url, redirect_location)
+                return self.urlopen(
+                    method,
+                    redirect_url,
+                    body,
+                    headers,
+                    retries=retries,
+                    redirect=redirect,
+                    timeout=timeout,
+                    **response_kw
+                )
+
+        # Check if we should retry the HTTP response.
+        has_retry_after = bool(http_response.headers.get("Retry-After"))
+        if retries.is_retry(method, http_response.status, has_retry_after):
+            retries = retries.increment(method, url, response=http_response, _pool=self)
+            log.debug("Retry: %s", url)
+            retries.sleep(http_response)
+            return self.urlopen(
+                method,
+                url,
+                body=body,
+                headers=headers,
+                retries=retries,
+                redirect=redirect,
+                timeout=timeout,
+                **response_kw
+            )
+
+        return http_response
+
+    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
+
+        if is_prod_appengine():
+            # Production GAE handles deflate encoding automatically, but does
+            # not remove the encoding header.
+            content_encoding = urlfetch_resp.headers.get("content-encoding")
+
+            if content_encoding == "deflate":
+                del urlfetch_resp.headers["content-encoding"]
+
+        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
+        # We have a full response's content,
+        # so let's make sure we don't report ourselves as chunked data.
+        if transfer_encoding == "chunked":
+            encodings = transfer_encoding.split(",")
+            encodings.remove("chunked")
+            urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)
+
+        original_response = HTTPResponse(
+            # In order for decoding to work, we must present the content as
+            # a file-like object.
+            body=io.BytesIO(urlfetch_resp.content),
+            msg=urlfetch_resp.header_msg,
+            headers=urlfetch_resp.headers,
+            status=urlfetch_resp.status_code,
+            **response_kw
+        )
+
+        return HTTPResponse(
+            body=io.BytesIO(urlfetch_resp.content),
+            headers=urlfetch_resp.headers,
+            status=urlfetch_resp.status_code,
+            original_response=original_response,
+            **response_kw
+        )
+
+    def _get_absolute_timeout(self, timeout):
+        if timeout is Timeout.DEFAULT_TIMEOUT:
+            return None  # Defer to URLFetch's default.
+        if isinstance(timeout, Timeout):
+            if timeout._read is not None or timeout._connect is not None:
+                warnings.warn(
+                    "URLFetch does not support granular timeout settings, "
+                    "reverting to total or default URLFetch timeout.",
+                    AppEnginePlatformWarning,
+                )
+            return timeout.total
+        return timeout
+
+    def _get_retries(self, retries, redirect):
+        if not isinstance(retries, Retry):
+            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
+
+        if retries.connect or retries.read or retries.redirect:
+            warnings.warn(
+                "URLFetch only supports total retries and does not "
+                "recognize connect, read, or redirect retry parameters.",
+                AppEnginePlatformWarning,
+            )
+
+        return retries
+
+
+# Alias methods from _appengine_environ to maintain public API interface.
+
+is_appengine = _appengine_environ.is_appengine
+is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
+is_local_appengine = _appengine_environ.is_local_appengine
+is_prod_appengine = _appengine_environ.is_prod_appengine
+is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py
new file mode 100644
index 0000000000000000000000000000000000000000..330766ef4f3403e05a6ad8ec30f25fe05fdbc199
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py
@@ -0,0 +1,137 @@
+from __future__ import absolute_import
+
+from base64 import b64encode
+
+from ..exceptions import UnrewindableBodyError
+from ..packages.six import b, integer_types
+
+# Pass as a value within ``headers`` to skip
+# emitting some HTTP headers that are added automatically.
+# The only headers that are supported are ``Accept-Encoding``,
+# ``Host``, and ``User-Agent``.
+SKIP_HEADER = "@@@SKIP_HEADER@@@"
+SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"])
+
+ACCEPT_ENCODING = "gzip,deflate"
+
+_FAILEDTELL = object()
+
+
+def make_headers(
+    keep_alive=None,
+    accept_encoding=None,
+    user_agent=None,
+    basic_auth=None,
+    proxy_basic_auth=None,
+    disable_cache=None,
+):
+    """
+    Shortcuts for generating request headers.
+
+    :param keep_alive:
+        If ``True``, adds 'connection: keep-alive' header.
+
+    :param accept_encoding:
+        Can be a boolean, list, or string.
+        ``True`` translates to 'gzip,deflate'.
+        List will get joined by comma.
+        String will be used as provided.
+
+    :param user_agent:
+        String representing the user-agent you want, such as
+        "python-urllib3/0.6"
+
+    :param basic_auth:
+        Colon-separated username:password string for 'authorization: basic ...'
+        auth header.
+
+    :param proxy_basic_auth:
+        Colon-separated username:password string for 'proxy-authorization: basic ...'
+        auth header.
+
+    :param disable_cache:
+        If ``True``, adds 'cache-control: no-cache' header.
+
+    Example::
+
+        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+        >>> make_headers(accept_encoding=True)
+        {'accept-encoding': 'gzip,deflate'}
+    """
+    headers = {}
+    if accept_encoding:
+        if isinstance(accept_encoding, str):
+            pass
+        elif isinstance(accept_encoding, list):
+            accept_encoding = ",".join(accept_encoding)
+        else:
+            accept_encoding = ACCEPT_ENCODING
+        headers["accept-encoding"] = accept_encoding
+
+    if user_agent:
+        headers["user-agent"] = user_agent
+
+    if keep_alive:
+        headers["connection"] = "keep-alive"
+
+    if basic_auth:
+        headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8")
+
+    if proxy_basic_auth:
+        headers["proxy-authorization"] = "Basic " + b64encode(
+            b(proxy_basic_auth)
+        ).decode("utf-8")
+
+    if disable_cache:
+        headers["cache-control"] = "no-cache"
+
+    return headers
+
+
+def set_file_position(body, pos):
+    """
+    If a position is provided, move file to that point.
+    Otherwise, we'll attempt to record a position for future use.
+    """
+    if pos is not None:
+        rewind_body(body, pos)
+    elif getattr(body, "tell", None) is not None:
+        try:
+            pos = body.tell()
+        except (IOError, OSError):
+            # This differentiates from None, allowing us to catch
+            # a failed `tell()` later when trying to rewind the body.
+            pos = _FAILEDTELL
+
+    return pos
+
+
+def rewind_body(body, body_pos):
+    """
+    Attempt to rewind body to a certain position.
+    Primarily used for request redirects and retries.
+
+    :param body:
+        File-like object that supports seek.
+
+    :param int pos:
+        Position to seek to in file.
+    """
+    body_seek = getattr(body, "seek", None)
+    if body_seek is not None and isinstance(body_pos, integer_types):
+        try:
+            body_seek(body_pos)
+        except (IOError, OSError):
+            raise UnrewindableBodyError(
+                "An error occurred when rewinding request body for redirect/retry."
+            )
+    elif body_pos is _FAILEDTELL:
+        raise UnrewindableBodyError(
+            "Unable to record file position for rewinding "
+            "request body during a redirect/retry."
+        )
+    else:
+        raise ValueError(
+            "body_pos must be of type integer, instead it was %s." % type(body_pos)
+        )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dd950c489607d06ecc5218292a1b55558b47be8
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py
@@ -0,0 +1,159 @@
+"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
+
+# Note: This file is under the PSF license as the code comes from the python
+# stdlib.   http://docs.python.org/3/license.html
+
+import re
+import sys
+
+# ipaddress has been backported to 2.6+ in pypi.  If it is installed on the
+# system, use it to handle IPAddress ServerAltnames (this was added in
+# python-3.5) otherwise only do DNS matching.  This allows
+# util.ssl_match_hostname to continue to be used in Python 2.7.
+try:
+    import ipaddress
+except ImportError:
+    ipaddress = None
+
+__version__ = "3.5.0.1"
+
+
+class CertificateError(ValueError):
+    pass
+
+
+def _dnsname_match(dn, hostname, max_wildcards=1):
+    """Matching according to RFC 6125, section 6.4.3
+
+    http://tools.ietf.org/html/rfc6125#section-6.4.3
+    """
+    pats = []
+    if not dn:
+        return False
+
+    # Ported from python3-syntax:
+    # leftmost, *remainder = dn.split(r'.')
+    parts = dn.split(r".")
+    leftmost = parts[0]
+    remainder = parts[1:]
+
+    wildcards = leftmost.count("*")
+    if wildcards > max_wildcards:
+        # Issue #17980: avoid denials of service by refusing more
+        # than one wildcard per fragment.  A survey of established
+        # policy among SSL implementations showed it to be a
+        # reasonable choice.
+        raise CertificateError(
+            "too many wildcards in certificate DNS name: " + repr(dn)
+        )
+
+    # speed up common case w/o wildcards
+    if not wildcards:
+        return dn.lower() == hostname.lower()
+
+    # RFC 6125, section 6.4.3, subitem 1.
+    # The client SHOULD NOT attempt to match a presented identifier in which
+    # the wildcard character comprises a label other than the left-most label.
+    if leftmost == "*":
+        # When '*' is a fragment by itself, it matches a non-empty dotless
+        # fragment.
+        pats.append("[^.]+")
+    elif leftmost.startswith("xn--") or hostname.startswith("xn--"):
+        # RFC 6125, section 6.4.3, subitem 3.
+        # The client SHOULD NOT attempt to match a presented identifier
+        # where the wildcard character is embedded within an A-label or
+        # U-label of an internationalized domain name.
+        pats.append(re.escape(leftmost))
+    else:
+        # Otherwise, '*' matches any dotless string, e.g. www*
+        pats.append(re.escape(leftmost).replace(r"\*", "[^.]*"))
+
+    # add the remaining fragments, ignore any wildcards
+    for frag in remainder:
+        pats.append(re.escape(frag))
+
+    pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE)
+    return pat.match(hostname)
+
+
+def _to_unicode(obj):
+    if isinstance(obj, str) and sys.version_info < (3,):
+        # ignored flake8 # F821 to support python 2.7 function
+        obj = unicode(obj, encoding="ascii", errors="strict")  # noqa: F821
+    return obj
+
+
+def _ipaddress_match(ipname, host_ip):
+    """Exact matching of IP addresses.
+
+    RFC 6125 explicitly doesn't define an algorithm for this
+    (section 1.7.2 - "Out of Scope").
+    """
+    # OpenSSL may add a trailing newline to a subjectAltName's IP address
+    # Divergence from upstream: ipaddress can't handle byte str
+    ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
+    return ip == host_ip
+
+
+def match_hostname(cert, hostname):
+    """Verify that *cert* (in decoded format as returned by
+    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
+    rules are followed, but IP addresses are not accepted for *hostname*.
+
+    CertificateError is raised on failure. On success, the function
+    returns nothing.
+    """
+    if not cert:
+        raise ValueError(
+            "empty or no certificate, match_hostname needs a "
+            "SSL socket or SSL context with either "
+            "CERT_OPTIONAL or CERT_REQUIRED"
+        )
+    try:
+        # Divergence from upstream: ipaddress can't handle byte str
+        host_ip = ipaddress.ip_address(_to_unicode(hostname))
+    except (UnicodeError, ValueError):
+        # ValueError: Not an IP address (common case)
+        # UnicodeError: Divergence from upstream: Have to deal with ipaddress not taking
+        # byte strings.  addresses should be all ascii, so we consider it not
+        # an ipaddress in this case
+        host_ip = None
+    except AttributeError:
+        # Divergence from upstream: Make ipaddress library optional
+        if ipaddress is None:
+            host_ip = None
+        else:  # Defensive
+            raise
+    dnsnames = []
+    san = cert.get("subjectAltName", ())
+    for key, value in san:
+        if key == "DNS":
+            if host_ip is None and _dnsname_match(value, hostname):
+                return
+            dnsnames.append(value)
+        elif key == "IP Address":
+            if host_ip is not None and _ipaddress_match(value, host_ip):
+                return
+            dnsnames.append(value)
+    if not dnsnames:
+        # The subject is only checked when there is no dNSName entry
+        # in subjectAltName
+        for sub in cert.get("subject", ()):
+            for key, value in sub:
+                # XXX according to RFC 2818, the most specific Common Name
+                # must be used.
+                if key == "commonName":
+                    if _dnsname_match(value, hostname):
+                        return
+                    dnsnames.append(value)
+    if len(dnsnames) > 1:
+        raise CertificateError(
+            "hostname %r "
+            "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames)))
+        )
+    elif len(dnsnames) == 1:
+        raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0]))
+    else:
+        raise CertificateError(
+            "no appropriate commonName or subjectAltName fields were found"
+        )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a7105d17916a7237f3df6e59d65ca82375f8803
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py
@@ -0,0 +1,221 @@
+import io
+import socket
+import ssl
+
+from ..exceptions import ProxySchemeUnsupported
+from ..packages import six
+
+SSL_BLOCKSIZE = 16384
+
+
+class SSLTransport:
+    """
+    The SSLTransport wraps an existing socket and establishes an SSL connection.
+
+    Contrary to Python's implementation of SSLSocket, it allows you to chain
+    multiple TLS connections together. It's particularly useful if you need to
+    implement TLS within TLS.
+
+    The class supports most of the socket API operations.
+    """
+
+    @staticmethod
+    def _validate_ssl_context_for_tls_in_tls(ssl_context):
+        """
+        Raises a ProxySchemeUnsupported if the provided ssl_context can't be used
+        for TLS in TLS.
+
+        The only requirement is that the ssl_context provides the 'wrap_bio'
+        methods.
+        """
+
+        if not hasattr(ssl_context, "wrap_bio"):
+            if six.PY2:
+                raise ProxySchemeUnsupported(
+                    "TLS in TLS requires SSLContext.wrap_bio() which isn't "
+                    "supported on Python 2"
+                )
+            else:
+                raise ProxySchemeUnsupported(
+                    "TLS in TLS requires SSLContext.wrap_bio() which isn't "
+                    "available on non-native SSLContext"
+                )
+
+    def __init__(
+        self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True
+    ):
+        """
+        Create an SSLTransport around socket using the provided ssl_context.
+        """
+        self.incoming = ssl.MemoryBIO()
+        self.outgoing = ssl.MemoryBIO()
+
+        self.suppress_ragged_eofs = suppress_ragged_eofs
+        self.socket = socket
+
+        self.sslobj = ssl_context.wrap_bio(
+            self.incoming, self.outgoing, server_hostname=server_hostname
+        )
+
+        # Perform initial handshake.
+        self._ssl_io_loop(self.sslobj.do_handshake)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *_):
+        self.close()
+
+    def fileno(self):
+        return self.socket.fileno()
+
+    def read(self, len=1024, buffer=None):
+        return self._wrap_ssl_read(len, buffer)
+
+    def recv(self, len=1024, flags=0):
+        if flags != 0:
+            raise ValueError("non-zero flags not allowed in calls to recv")
+        return self._wrap_ssl_read(len)
+
+    def recv_into(self, buffer, nbytes=None, flags=0):
+        if flags != 0:
+            raise ValueError("non-zero flags not allowed in calls to recv_into")
+        if buffer and (nbytes is None):
+            nbytes = len(buffer)
+        elif nbytes is None:
+            nbytes = 1024
+        return self.read(nbytes, buffer)
+
+    def sendall(self, data, flags=0):
+        if flags != 0:
+            raise ValueError("non-zero flags not allowed in calls to sendall")
+        count = 0
+        with memoryview(data) as view, view.cast("B") as byte_view:
+            amount = len(byte_view)
+            while count < amount:
+                v = self.send(byte_view[count:])
+                count += v
+
+    def send(self, data, flags=0):
+        if flags != 0:
+            raise ValueError("non-zero flags not allowed in calls to send")
+        response = self._ssl_io_loop(self.sslobj.write, data)
+        return response
+
+    def makefile(
+        self, mode="r", buffering=None, encoding=None, errors=None, newline=None
+    ):
+        """
+        Python's httpclient uses makefile and buffered io when reading HTTP
+        messages and we need to support it.
+
+        This is unfortunately a copy and paste of socket.py makefile with small
+        changes to point to the socket directly.
+        """
+        if not set(mode) <= {"r", "w", "b"}:
+            raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,))
+
+        writing = "w" in mode
+        reading = "r" in mode or not writing
+        assert reading or writing
+        binary = "b" in mode
+        rawmode = ""
+        if reading:
+            rawmode += "r"
+        if writing:
+            rawmode += "w"
+        raw = socket.SocketIO(self, rawmode)
+        self.socket._io_refs += 1
+        if buffering is None:
+            buffering = -1
+        if buffering < 0:
+            buffering = io.DEFAULT_BUFFER_SIZE
+        if buffering == 0:
+            if not binary:
+                raise ValueError("unbuffered streams must be binary")
+            return raw
+        if reading and writing:
+            buffer = io.BufferedRWPair(raw, raw, buffering)
+        elif reading:
+            buffer = io.BufferedReader(raw, buffering)
+        else:
+            assert writing
+            buffer = io.BufferedWriter(raw, buffering)
+        if binary:
+            return buffer
+        text = io.TextIOWrapper(buffer, encoding, errors, newline)
+        text.mode = mode
+        return text
+
+    def unwrap(self):
+        self._ssl_io_loop(self.sslobj.unwrap)
+
+    def close(self):
+        self.socket.close()
+
+    def getpeercert(self, binary_form=False):
+        return self.sslobj.getpeercert(binary_form)
+
+    def version(self):
+        return self.sslobj.version()
+
+    def cipher(self):
+        return self.sslobj.cipher()
+
+    def selected_alpn_protocol(self):
+        return self.sslobj.selected_alpn_protocol()
+
+    def selected_npn_protocol(self):
+        return self.sslobj.selected_npn_protocol()
+
+    def shared_ciphers(self):
+        return self.sslobj.shared_ciphers()
+
+    def compression(self):
+        return self.sslobj.compression()
+
+    def settimeout(self, value):
+        self.socket.settimeout(value)
+
+    def gettimeout(self):
+        return self.socket.gettimeout()
+
+    def _decref_socketios(self):
+        self.socket._decref_socketios()
+
+    def _wrap_ssl_read(self, len, buffer=None):
+        try:
+            return self._ssl_io_loop(self.sslobj.read, len, buffer)
+        except ssl.SSLError as e:
+            if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs:
+                return 0  # eof, return 0.
+            else:
+                raise
+
+    def _ssl_io_loop(self, func, *args):
+        """Performs an I/O loop between incoming/outgoing and the socket."""
+        should_loop = True
+        ret = None
+
+        while should_loop:
+            errno = None
+            try:
+                ret = func(*args)
+            except ssl.SSLError as e:
+                if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE):
+                    # WANT_READ, and WANT_WRITE are expected, others are not.
+                    raise e
+                errno = e.errno
+
+            buf = self.outgoing.read()
+            self.socket.sendall(buf)
+
+            if errno is None:
+                should_loop = False
+            elif errno == ssl.SSL_ERROR_WANT_READ:
+                buf = self.socket.recv(SSL_BLOCKSIZE)
+                if buf:
+                    self.incoming.write(buf)
+                else:
+                    self.incoming.write_eof()
+        return ret
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py
new file mode 100644
index 0000000000000000000000000000000000000000..dee69d9c408fe7c1ae2ac16fe5cd0788aaf1e108
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py
@@ -0,0 +1,14192 @@
+"""Adds docstrings to functions defined in the torch._C module."""
+
+import re
+
+import torch._C
+from torch._C import _add_docstr as add_docstr
+
+
+def parse_kwargs(desc):
+    r"""Map a description of args to a dictionary of {argname: description}.
+
+    Input:
+        ('    weight (Tensor): a weight tensor\n' +
+         '        Some optional description')
+    Output: {
+        'weight': \
+        'weight (Tensor): a weight tensor\n        Some optional description'
+    }
+    """
+    # Split on exactly 4 spaces after a newline
+    regx = re.compile(r"\n\s{4}(?!\s)")
+    kwargs = [section.strip() for section in regx.split(desc)]
+    kwargs = [section for section in kwargs if len(section) > 0]
+    return {desc.split(" ")[0]: desc for desc in kwargs}
+
+
+def merge_dicts(*dicts):
+    """Merge dictionaries into a single dictionary."""
+    return {x: d[x] for d in dicts for x in d}
+
+
+common_args = parse_kwargs(
+    """
+    input (Tensor): the input tensor.
+    generator (:class:`torch.Generator`, optional): a pseudorandom number generator for sampling
+    out (Tensor, optional): the output tensor.
+    memory_format (:class:`torch.memory_format`, optional): the desired memory format of
+        returned tensor. Default: ``torch.preserve_format``.
+"""
+)
+
+reduceops_common_args = merge_dicts(
+    common_args,
+    parse_kwargs(
+        """
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        If specified, the input tensor is casted to :attr:`dtype` before the operation
+        is performed. This is useful for preventing data type overflows. Default: None.
+    keepdim (bool): whether the output tensor has :attr:`dim` retained or not.
+"""
+    ),
+)
+
+multi_dim_common = merge_dicts(
+    reduceops_common_args,
+    parse_kwargs(
+        """
+    dim (int or tuple of ints): the dimension or dimensions to reduce.
+"""
+    ),
+    {
+        "keepdim_details": """
+If :attr:`keepdim` is ``True``, the output tensor is of the same size
+as :attr:`input` except in the dimension(s) :attr:`dim` where it is of size 1.
+Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in the
+output tensor having 1 (or ``len(dim)``) fewer dimension(s).
+"""
+    },
+    {
+        "opt_dim": """
+    dim (int or tuple of ints, optional): the dimension or dimensions to reduce.
+        If ``None``, all dimensions are reduced.
+"""
+    },
+)
+
+single_dim_common = merge_dicts(
+    reduceops_common_args,
+    parse_kwargs(
+        """
+    dim (int): the dimension to reduce.
+"""
+    ),
+    {
+        "keepdim_details": """If :attr:`keepdim` is ``True``, the output tensor is of the same size
+as :attr:`input` except in the dimension :attr:`dim` where it is of size 1.
+Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
+the output tensor having 1 fewer dimension than :attr:`input`."""
+    },
+)
+
+factory_common_args = merge_dicts(
+    common_args,
+    parse_kwargs(
+        """
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
+    layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
+        Default: ``torch.strided``.
+    device (:class:`torch.device`, optional): the desired device of returned tensor.
+        Default: if ``None``, uses the current device for the default tensor type
+        (see :func:`torch.set_default_device`). :attr:`device` will be the CPU
+        for CPU tensor types and the current CUDA device for CUDA tensor types.
+    requires_grad (bool, optional): If autograd should record operations on the
+        returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
+    memory_format (:class:`torch.memory_format`, optional): the desired memory format of
+        returned Tensor. Default: ``torch.contiguous_format``.
+    check_invariants (bool, optional): If sparse tensor invariants are checked.
+        Default: as returned by :func:`torch.sparse.check_sparse_tensor_invariants.is_enabled`,
+        initially False.
+"""
+    ),
+    {
+        "sparse_factory_device_note": """\
+.. note::
+
+   If the ``device`` argument is not specified the device of the given
+   :attr:`values` and indices tensor(s) must match. If, however, the
+   argument is specified the input Tensors will be converted to the
+   given device and in turn determine the device of the constructed
+   sparse tensor."""
+    },
+)
+
+factory_like_common_args = parse_kwargs(
+    """
+    input (Tensor): the size of :attr:`input` will determine size of the output tensor.
+    layout (:class:`torch.layout`, optional): the desired layout of returned tensor.
+        Default: if ``None``, defaults to the layout of :attr:`input`.
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned Tensor.
+        Default: if ``None``, defaults to the dtype of :attr:`input`.
+    device (:class:`torch.device`, optional): the desired device of returned tensor.
+        Default: if ``None``, defaults to the device of :attr:`input`.
+    requires_grad (bool, optional): If autograd should record operations on the
+        returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
+    memory_format (:class:`torch.memory_format`, optional): the desired memory format of
+        returned Tensor. Default: ``torch.preserve_format``.
+"""
+)
+
+factory_data_common_args = parse_kwargs(
+    """
+    data (array_like): Initial data for the tensor. Can be a list, tuple,
+        NumPy ``ndarray``, scalar, and other types.
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: if ``None``, infers data type from :attr:`data`.
+    device (:class:`torch.device`, optional): the desired device of returned tensor.
+        Default: if ``None``, uses the current device for the default tensor type
+        (see :func:`torch.set_default_device`). :attr:`device` will be the CPU
+        for CPU tensor types and the current CUDA device for CUDA tensor types.
+    requires_grad (bool, optional): If autograd should record operations on the
+        returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
+"""
+)
+
+tf32_notes = {
+    "tf32_note": """This operator supports :ref:`TensorFloat32<tf32_on_ampere>`."""
+}
+
+rocm_fp16_notes = {
+    "rocm_fp16_note": """On certain ROCm devices, when using float16 inputs this module will use \
+:ref:`different precision<fp16_on_mi200>` for backward."""
+}
+
+reproducibility_notes = {
+    "forward_reproducibility_note": """This operation may behave nondeterministically when given tensors on \
+a CUDA device. See :doc:`/notes/randomness` for more information.""",
+    "backward_reproducibility_note": """This operation may produce nondeterministic gradients when given tensors on \
+a CUDA device. See :doc:`/notes/randomness` for more information.""",
+    "cudnn_reproducibility_note": """In some circumstances when given tensors on a CUDA device \
+and using CuDNN, this operator may select a nondeterministic algorithm to increase performance. If this is \
+undesirable, you can try to make the operation deterministic (potentially at \
+a performance cost) by setting ``torch.backends.cudnn.deterministic = True``. \
+See :doc:`/notes/randomness` for more information.""",
+}
+
+sparse_support_notes = {
+    "sparse_beta_warning": """
+.. warning::
+    Sparse support is a beta feature and some layout(s)/dtype/device combinations may not be supported,
+    or may not have autograd support. If you notice missing functionality please
+    open a feature request.""",
+}
+
+add_docstr(
+    torch.abs,
+    r"""
+abs(input, *, out=None) -> Tensor
+
+Computes the absolute value of each element in :attr:`input`.
+
+.. math::
+    \text{out}_{i} = |\text{input}_{i}|
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.abs(torch.tensor([-1, -2, 3]))
+    tensor([ 1,  2,  3])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.absolute,
+    r"""
+absolute(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.abs`
+""",
+)
+
+add_docstr(
+    torch.acos,
+    r"""
+acos(input, *, out=None) -> Tensor
+
+Computes the inverse cosine of each element in :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \cos^{-1}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.3348, -0.5889,  0.2005, -0.1584])
+    >>> torch.acos(a)
+    tensor([ 1.2294,  2.2004,  1.3690,  1.7298])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arccos,
+    r"""
+arccos(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.acos`.
+""",
+)
+
+add_docstr(
+    torch.acosh,
+    r"""
+acosh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the inverse hyperbolic cosine of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \cosh^{-1}(\text{input}_{i})
+
+Note:
+    The domain of the inverse hyperbolic cosine is `[1, inf)` and values outside this range
+    will be mapped to ``NaN``, except for `+ INF` for which the output is mapped to `+ INF`.
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4).uniform_(1, 2)
+    >>> a
+    tensor([ 1.3192, 1.9915, 1.9674, 1.7151 ])
+    >>> torch.acosh(a)
+    tensor([ 0.7791, 1.3120, 1.2979, 1.1341 ])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arccosh,
+    r"""
+arccosh(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.acosh`.
+""",
+)
+
+add_docstr(
+    torch.index_add,
+    r"""
+index_add(input, dim, index, source, *, alpha=1, out=None) -> Tensor
+
+See :meth:`~Tensor.index_add_` for function description.
+""",
+)
+
+add_docstr(
+    torch.index_copy,
+    r"""
+index_copy(input, dim, index, source, *, out=None) -> Tensor
+
+See :meth:`~Tensor.index_add_` for function description.
+""",
+)
+
+add_docstr(
+    torch.index_reduce,
+    r"""
+index_reduce(input, dim, index, source, reduce, *, include_self=True, out=None) -> Tensor
+
+See :meth:`~Tensor.index_reduce_` for function description.
+""",
+)
+
+add_docstr(
+    torch.add,
+    r"""
+add(input, other, *, alpha=1, out=None) -> Tensor
+
+Adds :attr:`other`, scaled by :attr:`alpha`, to :attr:`input`.
+
+.. math::
+    \text{{out}}_i = \text{{input}}_i + \text{{alpha}} \times \text{{other}}_i
+"""
+    + r"""
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer, float, and complex inputs.
+
+Args:
+    {input}
+    other (Tensor or Number): the tensor or number to add to :attr:`input`.
+
+Keyword arguments:
+    alpha (Number): the multiplier for :attr:`other`.
+    {out}
+
+Examples::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.0202,  1.0985,  1.3506, -0.6056])
+    >>> torch.add(a, 20)
+    tensor([ 20.0202,  21.0985,  21.3506,  19.3944])
+
+    >>> b = torch.randn(4)
+    >>> b
+    tensor([-0.9732, -0.3497,  0.6245,  0.4022])
+    >>> c = torch.randn(4, 1)
+    >>> c
+    tensor([[ 0.3743],
+            [-1.7724],
+            [-0.5811],
+            [-0.8017]])
+    >>> torch.add(b, c, alpha=10)
+    tensor([[  2.7695,   3.3930,   4.3672,   4.1450],
+            [-18.6971, -18.0736, -17.0994, -17.3216],
+            [ -6.7845,  -6.1610,  -5.1868,  -5.4090],
+            [ -8.9902,  -8.3667,  -7.3925,  -7.6147]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.addbmm,
+    r"""
+addbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None) -> Tensor
+
+Performs a batch matrix-matrix product of matrices stored
+in :attr:`batch1` and :attr:`batch2`,
+with a reduced add step (all matrix multiplications get accumulated
+along the first dimension).
+:attr:`input` is added to the final result.
+
+:attr:`batch1` and :attr:`batch2` must be 3-D tensors each containing the
+same number of matrices.
+
+If :attr:`batch1` is a :math:`(b \times n \times m)` tensor, :attr:`batch2` is a
+:math:`(b \times m \times p)` tensor, :attr:`input` must be
+:ref:`broadcastable <broadcasting-semantics>` with a :math:`(n \times p)` tensor
+and :attr:`out` will be a :math:`(n \times p)` tensor.
+
+.. math::
+    out = \beta\ \text{input} + \alpha\ (\sum_{i=0}^{b-1} \text{batch1}_i \mathbin{@} \text{batch2}_i)
+
+If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in
+it will not be propagated.
+"""
+    + r"""
+For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and :attr:`alpha`
+must be real numbers, otherwise they should be integers.
+
+{tf32_note}
+
+{rocm_fp16_note}
+
+Args:
+    batch1 (Tensor): the first batch of matrices to be multiplied
+    batch2 (Tensor): the second batch of matrices to be multiplied
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`)
+    input (Tensor): matrix to be added
+    alpha (Number, optional): multiplier for `batch1 @ batch2` (:math:`\alpha`)
+    {out}
+
+Example::
+
+    >>> M = torch.randn(3, 5)
+    >>> batch1 = torch.randn(10, 3, 4)
+    >>> batch2 = torch.randn(10, 4, 5)
+    >>> torch.addbmm(M, batch1, batch2)
+    tensor([[  6.6311,   0.0503,   6.9768, -12.0362,  -2.1653],
+            [ -4.8185,  -1.4255,  -6.6760,   8.9453,   2.5743],
+            [ -3.8202,   4.3691,   1.0943,  -1.1109,   5.4730]])
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes
+    ),
+)
+
+add_docstr(
+    torch.addcdiv,
+    r"""
+addcdiv(input, tensor1, tensor2, *, value=1, out=None) -> Tensor
+
+Performs the element-wise division of :attr:`tensor1` by :attr:`tensor2`,
+multiplies the result by the scalar :attr:`value` and adds it to :attr:`input`.
+
+.. warning::
+    Integer division with addcdiv is no longer supported, and in a future
+    release addcdiv will perform a true division of tensor1 and tensor2.
+    The historic addcdiv behavior can be implemented as
+    (input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype)
+    for integer inputs and as (input + value * tensor1 / tensor2) for float inputs.
+    The future addcdiv behavior is just the latter implementation:
+    (input + value * tensor1 / tensor2), for all dtypes.
+
+.. math::
+    \text{out}_i = \text{input}_i + \text{value} \times \frac{\text{tensor1}_i}{\text{tensor2}_i}
+"""
+    + r"""
+
+The shapes of :attr:`input`, :attr:`tensor1`, and :attr:`tensor2` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+
+For inputs of type `FloatTensor` or `DoubleTensor`, :attr:`value` must be
+a real number, otherwise an integer.
+
+Args:
+    input (Tensor): the tensor to be added
+    tensor1 (Tensor): the numerator tensor
+    tensor2 (Tensor): the denominator tensor
+
+Keyword args:
+    value (Number, optional): multiplier for :math:`\text{{tensor1}} / \text{{tensor2}}`
+    {out}
+
+Example::
+
+    >>> t = torch.randn(1, 3)
+    >>> t1 = torch.randn(3, 1)
+    >>> t2 = torch.randn(1, 3)
+    >>> torch.addcdiv(t, t1, t2, value=0.1)
+    tensor([[-0.2312, -3.6496,  0.1312],
+            [-1.0428,  3.4292, -0.1030],
+            [-0.5369, -0.9829,  0.0430]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.addcmul,
+    r"""
+addcmul(input, tensor1, tensor2, *, value=1, out=None) -> Tensor
+
+Performs the element-wise multiplication of :attr:`tensor1`
+by :attr:`tensor2`, multiplies the result by the scalar :attr:`value`
+and adds it to :attr:`input`.
+
+.. math::
+    \text{out}_i = \text{input}_i + \text{value} \times \text{tensor1}_i \times \text{tensor2}_i
+"""
+    + r"""
+The shapes of :attr:`tensor`, :attr:`tensor1`, and :attr:`tensor2` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+
+For inputs of type `FloatTensor` or `DoubleTensor`, :attr:`value` must be
+a real number, otherwise an integer.
+
+Args:
+    input (Tensor): the tensor to be added
+    tensor1 (Tensor): the tensor to be multiplied
+    tensor2 (Tensor): the tensor to be multiplied
+
+Keyword args:
+    value (Number, optional): multiplier for :math:`tensor1 .* tensor2`
+    {out}
+
+Example::
+
+    >>> t = torch.randn(1, 3)
+    >>> t1 = torch.randn(3, 1)
+    >>> t2 = torch.randn(1, 3)
+    >>> torch.addcmul(t, t1, t2, value=0.1)
+    tensor([[-0.8635, -0.6391,  1.6174],
+            [-0.7617, -0.5879,  1.7388],
+            [-0.8353, -0.6249,  1.6511]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.addmm,
+    r"""
+addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None) -> Tensor
+
+Performs a matrix multiplication of the matrices :attr:`mat1` and :attr:`mat2`.
+The matrix :attr:`input` is added to the final result.
+
+If :attr:`mat1` is a :math:`(n \times m)` tensor, :attr:`mat2` is a
+:math:`(m \times p)` tensor, then :attr:`input` must be
+:ref:`broadcastable <broadcasting-semantics>` with a :math:`(n \times p)` tensor
+and :attr:`out` will be a :math:`(n \times p)` tensor.
+
+:attr:`alpha` and :attr:`beta` are scaling factors on matrix-vector product between
+:attr:`mat1` and :attr:`mat2` and the added matrix :attr:`input` respectively.
+
+.. math::
+    \text{out} = \beta\ \text{input} + \alpha\ (\text{mat1}_i \mathbin{@} \text{mat2}_i)
+
+If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in
+it will not be propagated.
+"""
+    + r"""
+For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and
+:attr:`alpha` must be real numbers, otherwise they should be integers.
+
+This operation has support for arguments with :ref:`sparse layouts<sparse-docs>`. If
+:attr:`input` is sparse the result will have the same layout and if :attr:`out`
+is provided it must have the same layout as :attr:`input`.
+
+{sparse_beta_warning}
+
+{tf32_note}
+
+{rocm_fp16_note}
+
+Args:
+    input (Tensor): matrix to be added
+    mat1 (Tensor): the first matrix to be matrix multiplied
+    mat2 (Tensor): the second matrix to be matrix multiplied
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`)
+    alpha (Number, optional): multiplier for :math:`mat1 @ mat2` (:math:`\alpha`)
+    {out}
+
+Example::
+
+    >>> M = torch.randn(2, 3)
+    >>> mat1 = torch.randn(2, 3)
+    >>> mat2 = torch.randn(3, 3)
+    >>> torch.addmm(M, mat1, mat2)
+    tensor([[-4.8716,  1.4671, -1.3746],
+            [ 0.7573, -3.9555, -2.8681]])
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes
+    ),
+)
+
+add_docstr(
+    torch.adjoint,
+    r"""
+adjoint(Tensor) -> Tensor
+Returns a view of the tensor conjugated and with the last two dimensions transposed.
+
+``x.adjoint()`` is equivalent to ``x.transpose(-2, -1).conj()`` for complex tensors and
+to ``x.transpose(-2, -1)`` for real tensors.
+
+Example::
+    >>> x = torch.arange(4, dtype=torch.float)
+    >>> A = torch.complex(x, x).reshape(2, 2)
+    >>> A
+    tensor([[0.+0.j, 1.+1.j],
+            [2.+2.j, 3.+3.j]])
+    >>> A.adjoint()
+    tensor([[0.-0.j, 2.-2.j],
+            [1.-1.j, 3.-3.j]])
+    >>> (A.adjoint() == A.mH).all()
+    tensor(True)
+""",
+)
+
+add_docstr(
+    torch.sspaddmm,
+    r"""
+sspaddmm(input, mat1, mat2, *, beta=1, alpha=1, out=None) -> Tensor
+
+Matrix multiplies a sparse tensor :attr:`mat1` with a dense tensor
+:attr:`mat2`, then adds the sparse tensor :attr:`input` to the result.
+
+Note: This function is equivalent to :func:`torch.addmm`, except
+:attr:`input` and :attr:`mat1` are sparse.
+
+Args:
+    input (Tensor): a sparse matrix to be added
+    mat1 (Tensor): a sparse matrix to be matrix multiplied
+    mat2 (Tensor): a dense matrix to be matrix multiplied
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`mat` (:math:`\beta`)
+    alpha (Number, optional): multiplier for :math:`mat1 @ mat2` (:math:`\alpha`)
+    {out}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.smm,
+    r"""
+smm(input, mat) -> Tensor
+
+Performs a matrix multiplication of the sparse matrix :attr:`input`
+with the dense matrix :attr:`mat`.
+
+Args:
+    input (Tensor): a sparse matrix to be matrix multiplied
+    mat (Tensor): a dense matrix to be matrix multiplied
+""",
+)
+
+add_docstr(
+    torch.addmv,
+    r"""
+addmv(input, mat, vec, *, beta=1, alpha=1, out=None) -> Tensor
+
+Performs a matrix-vector product of the matrix :attr:`mat` and
+the vector :attr:`vec`.
+The vector :attr:`input` is added to the final result.
+
+If :attr:`mat` is a :math:`(n \times m)` tensor, :attr:`vec` is a 1-D tensor of
+size `m`, then :attr:`input` must be
+:ref:`broadcastable <broadcasting-semantics>` with a 1-D tensor of size `n` and
+:attr:`out` will be 1-D tensor of size `n`.
+
+:attr:`alpha` and :attr:`beta` are scaling factors on matrix-vector product between
+:attr:`mat` and :attr:`vec` and the added tensor :attr:`input` respectively.
+
+.. math::
+    \text{out} = \beta\ \text{input} + \alpha\ (\text{mat} \mathbin{@} \text{vec})
+
+If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in
+it will not be propagated.
+"""
+    + r"""
+For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and
+:attr:`alpha` must be real numbers, otherwise they should be integers.
+
+Args:
+    input (Tensor): vector to be added
+    mat (Tensor): matrix to be matrix multiplied
+    vec (Tensor): vector to be matrix multiplied
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`)
+    alpha (Number, optional): multiplier for :math:`mat @ vec` (:math:`\alpha`)
+    {out}
+
+Example::
+
+    >>> M = torch.randn(2)
+    >>> mat = torch.randn(2, 3)
+    >>> vec = torch.randn(3)
+    >>> torch.addmv(M, mat, vec)
+    tensor([-0.3768, -5.5565])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.addr,
+    r"""
+addr(input, vec1, vec2, *, beta=1, alpha=1, out=None) -> Tensor
+
+Performs the outer-product of vectors :attr:`vec1` and :attr:`vec2`
+and adds it to the matrix :attr:`input`.
+
+Optional values :attr:`beta` and :attr:`alpha` are scaling factors on the
+outer product between :attr:`vec1` and :attr:`vec2` and the added matrix
+:attr:`input` respectively.
+
+.. math::
+    \text{out} = \beta\ \text{input} + \alpha\ (\text{vec1} \otimes \text{vec2})
+
+If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in
+it will not be propagated.
+"""
+    + r"""
+If :attr:`vec1` is a vector of size `n` and :attr:`vec2` is a vector
+of size `m`, then :attr:`input` must be
+:ref:`broadcastable <broadcasting-semantics>` with a matrix of size
+:math:`(n \times m)` and :attr:`out` will be a matrix of size
+:math:`(n \times m)`.
+
+Args:
+    input (Tensor): matrix to be added
+    vec1 (Tensor): the first vector of the outer product
+    vec2 (Tensor): the second vector of the outer product
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`)
+    alpha (Number, optional): multiplier for :math:`\text{{vec1}} \otimes \text{{vec2}}` (:math:`\alpha`)
+    {out}
+
+Example::
+
+    >>> vec1 = torch.arange(1., 4.)
+    >>> vec2 = torch.arange(1., 3.)
+    >>> M = torch.zeros(3, 2)
+    >>> torch.addr(M, vec1, vec2)
+    tensor([[ 1.,  2.],
+            [ 2.,  4.],
+            [ 3.,  6.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.allclose,
+    r"""
+allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False) -> bool
+
+This function checks if :attr:`input` and :attr:`other` satisfy the condition:
+
+.. math::
+    \lvert \text{input} - \text{other} \rvert \leq \texttt{atol} + \texttt{rtol} \times \lvert \text{other} \rvert
+"""
+    + r"""
+elementwise, for all elements of :attr:`input` and :attr:`other`. The behaviour of this function is analogous to
+`numpy.allclose <https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html>`_
+
+Args:
+    input (Tensor): first tensor to compare
+    other (Tensor): second tensor to compare
+    atol (float, optional): absolute tolerance. Default: 1e-08
+    rtol (float, optional): relative tolerance. Default: 1e-05
+    equal_nan (bool, optional): if ``True``, then two ``NaN`` s will be considered equal. Default: ``False``
+
+Example::
+
+    >>> torch.allclose(torch.tensor([10000., 1e-07]), torch.tensor([10000.1, 1e-08]))
+    False
+    >>> torch.allclose(torch.tensor([10000., 1e-08]), torch.tensor([10000.1, 1e-09]))
+    True
+    >>> torch.allclose(torch.tensor([1.0, float('nan')]), torch.tensor([1.0, float('nan')]))
+    False
+    >>> torch.allclose(torch.tensor([1.0, float('nan')]), torch.tensor([1.0, float('nan')]), equal_nan=True)
+    True
+""",
+)
+
+add_docstr(
+    torch.all,
+    r"""
+all(input) -> Tensor
+
+Tests if all elements in :attr:`input` evaluate to `True`.
+
+.. note:: This function matches the behaviour of NumPy in returning
+          output of dtype `bool` for all supported dtypes except `uint8`.
+          For `uint8` the dtype of output is `uint8` itself.
+
+Example::
+
+    >>> a = torch.rand(1, 2).bool()
+    >>> a
+    tensor([[False, True]], dtype=torch.bool)
+    >>> torch.all(a)
+    tensor(False, dtype=torch.bool)
+    >>> a = torch.arange(0, 3)
+    >>> a
+    tensor([0, 1, 2])
+    >>> torch.all(a)
+    tensor(False)
+
+.. function:: all(input, dim, keepdim=False, *, out=None) -> Tensor
+   :noindex:
+
+For each row of :attr:`input` in the given dimension :attr:`dim`,
+returns `True` if all elements in the row evaluate to `True` and `False` otherwise.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.rand(4, 2).bool()
+    >>> a
+    tensor([[True, True],
+            [True, False],
+            [True, True],
+            [True, True]], dtype=torch.bool)
+    >>> torch.all(a, dim=1)
+    tensor([ True, False,  True,  True], dtype=torch.bool)
+    >>> torch.all(a, dim=0)
+    tensor([ True, False], dtype=torch.bool)
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.any,
+    r"""
+any(input) -> Tensor
+
+Tests if any element in :attr:`input` evaluates to `True`.
+
+.. note:: This function matches the behaviour of NumPy in returning
+          output of dtype `bool` for all supported dtypes except `uint8`.
+          For `uint8` the dtype of output is `uint8` itself.
+
+Example::
+
+    >>> a = torch.rand(1, 2).bool()
+    >>> a
+    tensor([[False, True]], dtype=torch.bool)
+    >>> torch.any(a)
+    tensor(True, dtype=torch.bool)
+    >>> a = torch.arange(0, 3)
+    >>> a
+    tensor([0, 1, 2])
+    >>> torch.any(a)
+    tensor(True)
+
+.. function:: any(input, dim, keepdim=False, *, out=None) -> Tensor
+   :noindex:
+
+For each row of :attr:`input` in the given dimension :attr:`dim`,
+returns `True` if any element in the row evaluate to `True` and `False` otherwise.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4, 2) < 0
+    >>> a
+    tensor([[ True,  True],
+            [False,  True],
+            [ True,  True],
+            [False, False]])
+    >>> torch.any(a, 1)
+    tensor([ True,  True,  True, False])
+    >>> torch.any(a, 0)
+    tensor([True, True])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.angle,
+    r"""
+angle(input, *, out=None) -> Tensor
+
+Computes the element-wise angle (in radians) of the given :attr:`input` tensor.
+
+.. math::
+    \text{out}_{i} = angle(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+.. note:: Starting in PyTorch 1.8, angle returns pi for negative real numbers,
+          zero for non-negative real numbers, and propagates NaNs. Previously
+          the function would return zero for all real numbers and not propagate
+          floating-point NaNs.
+
+Example::
+
+    >>> torch.angle(torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]))*180/3.14159
+    tensor([ 135.,  135,  -45])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.as_strided,
+    r"""
+as_strided(input, size, stride, storage_offset=None) -> Tensor
+
+Create a view of an existing `torch.Tensor` :attr:`input` with specified
+:attr:`size`, :attr:`stride` and :attr:`storage_offset`.
+
+.. warning::
+    Prefer using other view functions, like :meth:`torch.Tensor.expand`,
+    to setting a view's strides manually with `as_strided`, as this
+    function's behavior depends on the implementation of a tensor's storage.
+    The constructed view of the storage must only refer to elements within
+    the storage or a runtime error will be thrown, and if the view is
+    "overlapped" (with multiple indices referring to the same element in
+    memory) its behavior is undefined.
+
+Args:
+    {input}
+    size (tuple or ints): the shape of the output tensor
+    stride (tuple or ints): the stride of the output tensor
+    storage_offset (int, optional): the offset in the underlying storage of the output tensor.
+        If ``None``, the storage_offset of the output tensor will match the input tensor.
+
+Example::
+
+    >>> x = torch.randn(3, 3)
+    >>> x
+    tensor([[ 0.9039,  0.6291,  1.0795],
+            [ 0.1586,  2.1939, -0.4900],
+            [-0.1909, -0.7503,  1.9355]])
+    >>> t = torch.as_strided(x, (2, 2), (1, 2))
+    >>> t
+    tensor([[0.9039, 1.0795],
+            [0.6291, 0.1586]])
+    >>> t = torch.as_strided(x, (2, 2), (1, 2), 1)
+    tensor([[0.6291, 0.1586],
+            [1.0795, 2.1939]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.as_tensor,
+    r"""
+as_tensor(data, dtype=None, device=None) -> Tensor
+
+Converts :attr:`data` into a tensor, sharing data and preserving autograd
+history if possible.
+
+If :attr:`data` is already a tensor with the requested dtype and device
+then :attr:`data` itself is returned, but if :attr:`data` is a
+tensor with a different dtype or device then it's copied as if using
+`data.to(dtype=dtype, device=device)`.
+
+If :attr:`data` is a NumPy array (an ndarray) with the same dtype and device then a
+tensor is constructed using :func:`torch.from_numpy`.
+
+.. seealso::
+
+    :func:`torch.tensor` never shares its data and creates a new "leaf tensor" (see :doc:`/notes/autograd`).
+
+
+Args:
+    {data}
+    {dtype}
+    device (:class:`torch.device`, optional): the device of the constructed tensor. If None and data is a tensor
+        then the device of data is used. If None and data is not a tensor then
+        the result tensor is constructed on the current device.
+
+
+Example::
+
+    >>> a = numpy.array([1, 2, 3])
+    >>> t = torch.as_tensor(a)
+    >>> t
+    tensor([ 1,  2,  3])
+    >>> t[0] = -1
+    >>> a
+    array([-1,  2,  3])
+
+    >>> a = numpy.array([1, 2, 3])
+    >>> t = torch.as_tensor(a, device=torch.device('cuda'))
+    >>> t
+    tensor([ 1,  2,  3])
+    >>> t[0] = -1
+    >>> a
+    array([1,  2,  3])
+""".format(
+        **factory_data_common_args
+    ),
+)
+
+add_docstr(
+    torch.asin,
+    r"""
+asin(input, *, out=None) -> Tensor
+
+Returns a new tensor with the arcsine of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \sin^{-1}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.5962,  1.4985, -0.4396,  1.4525])
+    >>> torch.asin(a)
+    tensor([-0.6387,     nan, -0.4552,     nan])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arcsin,
+    r"""
+arcsin(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.asin`.
+""",
+)
+
+add_docstr(
+    torch.asinh,
+    r"""
+asinh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the inverse hyperbolic sine of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \sinh^{-1}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.1606, -1.4267, -1.0899, -1.0250 ])
+    >>> torch.asinh(a)
+    tensor([ 0.1599, -1.1534, -0.9435, -0.8990 ])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arcsinh,
+    r"""
+arcsinh(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.asinh`.
+""",
+)
+
+add_docstr(
+    torch.atan,
+    r"""
+atan(input, *, out=None) -> Tensor
+
+Returns a new tensor with the arctangent of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \tan^{-1}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.2341,  0.2539, -0.6256, -0.6448])
+    >>> torch.atan(a)
+    tensor([ 0.2299,  0.2487, -0.5591, -0.5727])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arctan,
+    r"""
+arctan(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.atan`.
+""",
+)
+
+add_docstr(
+    torch.atan2,
+    r"""
+atan2(input, other, *, out=None) -> Tensor
+
+Element-wise arctangent of :math:`\text{{input}}_{{i}} / \text{{other}}_{{i}}`
+with consideration of the quadrant. Returns a new tensor with the signed angles
+in radians between vector :math:`(\text{{other}}_{{i}}, \text{{input}}_{{i}})`
+and vector :math:`(1, 0)`. (Note that :math:`\text{{other}}_{{i}}`, the second
+parameter, is the x-coordinate, while :math:`\text{{input}}_{{i}}`, the first
+parameter, is the y-coordinate.)
+
+The shapes of ``input`` and ``other`` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+
+Args:
+    input (Tensor): the first input tensor
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.9041,  0.0196, -0.3108, -2.4423])
+    >>> torch.atan2(a, torch.randn(4))
+    tensor([ 0.9833,  0.0811, -1.9743, -1.4151])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arctan2,
+    r"""
+arctan2(input, other, *, out=None) -> Tensor
+Alias for :func:`torch.atan2`.
+""",
+)
+
+add_docstr(
+    torch.atanh,
+    r"""
+atanh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the inverse hyperbolic tangent of the elements of :attr:`input`.
+
+Note:
+    The domain of the inverse hyperbolic tangent is `(-1, 1)` and values outside this range
+    will be mapped to ``NaN``, except for the values `1` and `-1` for which the output is
+    mapped to `+/-INF` respectively.
+
+.. math::
+    \text{out}_{i} = \tanh^{-1}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4).uniform_(-1, 1)
+    >>> a
+    tensor([ -0.9385, 0.2968, -0.8591, -0.1871 ])
+    >>> torch.atanh(a)
+    tensor([ -1.7253, 0.3060, -1.2899, -0.1893 ])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.arctanh,
+    r"""
+arctanh(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.atanh`.
+""",
+)
+
+add_docstr(
+    torch.asarray,
+    r"""
+asarray(obj, *, dtype=None, device=None, copy=None, requires_grad=False) -> Tensor
+
+Converts :attr:`obj` to a tensor.
+
+:attr:`obj` can be one of:
+
+1. a tensor
+2. a NumPy array or a NumPy scalar
+3. a DLPack capsule
+4. an object that implements Python's buffer protocol
+5. a scalar
+6. a sequence of scalars
+
+When :attr:`obj` is a tensor, NumPy array, or DLPack capsule the returned tensor will,
+by default, not require a gradient, have the same datatype as :attr:`obj`, be on the
+same device, and share memory with it. These properties can be controlled with the
+:attr:`dtype`, :attr:`device`, :attr:`copy`, and :attr:`requires_grad` keyword arguments.
+If the returned tensor is of a different datatype, on a different device, or a copy is
+requested then it will not share its memory with :attr:`obj`. If :attr:`requires_grad`
+is ``True`` then the returned tensor will require a gradient, and if :attr:`obj` is
+also a tensor with an autograd history then the returned tensor will have the same history.
+
+When :attr:`obj` is not a tensor, NumPy array, or DLPack capsule but implements Python's
+buffer protocol then the buffer is interpreted as an array of bytes grouped according to
+the size of the datatype passed to the :attr:`dtype` keyword argument. (If no datatype is
+passed then the default floating point datatype is used, instead.) The returned tensor
+will have the specified datatype (or default floating point datatype if none is specified)
+and, by default, be on the CPU device and share memory with the buffer.
+
+When :attr:`obj` is a NumPy scalar, the returned tensor will be a 0-dimensional tensor on
+the CPU and that doesn't share its memory (i.e. ``copy=True``). By default datatype will
+be the PyTorch datatype corresponding to the NumPy's scalar's datatype.
+
+When :attr:`obj` is none of the above but a scalar, or a sequence of scalars then the
+returned tensor will, by default, infer its datatype from the scalar values, be on the
+current default device, and not share its memory.
+
+.. seealso::
+
+    :func:`torch.tensor` creates a tensor that always copies the data from the input object.
+    :func:`torch.from_numpy` creates a tensor that always shares memory from NumPy arrays.
+    :func:`torch.frombuffer` creates a tensor that always shares memory from objects that
+    implement the buffer protocol.
+    :func:`torch.from_dlpack` creates a tensor that always shares memory from
+    DLPack capsules.
+
+Args:
+    obj (object): a tensor, NumPy array, DLPack Capsule, object that implements Python's
+           buffer protocol, scalar, or sequence of scalars.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the datatype of the returned tensor.
+           Default: ``None``, which causes the datatype of the returned tensor to be
+           inferred from :attr:`obj`.
+    copy (bool, optional): controls whether the returned tensor shares memory with :attr:`obj`.
+           Default: ``None``, which causes the returned tensor to share memory with :attr:`obj`
+           whenever possible. If ``True`` then the returned tensor does not share its memory.
+           If ``False`` then the returned tensor shares its memory with :attr:`obj` and an
+           error is thrown if it cannot.
+    device (:class:`torch.device`, optional): the device of the returned tensor.
+           Default: ``None``, which causes the device of :attr:`obj` to be used. Or, if
+           :attr:`obj` is a Python sequence, the current default device will be used.
+    requires_grad (bool, optional): whether the returned tensor requires grad.
+           Default: ``False``, which causes the returned tensor not to require a gradient.
+           If ``True``, then the returned tensor will require a gradient, and if :attr:`obj`
+           is also a tensor with an autograd history then the returned tensor will have
+           the same history.
+
+Example::
+
+    >>> a = torch.tensor([1, 2, 3])
+    >>> # Shares memory with tensor 'a'
+    >>> b = torch.asarray(a)
+    >>> a.data_ptr() == b.data_ptr()
+    True
+    >>> # Forces memory copy
+    >>> c = torch.asarray(a, copy=True)
+    >>> a.data_ptr() == c.data_ptr()
+    False
+
+    >>> a = torch.tensor([1., 2., 3.], requires_grad=True)
+    >>> b = a + 2
+    >>> b
+    tensor([3., 4., 5.], grad_fn=<AddBackward0>)
+    >>> # Shares memory with tensor 'b', with no grad
+    >>> c = torch.asarray(b)
+    >>> c
+    tensor([3., 4., 5.])
+    >>> # Shares memory with tensor 'b', retaining autograd history
+    >>> d = torch.asarray(b, requires_grad=True)
+    >>> d
+    tensor([3., 4., 5.], grad_fn=<AddBackward0>)
+
+    >>> array = numpy.array([1, 2, 3])
+    >>> # Shares memory with array 'array'
+    >>> t1 = torch.asarray(array)
+    >>> array.__array_interface__['data'][0] == t1.data_ptr()
+    True
+    >>> # Copies memory due to dtype mismatch
+    >>> t2 = torch.asarray(array, dtype=torch.float32)
+    >>> array.__array_interface__['data'][0] == t2.data_ptr()
+    False
+
+    >>> scalar = numpy.float64(0.5)
+    >>> torch.asarray(scalar)
+    tensor(0.5000, dtype=torch.float64)
+""",
+)
+
+add_docstr(
+    torch.baddbmm,
+    r"""
+baddbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None) -> Tensor
+
+Performs a batch matrix-matrix product of matrices in :attr:`batch1`
+and :attr:`batch2`.
+:attr:`input` is added to the final result.
+
+:attr:`batch1` and :attr:`batch2` must be 3-D tensors each containing the same
+number of matrices.
+
+If :attr:`batch1` is a :math:`(b \times n \times m)` tensor, :attr:`batch2` is a
+:math:`(b \times m \times p)` tensor, then :attr:`input` must be
+:ref:`broadcastable <broadcasting-semantics>` with a
+:math:`(b \times n \times p)` tensor and :attr:`out` will be a
+:math:`(b \times n \times p)` tensor. Both :attr:`alpha` and :attr:`beta` mean the
+same as the scaling factors used in :meth:`torch.addbmm`.
+
+.. math::
+    \text{out}_i = \beta\ \text{input}_i + \alpha\ (\text{batch1}_i \mathbin{@} \text{batch2}_i)
+
+If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in
+it will not be propagated.
+"""
+    + r"""
+For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and
+:attr:`alpha` must be real numbers, otherwise they should be integers.
+
+{tf32_note}
+
+{rocm_fp16_note}
+
+Args:
+    input (Tensor): the tensor to be added
+    batch1 (Tensor): the first batch of matrices to be multiplied
+    batch2 (Tensor): the second batch of matrices to be multiplied
+
+Keyword args:
+    beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`)
+    alpha (Number, optional): multiplier for :math:`\text{{batch1}} \mathbin{{@}} \text{{batch2}}` (:math:`\alpha`)
+    {out}
+
+Example::
+
+    >>> M = torch.randn(10, 3, 5)
+    >>> batch1 = torch.randn(10, 3, 4)
+    >>> batch2 = torch.randn(10, 4, 5)
+    >>> torch.baddbmm(M, batch1, batch2).size()
+    torch.Size([10, 3, 5])
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes
+    ),
+)
+
+add_docstr(
+    torch.bernoulli,
+    r"""
+bernoulli(input, *, generator=None, out=None) -> Tensor
+
+Draws binary random numbers (0 or 1) from a Bernoulli distribution.
+
+The :attr:`input` tensor should be a tensor containing probabilities
+to be used for drawing the binary random number.
+Hence, all values in :attr:`input` have to be in the range:
+:math:`0 \leq \text{input}_i \leq 1`.
+
+The :math:`\text{i}^{th}` element of the output tensor will draw a
+value :math:`1` according to the :math:`\text{i}^{th}` probability value given
+in :attr:`input`.
+
+.. math::
+    \text{out}_{i} \sim \mathrm{Bernoulli}(p = \text{input}_{i})
+"""
+    + r"""
+The returned :attr:`out` tensor only has values 0 or 1 and is of the same
+shape as :attr:`input`.
+
+:attr:`out` can have integral ``dtype``, but :attr:`input` must have floating
+point ``dtype``.
+
+Args:
+    input (Tensor): the input tensor of probability values for the Bernoulli distribution
+
+Keyword args:
+    {generator}
+    {out}
+
+Example::
+
+    >>> a = torch.empty(3, 3).uniform_(0, 1)  # generate a uniform random matrix with range [0, 1]
+    >>> a
+    tensor([[ 0.1737,  0.0950,  0.3609],
+            [ 0.7148,  0.0289,  0.2676],
+            [ 0.9456,  0.8937,  0.7202]])
+    >>> torch.bernoulli(a)
+    tensor([[ 1.,  0.,  0.],
+            [ 0.,  0.,  0.],
+            [ 1.,  1.,  1.]])
+
+    >>> a = torch.ones(3, 3) # probability of drawing "1" is 1
+    >>> torch.bernoulli(a)
+    tensor([[ 1.,  1.,  1.],
+            [ 1.,  1.,  1.],
+            [ 1.,  1.,  1.]])
+    >>> a = torch.zeros(3, 3) # probability of drawing "1" is 0
+    >>> torch.bernoulli(a)
+    tensor([[ 0.,  0.,  0.],
+            [ 0.,  0.,  0.],
+            [ 0.,  0.,  0.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bincount,
+    r"""
+bincount(input, weights=None, minlength=0) -> Tensor
+
+Count the frequency of each value in an array of non-negative ints.
+
+The number of bins (size 1) is one larger than the largest value in
+:attr:`input` unless :attr:`input` is empty, in which case the result is a
+tensor of size 0. If :attr:`minlength` is specified, the number of bins is at least
+:attr:`minlength` and if :attr:`input` is empty, then the result is tensor of size
+:attr:`minlength` filled with zeros. If ``n`` is the value at position ``i``,
+``out[n] += weights[i]`` if :attr:`weights` is specified else
+``out[n] += 1``.
+
+Note:
+    {backward_reproducibility_note}
+
+Arguments:
+    input (Tensor): 1-d int tensor
+    weights (Tensor): optional, weight for each value in the input tensor.
+        Should be of same size as input tensor.
+    minlength (int): optional, minimum number of bins. Should be non-negative.
+
+Returns:
+    output (Tensor): a tensor of shape ``Size([max(input) + 1])`` if
+    :attr:`input` is non-empty, else ``Size(0)``
+
+Example::
+
+    >>> input = torch.randint(0, 8, (5,), dtype=torch.int64)
+    >>> weights = torch.linspace(0, 1, steps=5)
+    >>> input, weights
+    (tensor([4, 3, 6, 3, 4]),
+     tensor([ 0.0000,  0.2500,  0.5000,  0.7500,  1.0000])
+
+    >>> torch.bincount(input)
+    tensor([0, 0, 0, 2, 2, 0, 1])
+
+    >>> input.bincount(weights)
+    tensor([0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5000])
+""".format(
+        **reproducibility_notes
+    ),
+)
+
+add_docstr(
+    torch.bitwise_not,
+    r"""
+bitwise_not(input, *, out=None) -> Tensor
+
+Computes the bitwise NOT of the given input tensor. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical NOT.
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_not(torch.tensor([-1, -2, 3], dtype=torch.int8))
+    tensor([ 0,  1, -4], dtype=torch.int8)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bmm,
+    r"""
+bmm(input, mat2, *, out=None) -> Tensor
+
+Performs a batch matrix-matrix product of matrices stored in :attr:`input`
+and :attr:`mat2`.
+
+:attr:`input` and :attr:`mat2` must be 3-D tensors each containing
+the same number of matrices.
+
+If :attr:`input` is a :math:`(b \times n \times m)` tensor, :attr:`mat2` is a
+:math:`(b \times m \times p)` tensor, :attr:`out` will be a
+:math:`(b \times n \times p)` tensor.
+
+.. math::
+    \text{out}_i = \text{input}_i \mathbin{@} \text{mat2}_i
+"""
+    + r"""
+{tf32_note}
+
+{rocm_fp16_note}
+
+.. note:: This function does not :ref:`broadcast <broadcasting-semantics>`.
+          For broadcasting matrix products, see :func:`torch.matmul`.
+
+Args:
+    input (Tensor): the first batch of matrices to be multiplied
+    mat2 (Tensor): the second batch of matrices to be multiplied
+
+Keyword Args:
+    {out}
+
+Example::
+
+    >>> input = torch.randn(10, 3, 4)
+    >>> mat2 = torch.randn(10, 4, 5)
+    >>> res = torch.bmm(input, mat2)
+    >>> res.size()
+    torch.Size([10, 3, 5])
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes
+    ),
+)
+
+add_docstr(
+    torch.bitwise_and,
+    r"""
+bitwise_and(input, other, *, out=None) -> Tensor
+
+Computes the bitwise AND of :attr:`input` and :attr:`other`. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical AND.
+
+Args:
+    input: the first input tensor
+    other: the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_and(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8))
+    tensor([1, 0,  3], dtype=torch.int8)
+    >>> torch.bitwise_and(torch.tensor([True, True, False]), torch.tensor([False, True, False]))
+    tensor([ False, True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bitwise_or,
+    r"""
+bitwise_or(input, other, *, out=None) -> Tensor
+
+Computes the bitwise OR of :attr:`input` and :attr:`other`. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical OR.
+
+Args:
+    input: the first input tensor
+    other: the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_or(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8))
+    tensor([-1, -2,  3], dtype=torch.int8)
+    >>> torch.bitwise_or(torch.tensor([True, True, False]), torch.tensor([False, True, False]))
+    tensor([ True, True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bitwise_xor,
+    r"""
+bitwise_xor(input, other, *, out=None) -> Tensor
+
+Computes the bitwise XOR of :attr:`input` and :attr:`other`. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical XOR.
+
+Args:
+    input: the first input tensor
+    other: the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_xor(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8))
+    tensor([-2, -2,  0], dtype=torch.int8)
+    >>> torch.bitwise_xor(torch.tensor([True, True, False]), torch.tensor([False, True, False]))
+    tensor([ True, False, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bitwise_left_shift,
+    r"""
+bitwise_left_shift(input, other, *, out=None) -> Tensor
+
+Computes the left arithmetic shift of :attr:`input` by :attr:`other` bits.
+The input tensor must be of integral type. This operator supports
+:ref:`broadcasting to a common shape <broadcasting-semantics>` and
+:ref:`type promotion <type-promotion-doc>`.
+
+The operation applied is:
+
+.. math::
+    \text{{out}}_i = \text{{input}}_i << \text{{other}}_i
+
+Args:
+    input (Tensor or Scalar): the first input tensor
+    other (Tensor or Scalar): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_left_shift(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8))
+    tensor([-2, -2, 24], dtype=torch.int8)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.bitwise_right_shift,
+    r"""
+bitwise_right_shift(input, other, *, out=None) -> Tensor
+
+Computes the right arithmetic shift of :attr:`input` by :attr:`other` bits.
+The input tensor must be of integral type. This operator supports
+:ref:`broadcasting to a common shape <broadcasting-semantics>` and
+:ref:`type promotion <type-promotion-doc>`.
+In any case, if the value of the right operand is negative or is greater
+or equal to the number of bits in the promoted left operand, the behavior is undefined.
+
+The operation applied is:
+
+.. math::
+    \text{{out}}_i = \text{{input}}_i >> \text{{other}}_i
+
+Args:
+    input (Tensor or Scalar): the first input tensor
+    other (Tensor or Scalar): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.bitwise_right_shift(torch.tensor([-2, -7, 31], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8))
+    tensor([-1, -7,  3], dtype=torch.int8)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.broadcast_to,
+    r"""
+broadcast_to(input, shape) -> Tensor
+
+Broadcasts :attr:`input` to the shape :attr:`\shape`.
+Equivalent to calling ``input.expand(shape)``. See :meth:`~Tensor.expand` for details.
+
+Args:
+    {input}
+    shape (list, tuple, or :class:`torch.Size`): the new shape.
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3])
+    >>> torch.broadcast_to(x, (3, 3))
+    tensor([[1, 2, 3],
+            [1, 2, 3],
+            [1, 2, 3]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.stack,
+    r"""
+stack(tensors, dim=0, *, out=None) -> Tensor
+
+Concatenates a sequence of tensors along a new dimension.
+
+All tensors need to be of the same size.
+
+.. seealso::
+
+    :func:`torch.cat` concatenates the given sequence along an existing dimension.
+
+Arguments:
+    tensors (sequence of Tensors): sequence of tensors to concatenate
+    dim (int, optional): dimension to insert. Has to be between 0 and the number
+        of dimensions of concatenated tensors (inclusive). Default: 0
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.randn(2, 3)
+    >>> x
+    tensor([[ 0.3367,  0.1288,  0.2345],
+            [ 0.2303, -1.1229, -0.1863]])
+    >>> x = torch.stack((x, x)) # same as torch.stack((x, x), dim=0)
+    >>> x
+    tensor([[[ 0.3367,  0.1288,  0.2345],
+             [ 0.2303, -1.1229, -0.1863]],
+
+            [[ 0.3367,  0.1288,  0.2345],
+             [ 0.2303, -1.1229, -0.1863]]])
+    >>> x.size()
+    torch.Size([2, 2, 3])
+    >>> x = torch.stack((x, x), dim=1)
+    tensor([[[ 0.3367,  0.1288,  0.2345],
+             [ 0.3367,  0.1288,  0.2345]],
+
+            [[ 0.2303, -1.1229, -0.1863],
+             [ 0.2303, -1.1229, -0.1863]]])
+    >>> x = torch.stack((x, x), dim=2)
+    tensor([[[ 0.3367,  0.3367],
+             [ 0.1288,  0.1288],
+             [ 0.2345,  0.2345]],
+
+            [[ 0.2303,  0.2303],
+             [-1.1229, -1.1229],
+             [-0.1863, -0.1863]]])
+    >>> x = torch.stack((x, x), dim=-1)
+    tensor([[[ 0.3367,  0.3367],
+             [ 0.1288,  0.1288],
+             [ 0.2345,  0.2345]],
+
+            [[ 0.2303,  0.2303],
+             [-1.1229, -1.1229],
+             [-0.1863, -0.1863]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.hstack,
+    r"""
+hstack(tensors, *, out=None) -> Tensor
+
+Stack tensors in sequence horizontally (column wise).
+
+This is equivalent to concatenation along the first axis for 1-D tensors, and along the second axis for all other tensors.
+
+Args:
+    tensors (sequence of Tensors): sequence of tensors to concatenate
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([1, 2, 3])
+    >>> b = torch.tensor([4, 5, 6])
+    >>> torch.hstack((a,b))
+    tensor([1, 2, 3, 4, 5, 6])
+    >>> a = torch.tensor([[1],[2],[3]])
+    >>> b = torch.tensor([[4],[5],[6]])
+    >>> torch.hstack((a,b))
+    tensor([[1, 4],
+            [2, 5],
+            [3, 6]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.vstack,
+    r"""
+vstack(tensors, *, out=None) -> Tensor
+
+Stack tensors in sequence vertically (row wise).
+
+This is equivalent to concatenation along the first axis after all 1-D tensors have been reshaped by :func:`torch.atleast_2d`.
+
+Args:
+    tensors (sequence of Tensors): sequence of tensors to concatenate
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([1, 2, 3])
+    >>> b = torch.tensor([4, 5, 6])
+    >>> torch.vstack((a,b))
+    tensor([[1, 2, 3],
+            [4, 5, 6]])
+    >>> a = torch.tensor([[1],[2],[3]])
+    >>> b = torch.tensor([[4],[5],[6]])
+    >>> torch.vstack((a,b))
+    tensor([[1],
+            [2],
+            [3],
+            [4],
+            [5],
+            [6]])
+
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.dstack,
+    r"""
+dstack(tensors, *, out=None) -> Tensor
+
+Stack tensors in sequence depthwise (along third axis).
+
+This is equivalent to concatenation along the third axis after 1-D and 2-D tensors have been reshaped by :func:`torch.atleast_3d`.
+
+Args:
+    tensors (sequence of Tensors): sequence of tensors to concatenate
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([1, 2, 3])
+    >>> b = torch.tensor([4, 5, 6])
+    >>> torch.dstack((a,b))
+    tensor([[[1, 4],
+             [2, 5],
+             [3, 6]]])
+    >>> a = torch.tensor([[1],[2],[3]])
+    >>> b = torch.tensor([[4],[5],[6]])
+    >>> torch.dstack((a,b))
+    tensor([[[1, 4]],
+            [[2, 5]],
+            [[3, 6]]])
+
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.tensor_split,
+    r"""
+tensor_split(input, indices_or_sections, dim=0) -> List of Tensors
+
+Splits a tensor into multiple sub-tensors, all of which are views of :attr:`input`,
+along dimension :attr:`dim` according to the indices or number of sections specified
+by :attr:`indices_or_sections`. This function is based on NumPy's
+:func:`numpy.array_split`.
+
+Args:
+    input (Tensor): the tensor to split
+    indices_or_sections (Tensor, int or list or tuple of ints):
+        If :attr:`indices_or_sections` is an integer ``n`` or a zero dimensional long tensor
+        with value ``n``, :attr:`input` is split into ``n`` sections along dimension :attr:`dim`.
+        If :attr:`input` is divisible by ``n`` along dimension :attr:`dim`, each
+        section will be of equal size, :code:`input.size(dim) / n`. If :attr:`input`
+        is not divisible by ``n``, the sizes of the first :code:`int(input.size(dim) % n)`
+        sections will have size :code:`int(input.size(dim) / n) + 1`, and the rest will
+        have size :code:`int(input.size(dim) / n)`.
+
+        If :attr:`indices_or_sections` is a list or tuple of ints, or a one-dimensional long
+        tensor, then :attr:`input` is split along dimension :attr:`dim` at each of the indices
+        in the list, tuple or tensor. For instance, :code:`indices_or_sections=[2, 3]` and :code:`dim=0`
+        would result in the tensors :code:`input[:2]`, :code:`input[2:3]`, and :code:`input[3:]`.
+
+        If :attr:`indices_or_sections` is a tensor, it must be a zero-dimensional or one-dimensional
+        long tensor on the CPU.
+
+    dim (int, optional): dimension along which to split the tensor. Default: ``0``
+
+Example::
+
+    >>> x = torch.arange(8)
+    >>> torch.tensor_split(x, 3)
+    (tensor([0, 1, 2]), tensor([3, 4, 5]), tensor([6, 7]))
+
+    >>> x = torch.arange(7)
+    >>> torch.tensor_split(x, 3)
+    (tensor([0, 1, 2]), tensor([3, 4]), tensor([5, 6]))
+    >>> torch.tensor_split(x, (1, 6))
+    (tensor([0]), tensor([1, 2, 3, 4, 5]), tensor([6]))
+
+    >>> x = torch.arange(14).reshape(2, 7)
+    >>> x
+    tensor([[ 0,  1,  2,  3,  4,  5,  6],
+            [ 7,  8,  9, 10, 11, 12, 13]])
+    >>> torch.tensor_split(x, 3, dim=1)
+    (tensor([[0, 1, 2],
+            [7, 8, 9]]),
+     tensor([[ 3,  4],
+            [10, 11]]),
+     tensor([[ 5,  6],
+            [12, 13]]))
+    >>> torch.tensor_split(x, (1, 6), dim=1)
+    (tensor([[0],
+            [7]]),
+     tensor([[ 1,  2,  3,  4,  5],
+            [ 8,  9, 10, 11, 12]]),
+     tensor([[ 6],
+            [13]]))
+""",
+)
+
+add_docstr(
+    torch.chunk,
+    r"""
+chunk(input, chunks, dim=0) -> List of Tensors
+
+Attempts to split a tensor into the specified number of chunks. Each chunk is a view of
+the input tensor.
+
+
+.. note::
+
+    This function may return fewer than the specified number of chunks!
+
+.. seealso::
+
+    :func:`torch.tensor_split` a function that always returns exactly the specified number of chunks
+
+If the tensor size along the given dimension :attr:`dim` is divisible by :attr:`chunks`,
+all returned chunks will be the same size.
+If the tensor size along the given dimension :attr:`dim` is not divisible by :attr:`chunks`,
+all returned chunks will be the same size, except the last one.
+If such division is not possible, this function may return fewer
+than the specified number of chunks.
+
+Arguments:
+    input (Tensor): the tensor to split
+    chunks (int): number of chunks to return
+    dim (int): dimension along which to split the tensor
+
+Example:
+    >>> torch.arange(11).chunk(6)
+    (tensor([0, 1]),
+     tensor([2, 3]),
+     tensor([4, 5]),
+     tensor([6, 7]),
+     tensor([8, 9]),
+     tensor([10]))
+    >>> torch.arange(12).chunk(6)
+    (tensor([0, 1]),
+     tensor([2, 3]),
+     tensor([4, 5]),
+     tensor([6, 7]),
+     tensor([8, 9]),
+     tensor([10, 11]))
+    >>> torch.arange(13).chunk(6)
+    (tensor([0, 1, 2]),
+     tensor([3, 4, 5]),
+     tensor([6, 7, 8]),
+     tensor([ 9, 10, 11]),
+     tensor([12]))
+""",
+)
+
+add_docstr(
+    torch.unsafe_chunk,
+    r"""
+unsafe_chunk(input, chunks, dim=0) -> List of Tensors
+
+Works like :func:`torch.chunk` but without enforcing the autograd restrictions
+on inplace modification of the outputs.
+
+.. warning::
+    This function is safe to use as long as only the input, or only the outputs
+    are modified inplace after calling this function. It is user's
+    responsibility to ensure that is the case. If both the input and one or more
+    of the outputs are modified inplace, gradients computed by autograd will be
+    silently incorrect.
+""",
+)
+
+add_docstr(
+    torch.unsafe_split,
+    r"""
+unsafe_split(tensor, split_size_or_sections, dim=0) -> List of Tensors
+
+Works like :func:`torch.split` but without enforcing the autograd restrictions
+on inplace modification of the outputs.
+
+.. warning::
+    This function is safe to use as long as only the input, or only the outputs
+    are modified inplace after calling this function. It is user's
+    responsibility to ensure that is the case. If both the input and one or more
+    of the outputs are modified inplace, gradients computed by autograd will be
+    silently incorrect.
+""",
+)
+
+add_docstr(
+    torch.hsplit,
+    r"""
+hsplit(input, indices_or_sections) -> List of Tensors
+
+Splits :attr:`input`, a tensor with one or more dimensions, into multiple tensors
+horizontally according to :attr:`indices_or_sections`. Each split is a view of
+:attr:`input`.
+
+If :attr:`input` is one dimensional this is equivalent to calling
+torch.tensor_split(input, indices_or_sections, dim=0) (the split dimension is
+zero), and if :attr:`input` has two or more dimensions it's equivalent to calling
+torch.tensor_split(input, indices_or_sections, dim=1) (the split dimension is 1),
+except that if :attr:`indices_or_sections` is an integer it must evenly divide
+the split dimension or a runtime error will be thrown.
+
+This function is based on NumPy's :func:`numpy.hsplit`.
+
+Args:
+    input (Tensor): tensor to split.
+    indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
+
+Example::
+    >>> t = torch.arange(16.0).reshape(4,4)
+    >>> t
+    tensor([[ 0.,  1.,  2.,  3.],
+            [ 4.,  5.,  6.,  7.],
+            [ 8.,  9., 10., 11.],
+            [12., 13., 14., 15.]])
+    >>> torch.hsplit(t, 2)
+    (tensor([[ 0.,  1.],
+             [ 4.,  5.],
+             [ 8.,  9.],
+             [12., 13.]]),
+     tensor([[ 2.,  3.],
+             [ 6.,  7.],
+             [10., 11.],
+             [14., 15.]]))
+    >>> torch.hsplit(t, [3, 6])
+    (tensor([[ 0.,  1.,  2.],
+             [ 4.,  5.,  6.],
+             [ 8.,  9., 10.],
+             [12., 13., 14.]]),
+     tensor([[ 3.],
+             [ 7.],
+             [11.],
+             [15.]]),
+     tensor([], size=(4, 0)))
+
+""",
+)
+
+add_docstr(
+    torch.vsplit,
+    r"""
+vsplit(input, indices_or_sections) -> List of Tensors
+
+Splits :attr:`input`, a tensor with two or more dimensions, into multiple tensors
+vertically according to :attr:`indices_or_sections`. Each split is a view of
+:attr:`input`.
+
+This is equivalent to calling torch.tensor_split(input, indices_or_sections, dim=0)
+(the split dimension is 0), except that if :attr:`indices_or_sections` is an integer
+it must evenly divide the split dimension or a runtime error will be thrown.
+
+This function is based on NumPy's :func:`numpy.vsplit`.
+
+Args:
+    input (Tensor): tensor to split.
+    indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
+
+Example::
+    >>> t = torch.arange(16.0).reshape(4,4)
+    >>> t
+    tensor([[ 0.,  1.,  2.,  3.],
+            [ 4.,  5.,  6.,  7.],
+            [ 8.,  9., 10., 11.],
+            [12., 13., 14., 15.]])
+    >>> torch.vsplit(t, 2)
+    (tensor([[0., 1., 2., 3.],
+             [4., 5., 6., 7.]]),
+     tensor([[ 8.,  9., 10., 11.],
+             [12., 13., 14., 15.]]))
+    >>> torch.vsplit(t, [3, 6])
+    (tensor([[ 0.,  1.,  2.,  3.],
+             [ 4.,  5.,  6.,  7.],
+             [ 8.,  9., 10., 11.]]),
+     tensor([[12., 13., 14., 15.]]),
+     tensor([], size=(0, 4)))
+
+""",
+)
+
+add_docstr(
+    torch.dsplit,
+    r"""
+dsplit(input, indices_or_sections) -> List of Tensors
+
+Splits :attr:`input`, a tensor with three or more dimensions, into multiple tensors
+depthwise according to :attr:`indices_or_sections`. Each split is a view of
+:attr:`input`.
+
+This is equivalent to calling torch.tensor_split(input, indices_or_sections, dim=2)
+(the split dimension is 2), except that if :attr:`indices_or_sections` is an integer
+it must evenly divide the split dimension or a runtime error will be thrown.
+
+This function is based on NumPy's :func:`numpy.dsplit`.
+
+Args:
+    input (Tensor): tensor to split.
+    indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
+
+Example::
+    >>> t = torch.arange(16.0).reshape(2, 2, 4)
+    >>> t
+    tensor([[[ 0.,  1.,  2.,  3.],
+             [ 4.,  5.,  6.,  7.]],
+            [[ 8.,  9., 10., 11.],
+             [12., 13., 14., 15.]]])
+    >>> torch.dsplit(t, 2)
+    (tensor([[[ 0.,  1.],
+            [ 4.,  5.]],
+           [[ 8.,  9.],
+            [12., 13.]]]),
+     tensor([[[ 2.,  3.],
+              [ 6.,  7.]],
+             [[10., 11.],
+              [14., 15.]]]))
+
+    >>> torch.dsplit(t, [3, 6])
+    (tensor([[[ 0.,  1.,  2.],
+              [ 4.,  5.,  6.]],
+             [[ 8.,  9., 10.],
+              [12., 13., 14.]]]),
+     tensor([[[ 3.],
+              [ 7.]],
+             [[11.],
+              [15.]]]),
+     tensor([], size=(2, 2, 0)))
+
+""",
+)
+
+add_docstr(
+    torch.can_cast,
+    r"""
+can_cast(from, to) -> bool
+
+Determines if a type conversion is allowed under PyTorch casting rules
+described in the type promotion :ref:`documentation <type-promotion-doc>`.
+
+Args:
+    from (dtype): The original :class:`torch.dtype`.
+    to (dtype): The target :class:`torch.dtype`.
+
+Example::
+
+    >>> torch.can_cast(torch.double, torch.float)
+    True
+    >>> torch.can_cast(torch.float, torch.int)
+    False
+""",
+)
+
+add_docstr(
+    torch.corrcoef,
+    r"""
+corrcoef(input) -> Tensor
+
+Estimates the Pearson product-moment correlation coefficient matrix of the variables given by the :attr:`input` matrix,
+where rows are the variables and columns are the observations.
+
+.. note::
+
+    The correlation coefficient matrix R is computed using the covariance matrix C as given by
+    :math:`R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }`
+
+.. note::
+
+    Due to floating point rounding, the resulting array may not be Hermitian and its diagonal elements may not be 1.
+    The real and imaginary values are clipped to the interval [-1, 1] in an attempt to improve this situation.
+
+Args:
+    input (Tensor): A 2D matrix containing multiple variables and observations, or a
+        Scalar or 1D vector representing a single variable.
+
+Returns:
+    (Tensor) The correlation coefficient matrix of the variables.
+
+.. seealso::
+
+        :func:`torch.cov` covariance matrix.
+
+Example::
+
+    >>> x = torch.tensor([[0, 1, 2], [2, 1, 0]])
+    >>> torch.corrcoef(x)
+    tensor([[ 1., -1.],
+            [-1.,  1.]])
+    >>> x = torch.randn(2, 4)
+    >>> x
+    tensor([[-0.2678, -0.0908, -0.3766,  0.2780],
+            [-0.5812,  0.1535,  0.2387,  0.2350]])
+    >>> torch.corrcoef(x)
+    tensor([[1.0000, 0.3582],
+            [0.3582, 1.0000]])
+    >>> torch.corrcoef(x[0])
+    tensor(1.)
+""",
+)
+
+add_docstr(
+    torch.cov,
+    r"""
+cov(input, *, correction=1, fweights=None, aweights=None) -> Tensor
+
+Estimates the covariance matrix of the variables given by the :attr:`input` matrix, where rows are
+the variables and columns are the observations.
+
+A covariance matrix is a square matrix giving the covariance of each pair of variables. The diagonal contains
+the variance of each variable (covariance of a variable with itself). By definition, if :attr:`input` represents
+a single variable (Scalar or 1D) then its variance is returned.
+
+The sample covariance of the variables :math:`x` and :math:`y` is given by:
+
+.. math::
+    \text{cov}(x,y) = \frac{\sum^{N}_{i = 1}(x_{i} - \bar{x})(y_{i} - \bar{y})}{\max(0,~N~-~\delta N)}
+
+where :math:`\bar{x}` and :math:`\bar{y}` are the simple means of the :math:`x` and :math:`y` respectively, and
+:math:`\delta N` is the :attr:`correction`.
+
+If :attr:`fweights` and/or :attr:`aweights` are provided, the weighted covariance
+is calculated, which is given by:
+
+.. math::
+    \text{cov}_w(x,y) = \frac{\sum^{N}_{i = 1}w_i(x_{i} - \mu_x^*)(y_{i} - \mu_y^*)}
+    {\max(0,~\sum^{N}_{i = 1}w_i~-~\frac{\sum^{N}_{i = 1}w_ia_i}{\sum^{N}_{i = 1}w_i}~\delta N)}
+
+where :math:`w` denotes :attr:`fweights` or :attr:`aweights` (``f`` and ``a`` for brevity) based on whichever is
+provided, or :math:`w = f \times a` if both are provided, and
+:math:`\mu_x^* = \frac{\sum^{N}_{i = 1}w_ix_{i} }{\sum^{N}_{i = 1}w_i}` is the weighted mean of the variable. If not
+provided, ``f`` and/or ``a`` can be seen as a :math:`\mathbb{1}` vector of appropriate size.
+
+Args:
+    input (Tensor): A 2D matrix containing multiple variables and observations, or a
+        Scalar or 1D vector representing a single variable.
+
+Keyword Args:
+    correction (int, optional): difference between the sample size and sample degrees of freedom.
+        Defaults to Bessel's correction, ``correction = 1`` which returns the unbiased estimate,
+        even if both :attr:`fweights` and :attr:`aweights` are specified. ``correction = 0``
+        will return the simple average. Defaults to ``1``.
+    fweights (tensor, optional): A Scalar or 1D tensor of observation vector frequencies representing the number of
+        times each observation should be repeated. Its numel must equal the number of columns of :attr:`input`.
+        Must have integral dtype. Ignored if ``None``. Defaults to ``None``.
+    aweights (tensor, optional): A Scalar or 1D array of observation vector weights.
+        These relative weights are typically large for observations considered “important” and smaller for
+        observations considered less “important”. Its numel must equal the number of columns of :attr:`input`.
+        Must have floating point dtype. Ignored if ``None``. Defaults to ``None``.
+
+Returns:
+    (Tensor) The covariance matrix of the variables.
+
+.. seealso::
+
+        :func:`torch.corrcoef` normalized covariance matrix.
+
+Example::
+    >>> x = torch.tensor([[0, 2], [1, 1], [2, 0]]).T
+    >>> x
+    tensor([[0, 1, 2],
+            [2, 1, 0]])
+    >>> torch.cov(x)
+    tensor([[ 1., -1.],
+            [-1.,  1.]])
+    >>> torch.cov(x, correction=0)
+    tensor([[ 0.6667, -0.6667],
+            [-0.6667,  0.6667]])
+    >>> fw = torch.randint(1, 10, (3,))
+    >>> fw
+    tensor([1, 6, 9])
+    >>> aw = torch.rand(3)
+    >>> aw
+    tensor([0.4282, 0.0255, 0.4144])
+    >>> torch.cov(x, fweights=fw, aweights=aw)
+    tensor([[ 0.4169, -0.4169],
+            [-0.4169,  0.4169]])
+""",
+)
+
+add_docstr(
+    torch.cat,
+    r"""
+cat(tensors, dim=0, *, out=None) -> Tensor
+
+Concatenates the given sequence of :attr:`seq` tensors in the given dimension.
+All tensors must either have the same shape (except in the concatenating
+dimension) or be a 1-D empty tensor with size ``(0,)``.
+
+:func:`torch.cat` can be seen as an inverse operation for :func:`torch.split`
+and :func:`torch.chunk`.
+
+:func:`torch.cat` can be best understood via examples.
+
+.. seealso::
+
+    :func:`torch.stack` concatenates the given sequence along a new dimension.
+
+Args:
+    tensors (sequence of Tensors): any python sequence of tensors of the same type.
+        Non-empty tensors provided must have the same shape, except in the
+        cat dimension.
+    dim (int, optional): the dimension over which the tensors are concatenated
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.randn(2, 3)
+    >>> x
+    tensor([[ 0.6580, -1.0969, -0.4614],
+            [-0.1034, -0.5790,  0.1497]])
+    >>> torch.cat((x, x, x), 0)
+    tensor([[ 0.6580, -1.0969, -0.4614],
+            [-0.1034, -0.5790,  0.1497],
+            [ 0.6580, -1.0969, -0.4614],
+            [-0.1034, -0.5790,  0.1497],
+            [ 0.6580, -1.0969, -0.4614],
+            [-0.1034, -0.5790,  0.1497]])
+    >>> torch.cat((x, x, x), 1)
+    tensor([[ 0.6580, -1.0969, -0.4614,  0.6580, -1.0969, -0.4614,  0.6580,
+             -1.0969, -0.4614],
+            [-0.1034, -0.5790,  0.1497, -0.1034, -0.5790,  0.1497, -0.1034,
+             -0.5790,  0.1497]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.concat,
+    r"""
+concat(tensors, dim=0, *, out=None) -> Tensor
+
+Alias of :func:`torch.cat`.
+""",
+)
+
+add_docstr(
+    torch.concatenate,
+    r"""
+concatenate(tensors, axis=0, out=None) -> Tensor
+
+Alias of :func:`torch.cat`.
+""",
+)
+
+add_docstr(
+    torch.ceil,
+    r"""
+ceil(input, *, out=None) -> Tensor
+
+Returns a new tensor with the ceil of the elements of :attr:`input`,
+the smallest integer greater than or equal to each element.
+
+For integer inputs, follows the array-api convention of returning a
+copy of the input tensor.
+
+.. math::
+    \text{out}_{i} = \left\lceil \text{input}_{i} \right\rceil
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.6341, -1.4208, -1.0900,  0.5826])
+    >>> torch.ceil(a)
+    tensor([-0., -1., -1.,  1.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.real,
+    r"""
+real(input) -> Tensor
+
+Returns a new tensor containing real values of the :attr:`self` tensor.
+The returned tensor and :attr:`self` share the same underlying storage.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x=torch.randn(4, dtype=torch.cfloat)
+    >>> x
+    tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)])
+    >>> x.real
+    tensor([ 0.3100, -0.5445, -1.6492, -0.0638])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.imag,
+    r"""
+imag(input) -> Tensor
+
+Returns a new tensor containing imaginary values of the :attr:`self` tensor.
+The returned tensor and :attr:`self` share the same underlying storage.
+
+.. warning::
+    :func:`imag` is only supported for tensors with complex dtypes.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x=torch.randn(4, dtype=torch.cfloat)
+    >>> x
+    tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)])
+    >>> x.imag
+    tensor([ 0.3553, -0.7896, -0.0633, -0.8119])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.view_as_real,
+    r"""
+view_as_real(input) -> Tensor
+
+Returns a view of :attr:`input` as a real tensor. For an input complex tensor of
+:attr:`size` :math:`m1, m2, \dots, mi`, this function returns a new
+real tensor of size :math:`m1, m2, \dots, mi, 2`, where the last dimension of size 2
+represents the real and imaginary components of complex numbers.
+
+.. warning::
+    :func:`view_as_real` is only supported for tensors with ``complex dtypes``.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x=torch.randn(4, dtype=torch.cfloat)
+    >>> x
+    tensor([(0.4737-0.3839j), (-0.2098-0.6699j), (0.3470-0.9451j), (-0.5174-1.3136j)])
+    >>> torch.view_as_real(x)
+    tensor([[ 0.4737, -0.3839],
+            [-0.2098, -0.6699],
+            [ 0.3470, -0.9451],
+            [-0.5174, -1.3136]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.view_as_complex,
+    r"""
+view_as_complex(input) -> Tensor
+
+Returns a view of :attr:`input` as a complex tensor. For an input complex
+tensor of :attr:`size` :math:`m1, m2, \dots, mi, 2`, this function returns a
+new complex tensor of :attr:`size` :math:`m1, m2, \dots, mi` where the last
+dimension of the input tensor is expected to represent the real and imaginary
+components of complex numbers.
+
+.. warning::
+    :func:`view_as_complex` is only supported for tensors with
+    :class:`torch.dtype` ``torch.float64`` and ``torch.float32``.  The input is
+    expected to have the last dimension of :attr:`size` 2. In addition, the
+    tensor must have a `stride` of 1 for its last dimension. The strides of all
+    other dimensions must be even numbers.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x=torch.randn(4, 2)
+    >>> x
+    tensor([[ 1.6116, -0.5772],
+            [-1.4606, -0.9120],
+            [ 0.0786, -1.7497],
+            [-0.6561, -1.6623]])
+    >>> torch.view_as_complex(x)
+    tensor([(1.6116-0.5772j), (-1.4606-0.9120j), (0.0786-1.7497j), (-0.6561-1.6623j)])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.reciprocal,
+    r"""
+reciprocal(input, *, out=None) -> Tensor
+
+Returns a new tensor with the reciprocal of the elements of :attr:`input`
+
+.. math::
+    \text{out}_{i} = \frac{1}{\text{input}_{i}}
+
+.. note::
+    Unlike NumPy's reciprocal, torch.reciprocal supports integral inputs. Integral
+    inputs to reciprocal are automatically :ref:`promoted <type-promotion-doc>` to
+    the default scalar type.
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.4595, -2.1219, -1.4314,  0.7298])
+    >>> torch.reciprocal(a)
+    tensor([-2.1763, -0.4713, -0.6986,  1.3702])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.cholesky,
+    r"""
+cholesky(input, upper=False, *, out=None) -> Tensor
+
+Computes the Cholesky decomposition of a symmetric positive-definite
+matrix :math:`A` or for batches of symmetric positive-definite matrices.
+
+If :attr:`upper` is ``True``, the returned matrix ``U`` is upper-triangular, and
+the decomposition has the form:
+
+.. math::
+
+  A = U^TU
+
+If :attr:`upper` is ``False``, the returned matrix ``L`` is lower-triangular, and
+the decomposition has the form:
+
+.. math::
+
+    A = LL^T
+
+If :attr:`upper` is ``True``, and :math:`A` is a batch of symmetric positive-definite
+matrices, then the returned tensor will be composed of upper-triangular Cholesky factors
+of each of the individual matrices. Similarly, when :attr:`upper` is ``False``, the returned
+tensor will be composed of lower-triangular Cholesky factors of each of the individual
+matrices.
+
+.. warning::
+
+    :func:`torch.cholesky` is deprecated in favor of :func:`torch.linalg.cholesky`
+    and will be removed in a future PyTorch release.
+
+    ``L = torch.cholesky(A)`` should be replaced with
+
+    .. code:: python
+
+        L = torch.linalg.cholesky(A)
+
+    ``U = torch.cholesky(A, upper=True)`` should be replaced with
+
+    .. code:: python
+
+        U = torch.linalg.cholesky(A).mH
+
+    This transform will produce equivalent results for all valid (symmetric positive definite) inputs.
+
+Args:
+    input (Tensor): the input tensor :math:`A` of size :math:`(*, n, n)` where `*` is zero or more
+                batch dimensions consisting of symmetric positive-definite matrices.
+    upper (bool, optional): flag that indicates whether to return a
+                            upper or lower triangular matrix. Default: ``False``
+
+Keyword args:
+    out (Tensor, optional): the output matrix
+
+Example::
+
+    >>> a = torch.randn(3, 3)
+    >>> a = a @ a.mT + 1e-3 # make symmetric positive-definite
+    >>> l = torch.cholesky(a)
+    >>> a
+    tensor([[ 2.4112, -0.7486,  1.4551],
+            [-0.7486,  1.3544,  0.1294],
+            [ 1.4551,  0.1294,  1.6724]])
+    >>> l
+    tensor([[ 1.5528,  0.0000,  0.0000],
+            [-0.4821,  1.0592,  0.0000],
+            [ 0.9371,  0.5487,  0.7023]])
+    >>> l @ l.mT
+    tensor([[ 2.4112, -0.7486,  1.4551],
+            [-0.7486,  1.3544,  0.1294],
+            [ 1.4551,  0.1294,  1.6724]])
+    >>> a = torch.randn(3, 2, 2) # Example for batched input
+    >>> a = a @ a.mT + 1e-03 # make symmetric positive-definite
+    >>> l = torch.cholesky(a)
+    >>> z = l @ l.mT
+    >>> torch.dist(z, a)
+    tensor(2.3842e-07)
+""",
+)
+
+add_docstr(
+    torch.cholesky_solve,
+    r"""
+cholesky_solve(B, L, upper=False, *, out=None) -> Tensor
+
+Computes the solution of a system of linear equations with complex Hermitian
+or real symmetric positive-definite lhs given its Cholesky decomposition.
+
+Let :math:`A` be a complex Hermitian or real symmetric positive-definite matrix,
+and :math:`L` its Cholesky decomposition such that:
+
+.. math::
+
+    A = LL^{\text{H}}
+
+where :math:`L^{\text{H}}` is the conjugate transpose when :math:`L` is complex,
+and the transpose when :math:`L` is real-valued.
+
+Returns the solution :math:`X` of the following linear system:
+
+.. math::
+
+    AX = B
+
+Supports inputs of float, double, cfloat and cdouble dtypes.
+Also supports batches of matrices, and if :math:`A` or :math:`B` is a batch of matrices
+then the output has the same batch dimensions.
+
+Args:
+    B (Tensor): right-hand side tensor of shape `(*, n, k)`
+        where :math:`*` is zero or more batch dimensions
+    L (Tensor): tensor of shape `(*, n, n)` where `*` is zero or more batch dimensions
+        consisting of lower or upper triangular Cholesky decompositions of
+        symmetric or Hermitian positive-definite matrices.
+    upper (bool, optional): flag that indicates whether :math:`L` is lower triangular
+        or upper triangular. Default: ``False``.
+
+Keyword args:
+    out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`.
+
+Example::
+
+    >>> A = torch.randn(3, 3)
+    >>> A = A @ A.T + torch.eye(3) * 1e-3 # Creates a symmetric positive-definite matrix
+    >>> L = torch.linalg.cholesky(A) # Extract Cholesky decomposition
+    >>> B = torch.randn(3, 2)
+    >>> torch.cholesky_solve(B, L)
+    tensor([[ -8.1625,  19.6097],
+            [ -5.8398,  14.2387],
+            [ -4.3771,  10.4173]])
+    >>> A.inverse() @  B
+    tensor([[ -8.1626,  19.6097],
+            [ -5.8398,  14.2387],
+            [ -4.3771,  10.4173]])
+
+    >>> A = torch.randn(3, 2, 2, dtype=torch.complex64)
+    >>> A = A @ A.mH + torch.eye(2) * 1e-3 # Batch of Hermitian positive-definite matrices
+    >>> L = torch.linalg.cholesky(A)
+    >>> B = torch.randn(2, 1, dtype=torch.complex64)
+    >>> X = torch.cholesky_solve(B, L)
+    >>> torch.dist(X, A.inverse() @ B)
+    tensor(1.6881e-5)
+""",
+)
+
+add_docstr(
+    torch.cholesky_inverse,
+    r"""
+cholesky_inverse(L, upper=False, *, out=None) -> Tensor
+
+Computes the inverse of a complex Hermitian or real symmetric
+positive-definite matrix given its Cholesky decomposition.
+
+Let :math:`A` be a complex Hermitian or real symmetric positive-definite matrix,
+and :math:`L` its Cholesky decomposition such that:
+
+.. math::
+
+    A = LL^{\text{H}}
+
+where :math:`L^{\text{H}}` is the conjugate transpose when :math:`L` is complex,
+and the transpose when :math:`L` is real-valued.
+
+Computes the inverse matrix :math:`A^{-1}`.
+
+Supports input of float, double, cfloat and cdouble dtypes.
+Also supports batches of matrices, and if :math:`A` is a batch of matrices
+then the output has the same batch dimensions.
+
+Args:
+    L (Tensor): tensor of shape `(*, n, n)` where `*` is zero or more batch dimensions
+        consisting of lower or upper triangular Cholesky decompositions of
+        symmetric or Hermitian positive-definite matrices.
+    upper (bool, optional): flag that indicates whether :math:`L` is lower triangular
+        or upper triangular. Default: ``False``
+
+Keyword args:
+    out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`.
+
+Example::
+
+    >>> A = torch.randn(3, 3)
+    >>> A = A @ A.T + torch.eye(3) * 1e-3 # Creates a symmetric positive-definite matrix
+    >>> L = torch.linalg.cholesky(A) # Extract Cholesky decomposition
+    >>> torch.cholesky_inverse(L)
+    tensor([[ 1.9314,  1.2251, -0.0889],
+            [ 1.2251,  2.4439,  0.2122],
+            [-0.0889,  0.2122,  0.1412]])
+    >>> A.inverse()
+    tensor([[ 1.9314,  1.2251, -0.0889],
+            [ 1.2251,  2.4439,  0.2122],
+            [-0.0889,  0.2122,  0.1412]])
+
+    >>> A = torch.randn(3, 2, 2, dtype=torch.complex64)
+    >>> A = A @ A.mH + torch.eye(2) * 1e-3 # Batch of Hermitian positive-definite matrices
+    >>> L = torch.linalg.cholesky(A)
+    >>> torch.dist(torch.inverse(A), torch.cholesky_inverse(L))
+    tensor(5.6358e-7)
+""",
+)
+
+add_docstr(
+    torch.clone,
+    r"""
+clone(input, *, memory_format=torch.preserve_format) -> Tensor
+
+Returns a copy of :attr:`input`.
+
+.. note::
+
+    This function is differentiable, so gradients will flow back from the
+    result of this operation to :attr:`input`. To create a tensor without an
+    autograd relationship to :attr:`input` see :meth:`~Tensor.detach`.
+
+Args:
+    {input}
+
+Keyword args:
+    {memory_format}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.clamp,
+    r"""
+clamp(input, min=None, max=None, *, out=None) -> Tensor
+
+Clamps all elements in :attr:`input` into the range `[` :attr:`min`, :attr:`max` `]`.
+Letting min_value and max_value be :attr:`min` and :attr:`max`, respectively, this returns:
+
+.. math::
+    y_i = \min(\max(x_i, \text{min\_value}_i), \text{max\_value}_i)
+
+If :attr:`min` is ``None``, there is no lower bound.
+Or, if :attr:`max` is ``None`` there is no upper bound.
+"""
+    + r"""
+
+.. note::
+    If :attr:`min` is greater than :attr:`max` :func:`torch.clamp(..., min, max) <torch.clamp>`
+    sets all elements in :attr:`input` to the value of :attr:`max`.
+
+Args:
+    {input}
+    min (Number or Tensor, optional): lower-bound of the range to be clamped to
+    max (Number or Tensor, optional): upper-bound of the range to be clamped to
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-1.7120,  0.1734, -0.0478, -0.0922])
+    >>> torch.clamp(a, min=-0.5, max=0.5)
+    tensor([-0.5000,  0.1734, -0.0478, -0.0922])
+
+    >>> min = torch.linspace(-1, 1, steps=4)
+    >>> torch.clamp(a, min=min)
+    tensor([-1.0000,  0.1734,  0.3333,  1.0000])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.clip,
+    r"""
+clip(input, min=None, max=None, *, out=None) -> Tensor
+
+Alias for :func:`torch.clamp`.
+""",
+)
+
+add_docstr(
+    torch.column_stack,
+    r"""
+column_stack(tensors, *, out=None) -> Tensor
+
+Creates a new tensor by horizontally stacking the tensors in :attr:`tensors`.
+
+Equivalent to ``torch.hstack(tensors)``, except each zero or one dimensional tensor ``t``
+in :attr:`tensors` is first reshaped into a ``(t.numel(), 1)`` column before being stacked horizontally.
+
+Args:
+    tensors (sequence of Tensors): sequence of tensors to concatenate
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([1, 2, 3])
+    >>> b = torch.tensor([4, 5, 6])
+    >>> torch.column_stack((a, b))
+    tensor([[1, 4],
+        [2, 5],
+        [3, 6]])
+    >>> a = torch.arange(5)
+    >>> b = torch.arange(10).reshape(5, 2)
+    >>> torch.column_stack((a, b, b))
+    tensor([[0, 0, 1, 0, 1],
+            [1, 2, 3, 2, 3],
+            [2, 4, 5, 4, 5],
+            [3, 6, 7, 6, 7],
+            [4, 8, 9, 8, 9]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.complex,
+    r"""
+complex(real, imag, *, out=None) -> Tensor
+
+Constructs a complex tensor with its real part equal to :attr:`real` and its
+imaginary part equal to :attr:`imag`.
+
+Args:
+    real (Tensor): The real part of the complex tensor. Must be half, float or double.
+    imag (Tensor): The imaginary part of the complex tensor. Must be same dtype
+        as :attr:`real`.
+
+Keyword args:
+    out (Tensor): If the inputs are ``torch.float32``, must be
+        ``torch.complex64``. If the inputs are ``torch.float64``, must be
+        ``torch.complex128``.
+
+Example::
+
+    >>> real = torch.tensor([1, 2], dtype=torch.float32)
+    >>> imag = torch.tensor([3, 4], dtype=torch.float32)
+    >>> z = torch.complex(real, imag)
+    >>> z
+    tensor([(1.+3.j), (2.+4.j)])
+    >>> z.dtype
+    torch.complex64
+
+""",
+)
+
+add_docstr(
+    torch.polar,
+    r"""
+polar(abs, angle, *, out=None) -> Tensor
+
+Constructs a complex tensor whose elements are Cartesian coordinates
+corresponding to the polar coordinates with absolute value :attr:`abs` and angle
+:attr:`angle`.
+
+.. math::
+    \text{out} = \text{abs} \cdot \cos(\text{angle}) + \text{abs} \cdot \sin(\text{angle}) \cdot j
+
+.. note::
+    `torch.polar` is similar to
+    `std::polar <https://en.cppreference.com/w/cpp/numeric/complex/polar>`_
+    and does not compute the polar decomposition
+    of a complex tensor like Python's `cmath.polar` and SciPy's `linalg.polar` do.
+    The behavior of this function is undefined if `abs` is negative or NaN, or if `angle` is
+    infinite.
+
+"""
+    + r"""
+Args:
+    abs (Tensor): The absolute value the complex tensor. Must be float or double.
+    angle (Tensor): The angle of the complex tensor. Must be same dtype as
+        :attr:`abs`.
+
+Keyword args:
+    out (Tensor): If the inputs are ``torch.float32``, must be
+        ``torch.complex64``. If the inputs are ``torch.float64``, must be
+        ``torch.complex128``.
+
+Example::
+
+    >>> import numpy as np
+    >>> abs = torch.tensor([1, 2], dtype=torch.float64)
+    >>> angle = torch.tensor([np.pi / 2, 5 * np.pi / 4], dtype=torch.float64)
+    >>> z = torch.polar(abs, angle)
+    >>> z
+    tensor([(0.0000+1.0000j), (-1.4142-1.4142j)], dtype=torch.complex128)
+""",
+)
+
+add_docstr(
+    torch.conj_physical,
+    r"""
+conj_physical(input, *, out=None) -> Tensor
+
+Computes the element-wise conjugate of the given :attr:`input` tensor.
+If :attr:`input` has a non-complex dtype, this function just returns :attr:`input`.
+
+.. note::
+   This performs the conjugate operation regardless of the fact conjugate bit is set or not.
+
+.. warning:: In the future, :func:`torch.conj_physical` may return a non-writeable view for an :attr:`input` of
+             non-complex dtype. It's recommended that programs not modify the tensor returned by :func:`torch.conj_physical`
+             when :attr:`input` is of non-complex dtype to be compatible with this change.
+
+.. math::
+    \text{out}_{i} = conj(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.conj_physical(torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]))
+    tensor([-1 - 1j, -2 - 2j, 3 + 3j])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.conj,
+    r"""
+conj(input) -> Tensor
+
+Returns a view of :attr:`input` with a flipped conjugate bit. If :attr:`input` has a non-complex dtype,
+this function just returns :attr:`input`.
+
+.. note::
+    :func:`torch.conj` performs a lazy conjugation, but the actual conjugated tensor can be materialized
+    at any time using :func:`torch.resolve_conj`.
+
+.. warning:: In the future, :func:`torch.conj` may return a non-writeable view for an :attr:`input` of
+             non-complex dtype. It's recommended that programs not modify the tensor returned by :func:`torch.conj_physical`
+             when :attr:`input` is of non-complex dtype to be compatible with this change.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j])
+    >>> x.is_conj()
+    False
+    >>> y = torch.conj(x)
+    >>> y.is_conj()
+    True
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.resolve_conj,
+    r"""
+resolve_conj(input) -> Tensor
+
+Returns a new tensor with materialized conjugation if :attr:`input`'s conjugate bit is set to `True`,
+else returns :attr:`input`. The output tensor will always have its conjugate bit set to `False`.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j])
+    >>> y = x.conj()
+    >>> y.is_conj()
+    True
+    >>> z = y.resolve_conj()
+    >>> z
+    tensor([-1 - 1j, -2 - 2j, 3 + 3j])
+    >>> z.is_conj()
+    False
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.resolve_neg,
+    r"""
+resolve_neg(input) -> Tensor
+
+Returns a new tensor with materialized negation if :attr:`input`'s negative bit is set to `True`,
+else returns :attr:`input`. The output tensor will always have its negative bit set to `False`.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j])
+    >>> y = x.conj()
+    >>> z = y.imag
+    >>> z.is_neg()
+    True
+    >>> out = z.resolve_neg()
+    >>> out
+    tensor([-1., -2., 3.])
+    >>> out.is_neg()
+    False
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.copysign,
+    r"""
+copysign(input, other, *, out=None) -> Tensor
+
+Create a new floating-point tensor with the magnitude of :attr:`input` and the sign of :attr:`other`, elementwise.
+
+.. math::
+    \text{out}_{i} = \begin{cases}
+        -|\text{input}_{i}| & \text{if } \text{other}_{i} \leq -0.0 \\
+         |\text{input}_{i}| & \text{if } \text{other}_{i} \geq 0.0 \\
+    \end{cases}
+"""
+    + r"""
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+and integer and float inputs.
+
+Args:
+    input (Tensor): magnitudes.
+    other (Tensor or Number): contains value(s) whose signbit(s) are
+        applied to the magnitudes in :attr:`input`.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(5)
+    >>> a
+    tensor([-1.2557, -0.0026, -0.5387,  0.4740, -0.9244])
+    >>> torch.copysign(a, 1)
+    tensor([1.2557, 0.0026, 0.5387, 0.4740, 0.9244])
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.7079,  0.2778, -1.0249,  0.5719],
+            [-0.0059, -0.2600, -0.4475, -1.3948],
+            [ 0.3667, -0.9567, -2.5757, -0.1751],
+            [ 0.2046, -0.0742,  0.2998, -0.1054]])
+    >>> b = torch.randn(4)
+    tensor([ 0.2373,  0.3120,  0.3190, -1.1128])
+    >>> torch.copysign(a, b)
+    tensor([[ 0.7079,  0.2778,  1.0249, -0.5719],
+            [ 0.0059,  0.2600,  0.4475, -1.3948],
+            [ 0.3667,  0.9567,  2.5757, -0.1751],
+            [ 0.2046,  0.0742,  0.2998, -0.1054]])
+    >>> a = torch.tensor([1.])
+    >>> b = torch.tensor([-0.])
+    >>> torch.copysign(a, b)
+    tensor([-1.])
+
+.. note::
+    copysign handles signed zeros. If the other argument has a negative zero (-0),
+    the corresponding output value will be negative.
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.cos,
+    r"""
+cos(input, *, out=None) -> Tensor
+
+Returns a new tensor with the cosine  of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \cos(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 1.4309,  1.2706, -0.8562,  0.9796])
+    >>> torch.cos(a)
+    tensor([ 0.1395,  0.2957,  0.6553,  0.5574])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.cosh,
+    r"""
+cosh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the hyperbolic cosine  of the elements of
+:attr:`input`.
+
+.. math::
+    \text{out}_{i} = \cosh(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.1632,  1.1835, -0.6979, -0.7325])
+    >>> torch.cosh(a)
+    tensor([ 1.0133,  1.7860,  1.2536,  1.2805])
+
+.. note::
+   When :attr:`input` is on the CPU, the implementation of torch.cosh may use
+   the Sleef library, which rounds very large results to infinity or negative
+   infinity. See `here <https://sleef.org/purec.xhtml>`_ for details.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.cross,
+    r"""
+cross(input, other, dim=None, *, out=None) -> Tensor
+
+
+Returns the cross product of vectors in dimension :attr:`dim` of :attr:`input`
+and :attr:`other`.
+
+Supports input of float, double, cfloat and cdouble dtypes. Also supports batches
+of vectors, for which it computes the product along the dimension :attr:`dim`.
+In this case, the output has the same batch dimensions as the inputs.
+
+.. warning::
+    If :attr:`dim` is not given, it defaults to the first dimension found
+    with the size 3. Note that this might be unexpected.
+
+    This behavior is deprecated and will be changed to match that of :func:`torch.linalg.cross`
+    in a future release.
+
+.. seealso::
+        :func:`torch.linalg.cross` which has dim=-1 as default.
+
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+    dim  (int, optional): the dimension to take the cross-product in.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4, 3)
+    >>> a
+    tensor([[-0.3956,  1.1455,  1.6895],
+            [-0.5849,  1.3672,  0.3599],
+            [-1.1626,  0.7180, -0.0521],
+            [-0.1339,  0.9902, -2.0225]])
+    >>> b = torch.randn(4, 3)
+    >>> b
+    tensor([[-0.0257, -1.4725, -1.2251],
+            [-1.1479, -0.7005, -1.9757],
+            [-1.3904,  0.3726, -1.1836],
+            [-0.9688, -0.7153,  0.2159]])
+    >>> torch.cross(a, b, dim=1)
+    tensor([[ 1.0844, -0.5281,  0.6120],
+            [-2.4490, -1.5687,  1.9792],
+            [-0.8304, -1.3037,  0.5650],
+            [-1.2329,  1.9883,  1.0551]])
+    >>> torch.cross(a, b)
+    tensor([[ 1.0844, -0.5281,  0.6120],
+            [-2.4490, -1.5687,  1.9792],
+            [-0.8304, -1.3037,  0.5650],
+            [-1.2329,  1.9883,  1.0551]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logcumsumexp,
+    r"""
+logcumsumexp(input, dim, *, out=None) -> Tensor
+Returns the logarithm of the cumulative summation of the exponentiation of
+elements of :attr:`input` in the dimension :attr:`dim`.
+
+For summation index :math:`j` given by `dim` and other indices :math:`i`, the result is
+
+    .. math::
+        \text{{logcumsumexp}}(x)_{{ij}} = \log \sum\limits_{{j=0}}^{{i}} \exp(x_{{ij}})
+
+Args:
+    {input}
+    dim  (int): the dimension to do the operation over
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(10)
+    >>> torch.logcumsumexp(a, dim=0)
+    tensor([-0.42296738, -0.04462666,  0.86278635,  0.94622083,  1.05277811,
+             1.39202815,  1.83525007,  1.84492621,  2.06084887,  2.06844475]))
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.cummax,
+    r"""
+cummax(input, dim, *, out=None) -> (Tensor, LongTensor)
+Returns a namedtuple ``(values, indices)`` where ``values`` is the cumulative maximum of
+elements of :attr:`input` in the dimension :attr:`dim`. And ``indices`` is the index
+location of each maximum value found in the dimension :attr:`dim`.
+
+.. math::
+    y_i = max(x_1, x_2, x_3, \dots, x_i)
+
+Args:
+    {input}
+    dim  (int): the dimension to do the operation over
+
+Keyword args:
+    out (tuple, optional): the result tuple of two output tensors (values, indices)
+
+Example::
+
+    >>> a = torch.randn(10)
+    >>> a
+    tensor([-0.3449, -1.5447,  0.0685, -1.5104, -1.1706,  0.2259,  1.4696, -1.3284,
+         1.9946, -0.8209])
+    >>> torch.cummax(a, dim=0)
+    torch.return_types.cummax(
+        values=tensor([-0.3449, -0.3449,  0.0685,  0.0685,  0.0685,  0.2259,  1.4696,  1.4696,
+         1.9946,  1.9946]),
+        indices=tensor([0, 0, 2, 2, 2, 5, 6, 6, 8, 8]))
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.cummin,
+    r"""
+cummin(input, dim, *, out=None) -> (Tensor, LongTensor)
+Returns a namedtuple ``(values, indices)`` where ``values`` is the cumulative minimum of
+elements of :attr:`input` in the dimension :attr:`dim`. And ``indices`` is the index
+location of each maximum value found in the dimension :attr:`dim`.
+
+.. math::
+    y_i = min(x_1, x_2, x_3, \dots, x_i)
+
+Args:
+    {input}
+    dim  (int): the dimension to do the operation over
+
+Keyword args:
+    out (tuple, optional): the result tuple of two output tensors (values, indices)
+
+Example::
+
+    >>> a = torch.randn(10)
+    >>> a
+    tensor([-0.2284, -0.6628,  0.0975,  0.2680, -1.3298, -0.4220, -0.3885,  1.1762,
+         0.9165,  1.6684])
+    >>> torch.cummin(a, dim=0)
+    torch.return_types.cummin(
+        values=tensor([-0.2284, -0.6628, -0.6628, -0.6628, -1.3298, -1.3298, -1.3298, -1.3298,
+        -1.3298, -1.3298]),
+        indices=tensor([0, 1, 1, 1, 4, 4, 4, 4, 4, 4]))
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.cumprod,
+    r"""
+cumprod(input, dim, *, dtype=None, out=None) -> Tensor
+
+Returns the cumulative product of elements of :attr:`input` in the dimension
+:attr:`dim`.
+
+For example, if :attr:`input` is a vector of size N, the result will also be
+a vector of size N, with elements.
+
+.. math::
+    y_i = x_1 \times x_2\times x_3\times \dots \times x_i
+
+Args:
+    {input}
+    dim  (int): the dimension to do the operation over
+
+Keyword args:
+    {dtype}
+    {out}
+
+Example::
+
+    >>> a = torch.randn(10)
+    >>> a
+    tensor([ 0.6001,  0.2069, -0.1919,  0.9792,  0.6727,  1.0062,  0.4126,
+            -0.2129, -0.4206,  0.1968])
+    >>> torch.cumprod(a, dim=0)
+    tensor([ 0.6001,  0.1241, -0.0238, -0.0233, -0.0157, -0.0158, -0.0065,
+             0.0014, -0.0006, -0.0001])
+
+    >>> a[5] = 0.0
+    >>> torch.cumprod(a, dim=0)
+    tensor([ 0.6001,  0.1241, -0.0238, -0.0233, -0.0157, -0.0000, -0.0000,
+             0.0000, -0.0000, -0.0000])
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.cumsum,
+    r"""
+cumsum(input, dim, *, dtype=None, out=None) -> Tensor
+
+Returns the cumulative sum of elements of :attr:`input` in the dimension
+:attr:`dim`.
+
+For example, if :attr:`input` is a vector of size N, the result will also be
+a vector of size N, with elements.
+
+.. math::
+    y_i = x_1 + x_2 + x_3 + \dots + x_i
+
+Args:
+    {input}
+    dim  (int): the dimension to do the operation over
+
+Keyword args:
+    {dtype}
+    {out}
+
+Example::
+
+    >>> a = torch.randint(1, 20, (10,))
+    >>> a
+    tensor([13,  7,  3, 10, 13,  3, 15, 10,  9, 10])
+    >>> torch.cumsum(a, dim=0)
+    tensor([13, 20, 23, 33, 46, 49, 64, 74, 83, 93])
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.count_nonzero,
+    r"""
+count_nonzero(input, dim=None) -> Tensor
+
+Counts the number of non-zero values in the tensor :attr:`input` along the given :attr:`dim`.
+If no dim is specified then all non-zeros in the tensor are counted.
+
+Args:
+    {input}
+    dim (int or tuple of ints, optional): Dim or tuple of dims along which to count non-zeros.
+
+Example::
+
+    >>> x = torch.zeros(3,3)
+    >>> x[torch.randn(3,3) > 0.5] = 1
+    >>> x
+    tensor([[0., 1., 1.],
+            [0., 0., 0.],
+            [0., 0., 1.]])
+    >>> torch.count_nonzero(x)
+    tensor(3)
+    >>> torch.count_nonzero(x, dim=0)
+    tensor([0, 1, 2])
+""".format(
+        **reduceops_common_args
+    ),
+)
+
+add_docstr(
+    torch.dequantize,
+    r"""
+dequantize(tensor) -> Tensor
+
+Returns an fp32 Tensor by dequantizing a quantized Tensor
+
+Args:
+    tensor (Tensor): A quantized Tensor
+
+.. function:: dequantize(tensors) -> sequence of Tensors
+   :noindex:
+
+Given a list of quantized Tensors, dequantize them and return a list of fp32 Tensors
+
+Args:
+     tensors (sequence of Tensors): A list of quantized Tensors
+""",
+)
+
+add_docstr(
+    torch.diag,
+    r"""
+diag(input, diagonal=0, *, out=None) -> Tensor
+
+- If :attr:`input` is a vector (1-D tensor), then returns a 2-D square tensor
+  with the elements of :attr:`input` as the diagonal.
+- If :attr:`input` is a matrix (2-D tensor), then returns a 1-D tensor with
+  the diagonal elements of :attr:`input`.
+
+The argument :attr:`diagonal` controls which diagonal to consider:
+
+- If :attr:`diagonal` = 0, it is the main diagonal.
+- If :attr:`diagonal` > 0, it is above the main diagonal.
+- If :attr:`diagonal` < 0, it is below the main diagonal.
+
+Args:
+    {input}
+    diagonal (int, optional): the diagonal to consider
+
+Keyword args:
+    {out}
+
+.. seealso::
+
+        :func:`torch.diagonal` always returns the diagonal of its input.
+
+        :func:`torch.diagflat` always constructs a tensor with diagonal elements
+        specified by the input.
+
+Examples:
+
+Get the square matrix where the input vector is the diagonal::
+
+    >>> a = torch.randn(3)
+    >>> a
+    tensor([ 0.5950,-0.0872, 2.3298])
+    >>> torch.diag(a)
+    tensor([[ 0.5950, 0.0000, 0.0000],
+            [ 0.0000,-0.0872, 0.0000],
+            [ 0.0000, 0.0000, 2.3298]])
+    >>> torch.diag(a, 1)
+    tensor([[ 0.0000, 0.5950, 0.0000, 0.0000],
+            [ 0.0000, 0.0000,-0.0872, 0.0000],
+            [ 0.0000, 0.0000, 0.0000, 2.3298],
+            [ 0.0000, 0.0000, 0.0000, 0.0000]])
+
+Get the k-th diagonal of a given matrix::
+
+    >>> a = torch.randn(3, 3)
+    >>> a
+    tensor([[-0.4264, 0.0255,-0.1064],
+            [ 0.8795,-0.2429, 0.1374],
+            [ 0.1029,-0.6482,-1.6300]])
+    >>> torch.diag(a, 0)
+    tensor([-0.4264,-0.2429,-1.6300])
+    >>> torch.diag(a, 1)
+    tensor([ 0.0255, 0.1374])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.diag_embed,
+    r"""
+diag_embed(input, offset=0, dim1=-2, dim2=-1) -> Tensor
+
+Creates a tensor whose diagonals of certain 2D planes (specified by
+:attr:`dim1` and :attr:`dim2`) are filled by :attr:`input`.
+To facilitate creating batched diagonal matrices, the 2D planes formed by
+the last two dimensions of the returned tensor are chosen by default.
+
+The argument :attr:`offset` controls which diagonal to consider:
+
+- If :attr:`offset` = 0, it is the main diagonal.
+- If :attr:`offset` > 0, it is above the main diagonal.
+- If :attr:`offset` < 0, it is below the main diagonal.
+
+The size of the new matrix will be calculated to make the specified diagonal
+of the size of the last input dimension.
+Note that for :attr:`offset` other than :math:`0`, the order of :attr:`dim1`
+and :attr:`dim2` matters. Exchanging them is equivalent to changing the
+sign of :attr:`offset`.
+
+Applying :meth:`torch.diagonal` to the output of this function with
+the same arguments yields a matrix identical to input. However,
+:meth:`torch.diagonal` has different default dimensions, so those
+need to be explicitly specified.
+
+Args:
+    {input} Must be at least 1-dimensional.
+    offset (int, optional): which diagonal to consider. Default: 0
+        (main diagonal).
+    dim1 (int, optional): first dimension with respect to which to
+        take diagonal. Default: -2.
+    dim2 (int, optional): second dimension with respect to which to
+        take diagonal. Default: -1.
+
+Example::
+
+    >>> a = torch.randn(2, 3)
+    >>> torch.diag_embed(a)
+    tensor([[[ 1.5410,  0.0000,  0.0000],
+             [ 0.0000, -0.2934,  0.0000],
+             [ 0.0000,  0.0000, -2.1788]],
+
+            [[ 0.5684,  0.0000,  0.0000],
+             [ 0.0000, -1.0845,  0.0000],
+             [ 0.0000,  0.0000, -1.3986]]])
+
+    >>> torch.diag_embed(a, offset=1, dim1=0, dim2=2)
+    tensor([[[ 0.0000,  1.5410,  0.0000,  0.0000],
+             [ 0.0000,  0.5684,  0.0000,  0.0000]],
+
+            [[ 0.0000,  0.0000, -0.2934,  0.0000],
+             [ 0.0000,  0.0000, -1.0845,  0.0000]],
+
+            [[ 0.0000,  0.0000,  0.0000, -2.1788],
+             [ 0.0000,  0.0000,  0.0000, -1.3986]],
+
+            [[ 0.0000,  0.0000,  0.0000,  0.0000],
+             [ 0.0000,  0.0000,  0.0000,  0.0000]]])
+""".format(
+        **common_args
+    ),
+)
+
+
+add_docstr(
+    torch.diagflat,
+    r"""
+diagflat(input, offset=0) -> Tensor
+
+- If :attr:`input` is a vector (1-D tensor), then returns a 2-D square tensor
+  with the elements of :attr:`input` as the diagonal.
+- If :attr:`input` is a tensor with more than one dimension, then returns a
+  2-D tensor with diagonal elements equal to a flattened :attr:`input`.
+
+The argument :attr:`offset` controls which diagonal to consider:
+
+- If :attr:`offset` = 0, it is the main diagonal.
+- If :attr:`offset` > 0, it is above the main diagonal.
+- If :attr:`offset` < 0, it is below the main diagonal.
+
+Args:
+    {input}
+    offset (int, optional): the diagonal to consider. Default: 0 (main
+        diagonal).
+
+Examples::
+
+    >>> a = torch.randn(3)
+    >>> a
+    tensor([-0.2956, -0.9068,  0.1695])
+    >>> torch.diagflat(a)
+    tensor([[-0.2956,  0.0000,  0.0000],
+            [ 0.0000, -0.9068,  0.0000],
+            [ 0.0000,  0.0000,  0.1695]])
+    >>> torch.diagflat(a, 1)
+    tensor([[ 0.0000, -0.2956,  0.0000,  0.0000],
+            [ 0.0000,  0.0000, -0.9068,  0.0000],
+            [ 0.0000,  0.0000,  0.0000,  0.1695],
+            [ 0.0000,  0.0000,  0.0000,  0.0000]])
+
+    >>> a = torch.randn(2, 2)
+    >>> a
+    tensor([[ 0.2094, -0.3018],
+            [-0.1516,  1.9342]])
+    >>> torch.diagflat(a)
+    tensor([[ 0.2094,  0.0000,  0.0000,  0.0000],
+            [ 0.0000, -0.3018,  0.0000,  0.0000],
+            [ 0.0000,  0.0000, -0.1516,  0.0000],
+            [ 0.0000,  0.0000,  0.0000,  1.9342]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.diagonal,
+    r"""
+diagonal(input, offset=0, dim1=0, dim2=1) -> Tensor
+
+Returns a partial view of :attr:`input` with the its diagonal elements
+with respect to :attr:`dim1` and :attr:`dim2` appended as a dimension
+at the end of the shape.
+
+The argument :attr:`offset` controls which diagonal to consider:
+
+- If :attr:`offset` = 0, it is the main diagonal.
+- If :attr:`offset` > 0, it is above the main diagonal.
+- If :attr:`offset` < 0, it is below the main diagonal.
+
+Applying :meth:`torch.diag_embed` to the output of this function with
+the same arguments yields a diagonal matrix with the diagonal entries
+of the input. However, :meth:`torch.diag_embed` has different default
+dimensions, so those need to be explicitly specified.
+
+Args:
+    {input} Must be at least 2-dimensional.
+    offset (int, optional): which diagonal to consider. Default: 0
+        (main diagonal).
+    dim1 (int, optional): first dimension with respect to which to
+        take diagonal. Default: 0.
+    dim2 (int, optional): second dimension with respect to which to
+        take diagonal. Default: 1.
+
+.. note::  To take a batch diagonal, pass in dim1=-2, dim2=-1.
+
+Examples::
+
+    >>> a = torch.randn(3, 3)
+    >>> a
+    tensor([[-1.0854,  1.1431, -0.1752],
+            [ 0.8536, -0.0905,  0.0360],
+            [ 0.6927, -0.3735, -0.4945]])
+
+
+    >>> torch.diagonal(a, 0)
+    tensor([-1.0854, -0.0905, -0.4945])
+
+
+    >>> torch.diagonal(a, 1)
+    tensor([ 1.1431,  0.0360])
+
+
+    >>> x = torch.randn(2, 5, 4, 2)
+    >>> torch.diagonal(x, offset=-1, dim1=1, dim2=2)
+    tensor([[[-1.2631,  0.3755, -1.5977, -1.8172],
+             [-1.1065,  1.0401, -0.2235, -0.7938]],
+
+            [[-1.7325, -0.3081,  0.6166,  0.2335],
+             [ 1.0500,  0.7336, -0.3836, -1.1015]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.diagonal_scatter,
+    r"""
+diagonal_scatter(input, src, offset=0, dim1=0, dim2=1) -> Tensor
+
+Embeds the values of the :attr:`src` tensor into :attr:`input` along
+the diagonal elements of :attr:`input`, with respect to :attr:`dim1`
+and :attr:`dim2`.
+
+This function returns a tensor with fresh storage; it does not
+return a view.
+
+The argument :attr:`offset` controls which diagonal to consider:
+
+- If :attr:`offset` = 0, it is the main diagonal.
+- If :attr:`offset` > 0, it is above the main diagonal.
+- If :attr:`offset` < 0, it is below the main diagonal.
+
+Args:
+    {input} Must be at least 2-dimensional.
+    src (Tensor): the tensor to embed into :attr:`input`.
+    offset (int, optional): which diagonal to consider. Default: 0
+        (main diagonal).
+    dim1 (int, optional): first dimension with respect to which to
+        take diagonal. Default: 0.
+    dim2 (int, optional): second dimension with respect to which to
+        take diagonal. Default: 1.
+
+.. note::
+
+    :attr:`src` must be of the proper size in order to be embedded
+    into :attr:`input`. Specifically, it should have the same shape as
+    ``torch.diagonal(input, offset, dim1, dim2)``
+
+Examples::
+
+    >>> a = torch.zeros(3, 3)
+    >>> a
+    tensor([[0., 0., 0.],
+            [0., 0., 0.],
+            [0., 0., 0.]])
+
+    >>> torch.diagonal_scatter(a, torch.ones(3), 0)
+    tensor([[1., 0., 0.],
+            [0., 1., 0.],
+            [0., 0., 1.]])
+
+    >>> torch.diagonal_scatter(a, torch.ones(2), 1)
+    tensor([[0., 1., 0.],
+            [0., 0., 1.],
+            [0., 0., 0.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.as_strided_scatter,
+    r"""
+as_strided_scatter(input, src, size, stride, storage_offset=None) -> Tensor
+
+Embeds the values of the :attr:`src` tensor into :attr:`input` along
+the elements corresponding to the result of calling
+input.as_strided(size, stride, storage_offset).
+
+This function returns a tensor with fresh storage; it does not
+return a view.
+
+Args:
+    {input}
+    size (tuple or ints): the shape of the output tensor
+    stride (tuple or ints): the stride of the output tensor
+    storage_offset (int, optional): the offset in the underlying storage of the output tensor
+
+.. note::
+
+    :attr:`src` must be of the proper size in order to be embedded
+    into :attr:`input`. Specifically, it should have the same shape as
+    `torch.as_strided(input, size, stride, storage_offset)`
+
+Example::
+
+    >>> a = torch.arange(4).reshape(2, 2) + 1
+    >>> a
+    tensor([[1, 2],
+            [3, 4]])
+    >>> b = torch.zeros(3, 3)
+    >>> b
+    tensor([[0., 0., 0.],
+            [0., 0., 0.],
+            [0., 0., 0.]])
+    >>> torch.as_strided_scatter(b, a, (2, 2), (1, 2))
+    tensor([[1., 3., 2.],
+            [4., 0., 0.],
+            [0., 0., 0.]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.diff,
+    r"""
+diff(input, n=1, dim=-1, prepend=None, append=None) -> Tensor
+
+Computes the n-th forward difference along the given dimension.
+
+The first-order differences are given by `out[i] = input[i + 1] - input[i]`. Higher-order
+differences are calculated by using :func:`torch.diff` recursively.
+
+Args:
+    input (Tensor): the tensor to compute the differences on
+    n (int, optional): the number of times to recursively compute the difference
+    dim (int, optional): the dimension to compute the difference along.
+        Default is the last dimension.
+    prepend, append (Tensor, optional): values to prepend or append to
+        :attr:`input` along :attr:`dim` before computing the difference.
+        Their dimensions must be equivalent to that of input, and their shapes
+        must match input's shape except on :attr:`dim`.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([1, 3, 2])
+    >>> torch.diff(a)
+    tensor([ 2, -1])
+    >>> b = torch.tensor([4, 5])
+    >>> torch.diff(a, append=b)
+    tensor([ 2, -1,  2,  1])
+    >>> c = torch.tensor([[1, 2, 3], [3, 4, 5]])
+    >>> torch.diff(c, dim=0)
+    tensor([[2, 2, 2]])
+    >>> torch.diff(c, dim=1)
+    tensor([[1, 1],
+            [1, 1]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.digamma,
+    r"""
+digamma(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.digamma`.
+""",
+)
+
+add_docstr(
+    torch.dist,
+    r"""
+dist(input, other, p=2) -> Tensor
+
+Returns the p-norm of (:attr:`input` - :attr:`other`)
+
+The shapes of :attr:`input` and :attr:`other` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+
+Args:
+    {input}
+    other (Tensor): the Right-hand-side input tensor
+    p (float, optional): the norm to be computed
+
+Example::
+
+    >>> x = torch.randn(4)
+    >>> x
+    tensor([-1.5393, -0.8675,  0.5916,  1.6321])
+    >>> y = torch.randn(4)
+    >>> y
+    tensor([ 0.0967, -1.0511,  0.6295,  0.8360])
+    >>> torch.dist(x, y, 3.5)
+    tensor(1.6727)
+    >>> torch.dist(x, y, 3)
+    tensor(1.6973)
+    >>> torch.dist(x, y, 0)
+    tensor(4.)
+    >>> torch.dist(x, y, 1)
+    tensor(2.6537)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.div,
+    r"""
+div(input, other, *, rounding_mode=None, out=None) -> Tensor
+
+Divides each element of the input ``input`` by the corresponding element of
+:attr:`other`.
+
+.. math::
+    \text{{out}}_i = \frac{{\text{{input}}_i}}{{\text{{other}}_i}}
+
+.. note::
+    By default, this performs a "true" division like Python 3.
+    See the :attr:`rounding_mode` argument for floor division.
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer, float, and complex inputs.
+Always promotes integer types to the default scalar type.
+
+Args:
+    input (Tensor): the dividend
+    other (Tensor or Number): the divisor
+
+Keyword args:
+    rounding_mode (str, optional): Type of rounding applied to the result:
+
+        * None - default behavior. Performs no rounding and, if both :attr:`input` and
+          :attr:`other` are integer types, promotes the inputs to the default scalar type.
+          Equivalent to true division in Python (the ``/`` operator) and NumPy's ``np.true_divide``.
+        * ``"trunc"`` - rounds the results of the division towards zero.
+          Equivalent to C-style integer division.
+        * ``"floor"`` - rounds the results of the division down.
+          Equivalent to floor division in Python (the ``//`` operator) and NumPy's ``np.floor_divide``.
+
+    {out}
+
+Examples::
+
+    >>> x = torch.tensor([ 0.3810,  1.2774, -0.2972, -0.3719,  0.4637])
+    >>> torch.div(x, 0.5)
+    tensor([ 0.7620,  2.5548, -0.5944, -0.7438,  0.9274])
+
+    >>> a = torch.tensor([[-0.3711, -1.9353, -0.4605, -0.2917],
+    ...                   [ 0.1815, -1.0111,  0.9805, -1.5923],
+    ...                   [ 0.1062,  1.4581,  0.7759, -1.2344],
+    ...                   [-0.1830, -0.0313,  1.1908, -1.4757]])
+    >>> b = torch.tensor([ 0.8032,  0.2930, -0.8113, -0.2308])
+    >>> torch.div(a, b)
+    tensor([[-0.4620, -6.6051,  0.5676,  1.2639],
+            [ 0.2260, -3.4509, -1.2086,  6.8990],
+            [ 0.1322,  4.9764, -0.9564,  5.3484],
+            [-0.2278, -0.1068, -1.4678,  6.3938]])
+
+    >>> torch.div(a, b, rounding_mode='trunc')
+    tensor([[-0., -6.,  0.,  1.],
+            [ 0., -3., -1.,  6.],
+            [ 0.,  4., -0.,  5.],
+            [-0., -0., -1.,  6.]])
+
+    >>> torch.div(a, b, rounding_mode='floor')
+    tensor([[-1., -7.,  0.,  1.],
+            [ 0., -4., -2.,  6.],
+            [ 0.,  4., -1.,  5.],
+            [-1., -1., -2.,  6.]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.divide,
+    r"""
+divide(input, other, *, rounding_mode=None, out=None) -> Tensor
+
+Alias for :func:`torch.div`.
+""",
+)
+
+add_docstr(
+    torch.dot,
+    r"""
+dot(input, other, *, out=None) -> Tensor
+
+Computes the dot product of two 1D tensors.
+
+.. note::
+
+    Unlike NumPy's dot, torch.dot intentionally only supports computing the dot product
+    of two 1D tensors with the same number of elements.
+
+Args:
+    input (Tensor): first tensor in the dot product, must be 1D.
+    other (Tensor): second tensor in the dot product, must be 1D.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.dot(torch.tensor([2, 3]), torch.tensor([2, 1]))
+    tensor(7)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.vdot,
+    r"""
+vdot(input, other, *, out=None) -> Tensor
+
+Computes the dot product of two 1D vectors along a dimension.
+
+In symbols, this function computes
+
+.. math::
+
+    \sum_{i=1}^n \overline{x_i}y_i.
+
+where :math:`\overline{x_i}` denotes the conjugate for complex
+vectors, and it is the identity for real vectors.
+
+.. note::
+
+    Unlike NumPy's vdot, torch.vdot intentionally only supports computing the dot product
+    of two 1D tensors with the same number of elements.
+
+.. seealso::
+
+        :func:`torch.linalg.vecdot` computes the dot product of two batches of vectors along a dimension.
+
+Args:
+    input (Tensor): first tensor in the dot product, must be 1D. Its conjugate is used if it's complex.
+    other (Tensor): second tensor in the dot product, must be 1D.
+
+Keyword args:
+"""
+    + rf"""
+.. note:: {common_args["out"]}
+"""
+    + r"""
+
+Example::
+
+    >>> torch.vdot(torch.tensor([2, 3]), torch.tensor([2, 1]))
+    tensor(7)
+    >>> a = torch.tensor((1 +2j, 3 - 1j))
+    >>> b = torch.tensor((2 +1j, 4 - 0j))
+    >>> torch.vdot(a, b)
+    tensor([16.+1.j])
+    >>> torch.vdot(b, a)
+    tensor([16.-1.j])
+""",
+)
+
+add_docstr(
+    torch.eq,
+    r"""
+eq(input, other, *, out=None) -> Tensor
+
+Computes element-wise equality
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or float): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is equal to :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.eq(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[ True, False],
+            [False, True]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.equal,
+    r"""
+equal(input, other) -> bool
+
+``True`` if two tensors have the same size and elements, ``False`` otherwise.
+
+Example::
+
+    >>> torch.equal(torch.tensor([1, 2]), torch.tensor([1, 2]))
+    True
+""",
+)
+
+add_docstr(
+    torch.erf,
+    r"""
+erf(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.erf`.
+""",
+)
+
+add_docstr(
+    torch.erfc,
+    r"""
+erfc(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.erfc`.
+""",
+)
+
+add_docstr(
+    torch.erfinv,
+    r"""
+erfinv(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.erfinv`.
+""",
+)
+
+add_docstr(
+    torch.exp,
+    r"""
+exp(input, *, out=None) -> Tensor
+
+Returns a new tensor with the exponential of the elements
+of the input tensor :attr:`input`.
+
+.. math::
+    y_{i} = e^{x_{i}}
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.exp(torch.tensor([0, math.log(2.)]))
+    tensor([ 1.,  2.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.exp2,
+    r"""
+exp2(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.exp2`.
+""",
+)
+
+add_docstr(
+    torch.expm1,
+    r"""
+expm1(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.expm1`.
+""",
+)
+
+add_docstr(
+    torch.eye,
+    r"""
+eye(n, m=None, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a 2-D tensor with ones on the diagonal and zeros elsewhere.
+
+Args:
+    n (int): the number of rows
+    m (int, optional): the number of columns with default being :attr:`n`
+
+Keyword arguments:
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+
+Returns:
+    Tensor: A 2-D tensor with ones on the diagonal and zeros elsewhere
+
+Example::
+
+    >>> torch.eye(3)
+    tensor([[ 1.,  0.,  0.],
+            [ 0.,  1.,  0.],
+            [ 0.,  0.,  1.]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.floor,
+    r"""
+floor(input, *, out=None) -> Tensor
+
+Returns a new tensor with the floor of the elements of :attr:`input`,
+the largest integer less than or equal to each element.
+
+For integer inputs, follows the array-api convention of returning a
+copy of the input tensor.
+
+.. math::
+    \text{out}_{i} = \left\lfloor \text{input}_{i} \right\rfloor
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.8166,  1.5308, -0.2530, -0.2091])
+    >>> torch.floor(a)
+    tensor([-1.,  1., -1., -1.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.floor_divide,
+    r"""
+floor_divide(input, other, *, out=None) -> Tensor
+
+.. note::
+
+    Before PyTorch 1.13 :func:`torch.floor_divide` incorrectly performed
+    truncation division. To restore the previous behavior use
+    :func:`torch.div` with ``rounding_mode='trunc'``.
+
+Computes :attr:`input` divided by :attr:`other`, elementwise, and floors
+the result.
+
+.. math::
+    \text{{out}}_i = \text{floor} \left( \frac{{\text{{input}}_i}}{{\text{{other}}_i}} \right)
+
+"""
+    + r"""
+
+Supports broadcasting to a common shape, type promotion, and integer and float inputs.
+
+Args:
+    input (Tensor or Number): the dividend
+    other (Tensor or Number): the divisor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([4.0, 3.0])
+    >>> b = torch.tensor([2.0, 2.0])
+    >>> torch.floor_divide(a, b)
+    tensor([2.0, 1.0])
+    >>> torch.floor_divide(a, 1.4)
+    tensor([2.0, 2.0])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.fmod,
+    r"""
+fmod(input, other, *, out=None) -> Tensor
+
+Applies C++'s `std::fmod <https://en.cppreference.com/w/cpp/numeric/math/fmod>`_ entrywise.
+The result has the same sign as the dividend :attr:`input` and its absolute value
+is less than that of :attr:`other`.
+
+This function may be defined in terms of :func:`torch.div` as
+
+.. code:: python
+
+    torch.fmod(a, b) == a - a.div(b, rounding_mode="trunc") * b
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer and float inputs.
+
+.. note::
+
+    When the divisor is zero, returns ``NaN`` for floating point dtypes
+    on both CPU and GPU; raises ``RuntimeError`` for integer division by
+    zero on CPU; Integer division by zero on GPU may return any value.
+
+.. note::
+
+   Complex inputs are not supported. In some cases, it is not mathematically
+   possible to satisfy the definition of a modulo operation with complex numbers.
+
+.. seealso::
+
+    :func:`torch.remainder` which implements Python's modulus operator.
+    This one is defined using division rounding down the result.
+
+Args:
+    input (Tensor): the dividend
+    other (Tensor or Scalar): the divisor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.fmod(torch.tensor([-3., -2, -1, 1, 2, 3]), 2)
+    tensor([-1., -0., -1.,  1.,  0.,  1.])
+    >>> torch.fmod(torch.tensor([1, 2, 3, 4, 5]), -1.5)
+    tensor([1.0000, 0.5000, 0.0000, 1.0000, 0.5000])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.frac,
+    r"""
+frac(input, *, out=None) -> Tensor
+
+Computes the fractional portion of each element in :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \text{input}_{i} - \left\lfloor |\text{input}_{i}| \right\rfloor * \operatorname{sgn}(\text{input}_{i})
+
+Example::
+
+    >>> torch.frac(torch.tensor([1, 2.5, -3.2]))
+    tensor([ 0.0000,  0.5000, -0.2000])
+""",
+)
+
+add_docstr(
+    torch.frexp,
+    r"""
+frexp(input, *, out=None) -> (Tensor mantissa, Tensor exponent)
+
+Decomposes :attr:`input` into mantissa and exponent tensors
+such that :math:`\text{input} = \text{mantissa} \times 2^{\text{exponent}}`.
+
+The range of mantissa is the open interval (-1, 1).
+
+Supports float inputs.
+
+Args:
+    input (Tensor): the input tensor
+
+
+Keyword args:
+    out (tuple, optional): the output tensors
+
+Example::
+
+    >>> x = torch.arange(9.)
+    >>> mantissa, exponent = torch.frexp(x)
+    >>> mantissa
+    tensor([0.0000, 0.5000, 0.5000, 0.7500, 0.5000, 0.6250, 0.7500, 0.8750, 0.5000])
+    >>> exponent
+    tensor([0, 1, 2, 2, 3, 3, 3, 3, 4], dtype=torch.int32)
+    >>> torch.ldexp(mantissa, exponent)
+    tensor([0., 1., 2., 3., 4., 5., 6., 7., 8.])
+""",
+)
+
+add_docstr(
+    torch.from_numpy,
+    r"""
+from_numpy(ndarray) -> Tensor
+
+Creates a :class:`Tensor` from a :class:`numpy.ndarray`.
+
+The returned tensor and :attr:`ndarray` share the same memory. Modifications to
+the tensor will be reflected in the :attr:`ndarray` and vice versa. The returned
+tensor is not resizable.
+
+It currently accepts :attr:`ndarray` with dtypes of ``numpy.float64``,
+``numpy.float32``, ``numpy.float16``, ``numpy.complex64``, ``numpy.complex128``,
+``numpy.int64``, ``numpy.int32``, ``numpy.int16``, ``numpy.int8``, ``numpy.uint8``,
+and ``bool``.
+
+.. warning::
+    Writing to a tensor created from a read-only NumPy array is not supported and will result in undefined behavior.
+
+Example::
+
+    >>> a = numpy.array([1, 2, 3])
+    >>> t = torch.from_numpy(a)
+    >>> t
+    tensor([ 1,  2,  3])
+    >>> t[0] = -1
+    >>> a
+    array([-1,  2,  3])
+""",
+)
+
+add_docstr(
+    torch.frombuffer,
+    r"""
+frombuffer(buffer, *, dtype, count=-1, offset=0, requires_grad=False) -> Tensor
+
+Creates a 1-dimensional :class:`Tensor` from an object that implements
+the Python buffer protocol.
+
+Skips the first :attr:`offset` bytes in the buffer, and interprets the rest of
+the raw bytes as a 1-dimensional tensor of type :attr:`dtype` with :attr:`count`
+elements.
+
+Note that either of the following must be true:
+
+1. :attr:`count` is a positive non-zero number, and the total number of bytes
+in the buffer is more than :attr:`offset` plus :attr:`count` times the size
+(in bytes) of :attr:`dtype`.
+
+2. :attr:`count` is negative, and the length (number of bytes) of the buffer
+subtracted by the :attr:`offset` is a multiple of the size (in bytes) of
+:attr:`dtype`.
+
+The returned tensor and buffer share the same memory. Modifications to
+the tensor will be reflected in the buffer and vice versa. The returned
+tensor is not resizable.
+
+.. note::
+    This function increments the reference count for the object that
+    owns the shared memory. Therefore, such memory will not be deallocated
+    before the returned tensor goes out of scope.
+
+.. warning::
+    This function's behavior is undefined when passed an object implementing
+    the buffer protocol whose data is not on the CPU. Doing so is likely to
+    cause a segmentation fault.
+
+.. warning::
+    This function does not try to infer the :attr:`dtype` (hence, it is not
+    optional). Passing a different :attr:`dtype` than its source may result
+    in unexpected behavior.
+
+Args:
+    buffer (object): a Python object that exposes the buffer interface.
+
+Keyword args:
+    dtype (:class:`torch.dtype`): the desired data type of returned tensor.
+    count (int, optional): the number of desired elements to be read.
+        If negative, all the elements (until the end of the buffer) will be
+        read. Default: -1.
+    offset (int, optional): the number of bytes to skip at the start of
+        the buffer. Default: 0.
+    {requires_grad}
+
+Example::
+
+    >>> import array
+    >>> a = array.array('i', [1, 2, 3])
+    >>> t = torch.frombuffer(a, dtype=torch.int32)
+    >>> t
+    tensor([ 1,  2,  3])
+    >>> t[0] = -1
+    >>> a
+    array([-1,  2,  3])
+
+    >>> # Interprets the signed char bytes as 32-bit integers.
+    >>> # Each 4 signed char elements will be interpreted as
+    >>> # 1 signed 32-bit integer.
+    >>> import array
+    >>> a = array.array('b', [-1, 0, 0, 0])
+    >>> torch.frombuffer(a, dtype=torch.int32)
+    tensor([255], dtype=torch.int32)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.from_file,
+    r"""
+from_file(filename, shared=None, size=0, *, dtype=None, layout=None, device=None, pin_memory=False)
+
+Creates a CPU tensor with a storage backed by a memory-mapped file.
+
+If ``shared`` is True, then memory is shared between processes. All changes are written to the file.
+If ``shared`` is False, then changes to the tensor do not affect the file.
+
+``size`` is the number of elements in the Tensor. If ``shared`` is ``False``, then the file must contain
+at least ``size * sizeof(dtype)`` bytes. If ``shared`` is ``True`` the file will be created if needed.
+
+.. note::
+    Only CPU tensors can be mapped to files.
+
+.. note::
+    For now, tensors with storages backed by a memory-mapped file cannot be created in pinned memory.
+
+
+Args:
+    filename (str): file name to map
+    shared (bool): whether to share memory (whether ``MAP_SHARED`` or ``MAP_PRIVATE`` is passed to the
+                    underlying `mmap(2) call <https://man7.org/linux/man-pages/man2/mmap.2.html>`_)
+    size (int): number of elements in the tensor
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {pin_memory}
+
+Example::
+    >>> t = torch.randn(2, 5, dtype=torch.float64)
+    >>> t.numpy().tofile('storage.pt')
+    >>> t_mapped = torch.from_file('storage.pt', shared=False, size=10, dtype=torch.float64)
+    """.format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.flatten,
+    r"""
+flatten(input, start_dim=0, end_dim=-1) -> Tensor
+
+Flattens :attr:`input` by reshaping it into a one-dimensional tensor. If :attr:`start_dim` or :attr:`end_dim`
+are passed, only dimensions starting with :attr:`start_dim` and ending with :attr:`end_dim` are flattened.
+The order of elements in :attr:`input` is unchanged.
+
+Unlike NumPy's flatten, which always copies input's data, this function may return the original object, a view,
+or copy. If no dimensions are flattened, then the original object :attr:`input` is returned. Otherwise, if input can
+be viewed as the flattened shape, then that view is returned. Finally, only if the input cannot be viewed as the
+flattened shape is input's data copied. See :meth:`torch.Tensor.view` for details on when a view will be returned.
+
+.. note::
+    Flattening a zero-dimensional tensor will return a one-dimensional view.
+
+Args:
+    {input}
+    start_dim (int): the first dim to flatten
+    end_dim (int): the last dim to flatten
+
+Example::
+
+    >>> t = torch.tensor([[[1, 2],
+    ...                    [3, 4]],
+    ...                   [[5, 6],
+    ...                    [7, 8]]])
+    >>> torch.flatten(t)
+    tensor([1, 2, 3, 4, 5, 6, 7, 8])
+    >>> torch.flatten(t, start_dim=1)
+    tensor([[1, 2, 3, 4],
+            [5, 6, 7, 8]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.unflatten,
+    r"""
+unflatten(input, dim, sizes) -> Tensor
+
+Expands a dimension of the input tensor over multiple dimensions.
+
+.. seealso::
+
+    :func:`torch.flatten` the inverse of this function. It coalesces several dimensions into one.
+
+Args:
+    {input}
+    dim (int): Dimension to be unflattened, specified as an index into
+         ``input.shape``.
+    sizes (Tuple[int]): New shape of the unflattened dimension.
+         One of its elements can be `-1` in which case the corresponding output
+         dimension is inferred. Otherwise, the product of ``sizes`` *must*
+         equal ``input.shape[dim]``.
+
+Returns:
+    A View of input with the specified dimension unflattened.
+
+Examples::
+    >>> torch.unflatten(torch.randn(3, 4, 1), 1, (2, 2)).shape
+    torch.Size([3, 2, 2, 1])
+    >>> torch.unflatten(torch.randn(3, 4, 1), 1, (-1, 2)).shape
+    torch.Size([3, 2, 2, 1])
+    >>> torch.unflatten(torch.randn(5, 12, 3), -2, (2, 2, 3, 1, 1)).shape
+    torch.Size([5, 2, 2, 3, 1, 1, 3])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.gather,
+    r"""
+gather(input, dim, index, *, sparse_grad=False, out=None) -> Tensor
+
+Gathers values along an axis specified by `dim`.
+
+For a 3-D tensor the output is specified by::
+
+    out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
+    out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
+    out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2
+
+:attr:`input` and :attr:`index` must have the same number of dimensions.
+It is also required that ``index.size(d) <= input.size(d)`` for all
+dimensions ``d != dim``.  :attr:`out` will have the same shape as :attr:`index`.
+Note that ``input`` and ``index`` do not broadcast against each other.
+
+Args:
+    input (Tensor): the source tensor
+    dim (int): the axis along which to index
+    index (LongTensor): the indices of elements to gather
+
+Keyword arguments:
+    sparse_grad (bool, optional): If ``True``, gradient w.r.t. :attr:`input` will be a sparse tensor.
+    out (Tensor, optional): the destination tensor
+
+Example::
+
+    >>> t = torch.tensor([[1, 2], [3, 4]])
+    >>> torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]]))
+    tensor([[ 1,  1],
+            [ 4,  3]])
+""",
+)
+
+
+add_docstr(
+    torch.gcd,
+    r"""
+gcd(input, other, *, out=None) -> Tensor
+
+Computes the element-wise greatest common divisor (GCD) of :attr:`input` and :attr:`other`.
+
+Both :attr:`input` and :attr:`other` must have integer types.
+
+.. note::
+    This defines :math:`gcd(0, 0) = 0`.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([5, 10, 15])
+    >>> b = torch.tensor([3, 4, 5])
+    >>> torch.gcd(a, b)
+    tensor([1, 2, 5])
+    >>> c = torch.tensor([3])
+    >>> torch.gcd(a, c)
+    tensor([1, 1, 3])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.ge,
+    r"""
+ge(input, other, *, out=None) -> Tensor
+
+Computes :math:`\text{input} \geq \text{other}` element-wise.
+"""
+    + r"""
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or float): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is greater than or equal to :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.ge(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[True, True], [False, True]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.greater_equal,
+    r"""
+greater_equal(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.ge`.
+""",
+)
+
+add_docstr(
+    torch.gradient,
+    r"""
+gradient(input, *, spacing=1, dim=None, edge_order=1) -> List of Tensors
+
+Estimates the gradient of a function :math:`g : \mathbb{R}^n \rightarrow \mathbb{R}` in
+one or more dimensions using the `second-order accurate central differences method
+<https://www.ams.org/journals/mcom/1988-51-184/S0025-5718-1988-0935077-0/S0025-5718-1988-0935077-0.pdf>`_ and
+either first or second order estimates at the boundaries.
+
+The gradient of :math:`g` is estimated using samples. By default, when :attr:`spacing` is not
+specified, the samples are entirely described by :attr:`input`, and the mapping of input coordinates
+to an output is the same as the tensor's mapping of indices to values. For example, for a three-dimensional
+:attr:`input` the function described is :math:`g : \mathbb{R}^3 \rightarrow \mathbb{R}`, and
+:math:`g(1, 2, 3)\ == input[1, 2, 3]`.
+
+When :attr:`spacing` is specified, it modifies the relationship between :attr:`input` and input coordinates.
+This is detailed in the "Keyword Arguments" section below.
+
+The gradient is estimated by estimating each partial derivative of :math:`g` independently. This estimation is
+accurate if :math:`g` is in :math:`C^3` (it has at least 3 continuous derivatives), and the estimation can be
+improved by providing closer samples. Mathematically, the value at each interior point of a partial derivative
+is estimated using `Taylor’s theorem with remainder <https://en.wikipedia.org/wiki/Taylor%27s_theorem>`_.
+Letting :math:`x` be an interior point with :math:`x-h_l` and :math:`x+h_r` be points neighboring
+it to the left and right respectively, :math:`f(x+h_r)` and :math:`f(x-h_l)` can be estimated using:
+
+.. math::
+    \begin{aligned}
+        f(x+h_r) = f(x) + h_r f'(x) + {h_r}^2  \frac{f''(x)}{2} + {h_r}^3 \frac{f'''(\xi_1)}{6}, \xi_1 \in (x, x+h_r) \\
+        f(x-h_l) = f(x) - h_l f'(x) + {h_l}^2  \frac{f''(x)}{2} - {h_l}^3 \frac{f'''(\xi_2)}{6}, \xi_2 \in (x, x-h_l) \\
+    \end{aligned}
+
+Using the fact that :math:`f \in C^3` and solving the linear system, we derive:
+
+.. math::
+    f'(x) \approx \frac{ {h_l}^2 f(x+h_r) - {h_r}^2 f(x-h_l)
+          + ({h_r}^2-{h_l}^2 ) f(x) }{ {h_r} {h_l}^2 + {h_r}^2 {h_l} }
+
+.. note::
+    We estimate the gradient of functions in complex domain
+    :math:`g : \mathbb{C}^n \rightarrow \mathbb{C}` in the same way.
+
+The value of each partial derivative at the boundary points is computed differently. See edge_order below.
+
+Args:
+    input (``Tensor``): the tensor that represents the values of the function
+
+Keyword args:
+    spacing (``scalar``, ``list of scalar``, ``list of Tensor``, optional): :attr:`spacing` can be used to modify
+        how the :attr:`input` tensor's indices relate to sample coordinates. If :attr:`spacing` is a scalar then
+        the indices are multiplied by the scalar to produce the coordinates. For example, if :attr:`spacing=2` the
+        indices (1, 2, 3) become coordinates (2, 4, 6). If :attr:`spacing` is a list of scalars then the corresponding
+        indices are multiplied. For example, if :attr:`spacing=(2, -1, 3)` the indices (1, 2, 3) become coordinates (2, -2, 9).
+        Finally, if :attr:`spacing` is a list of one-dimensional tensors then each tensor specifies the coordinates for
+        the corresponding dimension. For example, if the indices are (1, 2, 3) and the tensors are (t0, t1, t2), then
+        the coordinates are (t0[1], t1[2], t2[3])
+
+    dim (``int``, ``list of int``, optional): the dimension or dimensions to approximate the gradient over.  By default
+        the partial  gradient in every dimension is computed. Note that when :attr:`dim` is  specified the elements of
+        the :attr:`spacing` argument must correspond with the specified dims."
+
+    edge_order (``int``, optional): 1 or 2, for `first-order
+        <https://www.ams.org/journals/mcom/1988-51-184/S0025-5718-1988-0935077-0/S0025-5718-1988-0935077-0.pdf>`_ or
+        `second-order <https://www.ams.org/journals/mcom/1988-51-184/S0025-5718-1988-0935077-0/S0025-5718-1988-0935077-0.pdf>`_
+        estimation of the boundary ("edge") values, respectively.
+
+Examples::
+
+    >>> # Estimates the gradient of f(x)=x^2 at points [-2, -1, 2, 4]
+    >>> coordinates = (torch.tensor([-2., -1., 1., 4.]),)
+    >>> values = torch.tensor([4., 1., 1., 16.], )
+    >>> torch.gradient(values, spacing = coordinates)
+    (tensor([-3., -2., 2., 5.]),)
+
+    >>> # Estimates the gradient of the R^2 -> R function whose samples are
+    >>> # described by the tensor t. Implicit coordinates are [0, 1] for the outermost
+    >>> # dimension and [0, 1, 2, 3] for the innermost dimension, and function estimates
+    >>> # partial derivative for both dimensions.
+    >>> t = torch.tensor([[1, 2, 4, 8], [10, 20, 40, 80]])
+    >>> torch.gradient(t)
+    (tensor([[ 9., 18., 36., 72.],
+             [ 9., 18., 36., 72.]]),
+     tensor([[ 1.0000, 1.5000, 3.0000, 4.0000],
+             [10.0000, 15.0000, 30.0000, 40.0000]]))
+
+    >>> # A scalar value for spacing modifies the relationship between tensor indices
+    >>> # and input coordinates by multiplying the indices to find the
+    >>> # coordinates. For example, below the indices of the innermost
+    >>> # 0, 1, 2, 3 translate to coordinates of [0, 2, 4, 6], and the indices of
+    >>> # the outermost dimension 0, 1 translate to coordinates of [0, 2].
+    >>> torch.gradient(t, spacing = 2.0) # dim = None (implicitly [0, 1])
+    (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000],
+              [ 4.5000, 9.0000, 18.0000, 36.0000]]),
+     tensor([[ 0.5000, 0.7500, 1.5000, 2.0000],
+              [ 5.0000, 7.5000, 15.0000, 20.0000]]))
+    >>> # doubling the spacing between samples halves the estimated partial gradients.
+
+    >>>
+    >>> # Estimates only the partial derivative for dimension 1
+    >>> torch.gradient(t, dim = 1) # spacing = None (implicitly 1.)
+    (tensor([[ 1.0000, 1.5000, 3.0000, 4.0000],
+             [10.0000, 15.0000, 30.0000, 40.0000]]),)
+
+    >>> # When spacing is a list of scalars, the relationship between the tensor
+    >>> # indices and input coordinates changes based on dimension.
+    >>> # For example, below, the indices of the innermost dimension 0, 1, 2, 3 translate
+    >>> # to coordinates of [0, 3, 6, 9], and the indices of the outermost dimension
+    >>> # 0, 1 translate to coordinates of [0, 2].
+    >>> torch.gradient(t, spacing = [3., 2.])
+    (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000],
+             [ 4.5000, 9.0000, 18.0000, 36.0000]]),
+     tensor([[ 0.3333, 0.5000, 1.0000, 1.3333],
+             [ 3.3333, 5.0000, 10.0000, 13.3333]]))
+
+    >>> # The following example is a replication of the previous one with explicit
+    >>> # coordinates.
+    >>> coords = (torch.tensor([0, 2]), torch.tensor([0, 3, 6, 9]))
+    >>> torch.gradient(t, spacing = coords)
+    (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000],
+             [ 4.5000, 9.0000, 18.0000, 36.0000]]),
+     tensor([[ 0.3333, 0.5000, 1.0000, 1.3333],
+             [ 3.3333, 5.0000, 10.0000, 13.3333]]))
+
+""",
+)
+
+add_docstr(
+    torch.geqrf,
+    r"""
+geqrf(input, *, out=None) -> (Tensor, Tensor)
+
+This is a low-level function for calling LAPACK's geqrf directly. This function
+returns a namedtuple (a, tau) as defined in `LAPACK documentation for geqrf`_ .
+
+Computes a QR decomposition of :attr:`input`.
+Both `Q` and `R` matrices are stored in the same output tensor `a`.
+The elements of `R` are stored on and above the diagonal.
+Elementary reflectors (or Householder vectors) implicitly defining matrix `Q`
+are stored below the diagonal.
+The results of this function can be used together with :func:`torch.linalg.householder_product`
+to obtain the `Q` matrix or
+with :func:`torch.ormqr`, which uses an implicit representation of the `Q` matrix,
+for an efficient matrix-matrix multiplication.
+
+See `LAPACK documentation for geqrf`_ for further details.
+
+.. note::
+    See also :func:`torch.linalg.qr`, which computes Q and R matrices, and :func:`torch.linalg.lstsq`
+    with the ``driver="gels"`` option for a function that can solve matrix equations using a QR decomposition.
+
+Args:
+    input (Tensor): the input matrix
+
+Keyword args:
+    out (tuple, optional): the output tuple of (Tensor, Tensor). Ignored if `None`. Default: `None`.
+
+.. _LAPACK documentation for geqrf:
+    http://www.netlib.org/lapack/explore-html/df/dc5/group__variants_g_ecomputational_ga3766ea903391b5cf9008132f7440ec7b.html
+
+""",
+)
+
+add_docstr(
+    torch.inner,
+    r"""
+inner(input, other, *, out=None) -> Tensor
+
+Computes the dot product for 1D tensors. For higher dimensions, sums the product
+of elements from :attr:`input` and :attr:`other` along their last dimension.
+
+.. note::
+
+    If either :attr:`input` or :attr:`other` is a scalar, the result is equivalent
+    to `torch.mul(input, other)`.
+
+    If both :attr:`input` and :attr:`other` are non-scalars, the size of their last
+    dimension must match and the result is equivalent to `torch.tensordot(input,
+    other, dims=([-1], [-1]))`
+
+Args:
+    input (Tensor): First input tensor
+    other (Tensor): Second input tensor
+
+Keyword args:
+    out (Tensor, optional): Optional output tensor to write result into. The output
+                            shape is `input.shape[:-1] + other.shape[:-1]`.
+
+Example::
+
+    # Dot product
+    >>> torch.inner(torch.tensor([1, 2, 3]), torch.tensor([0, 2, 1]))
+    tensor(7)
+
+    # Multidimensional input tensors
+    >>> a = torch.randn(2, 3)
+    >>> a
+    tensor([[0.8173, 1.0874, 1.1784],
+            [0.3279, 0.1234, 2.7894]])
+    >>> b = torch.randn(2, 4, 3)
+    >>> b
+    tensor([[[-0.4682, -0.7159,  0.1506],
+            [ 0.4034, -0.3657,  1.0387],
+            [ 0.9892, -0.6684,  0.1774],
+            [ 0.9482,  1.3261,  0.3917]],
+
+            [[ 0.4537,  0.7493,  1.1724],
+            [ 0.2291,  0.5749, -0.2267],
+            [-0.7920,  0.3607, -0.3701],
+            [ 1.3666, -0.5850, -1.7242]]])
+    >>> torch.inner(a, b)
+    tensor([[[-0.9837,  1.1560,  0.2907,  2.6785],
+            [ 2.5671,  0.5452, -0.6912, -1.5509]],
+
+            [[ 0.1782,  2.9843,  0.7366,  1.5672],
+            [ 3.5115, -0.4864, -1.2476, -4.4337]]])
+
+    # Scalar input
+    >>> torch.inner(a, torch.tensor(2))
+    tensor([[1.6347, 2.1748, 2.3567],
+            [0.6558, 0.2469, 5.5787]])
+""",
+)
+
+add_docstr(
+    torch.outer,
+    r"""
+outer(input, vec2, *, out=None) -> Tensor
+
+Outer product of :attr:`input` and :attr:`vec2`.
+If :attr:`input` is a vector of size :math:`n` and :attr:`vec2` is a vector of
+size :math:`m`, then :attr:`out` must be a matrix of size :math:`(n \times m)`.
+
+.. note:: This function does not :ref:`broadcast <broadcasting-semantics>`.
+
+Args:
+    input (Tensor): 1-D input vector
+    vec2 (Tensor): 1-D input vector
+
+Keyword args:
+    out (Tensor, optional): optional output matrix
+
+Example::
+
+    >>> v1 = torch.arange(1., 5.)
+    >>> v2 = torch.arange(1., 4.)
+    >>> torch.outer(v1, v2)
+    tensor([[  1.,   2.,   3.],
+            [  2.,   4.,   6.],
+            [  3.,   6.,   9.],
+            [  4.,   8.,  12.]])
+""",
+)
+
+add_docstr(
+    torch.ger,
+    r"""
+ger(input, vec2, *, out=None) -> Tensor
+
+Alias of :func:`torch.outer`.
+
+.. warning::
+    This function is deprecated and will be removed in a future PyTorch release.
+    Use :func:`torch.outer` instead.
+""",
+)
+
+add_docstr(
+    torch.get_default_dtype,
+    r"""
+get_default_dtype() -> torch.dtype
+
+Get the current default floating point :class:`torch.dtype`.
+
+Example::
+
+    >>> torch.get_default_dtype()  # initial default for floating point is torch.float32
+    torch.float32
+    >>> torch.set_default_dtype(torch.float64)
+    >>> torch.get_default_dtype()  # default is now changed to torch.float64
+    torch.float64
+
+""",
+)
+
+add_docstr(
+    torch.get_num_threads,
+    r"""
+get_num_threads() -> int
+
+Returns the number of threads used for parallelizing CPU operations
+""",
+)
+
+add_docstr(
+    torch.get_num_interop_threads,
+    r"""
+get_num_interop_threads() -> int
+
+Returns the number of threads used for inter-op parallelism on CPU
+(e.g. in JIT interpreter)
+""",
+)
+
+add_docstr(
+    torch.gt,
+    r"""
+gt(input, other, *, out=None) -> Tensor
+
+Computes :math:`\text{input} > \text{other}` element-wise.
+"""
+    + r"""
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or float): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is greater than :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.gt(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[False, True], [False, False]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.greater,
+    r"""
+greater(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.gt`.
+""",
+)
+
+add_docstr(
+    torch.histc,
+    r"""
+histc(input, bins=100, min=0, max=0, *, out=None) -> Tensor
+
+Computes the histogram of a tensor.
+
+The elements are sorted into equal width bins between :attr:`min` and
+:attr:`max`. If :attr:`min` and :attr:`max` are both zero, the minimum and
+maximum values of the data are used.
+
+Elements lower than min and higher than max and ``NaN`` elements are ignored.
+
+Args:
+    {input}
+    bins (int): number of histogram bins
+    min (Scalar): lower end of the range (inclusive)
+    max (Scalar): upper end of the range (inclusive)
+
+Keyword args:
+    {out}
+
+Returns:
+    Tensor: Histogram represented as a tensor
+
+Example::
+
+    >>> torch.histc(torch.tensor([1., 2, 1]), bins=4, min=0, max=3)
+    tensor([ 0.,  2.,  1.,  0.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.histogram,
+    r"""
+histogram(input, bins, *, range=None, weight=None, density=False, out=None) -> (Tensor, Tensor)
+
+Computes a histogram of the values in a tensor.
+
+:attr:`bins` can be an integer or a 1D tensor.
+
+If :attr:`bins` is an int, it specifies the number of equal-width bins.
+By default, the lower and upper range of the bins is determined by the
+minimum and maximum elements of the input tensor. The :attr:`range`
+argument can be provided to specify a range for the bins.
+
+If :attr:`bins` is a 1D tensor, it specifies the sequence of bin edges
+including the rightmost edge. It should contain at least 2 elements
+and its elements should be increasing.
+
+Args:
+    {input}
+    bins: int or 1D Tensor. If int, defines the number of equal-width bins. If tensor,
+          defines the sequence of bin edges including the rightmost edge.
+
+Keyword args:
+    range (tuple of float): Defines the range of the bins.
+    weight (Tensor): If provided, weight should have the same shape as input. Each value in
+                     input contributes its associated weight towards its bin's result.
+    density (bool): If False, the result will contain the count (or total weight) in each bin.
+                    If True, the result is the value of the probability density function over the bins,
+                    normalized such that the integral over the range of the bins is 1.
+    {out} (tuple, optional): The result tuple of two output tensors (hist, bin_edges).
+
+Returns:
+    hist (Tensor): 1D Tensor containing the values of the histogram.
+    bin_edges(Tensor): 1D Tensor containing the edges of the histogram bins.
+
+Example::
+
+    >>> torch.histogram(torch.tensor([1., 2, 1]), bins=4, range=(0., 3.), weight=torch.tensor([1., 2., 4.]))
+    (tensor([ 0.,  5.,  2.,  0.]), tensor([0., 0.75, 1.5, 2.25, 3.]))
+    >>> torch.histogram(torch.tensor([1., 2, 1]), bins=4, range=(0., 3.), weight=torch.tensor([1., 2., 4.]), density=True)
+    (tensor([ 0.,  0.9524,  0.3810,  0.]), tensor([0., 0.75, 1.5, 2.25, 3.]))
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.histogramdd,
+    r"""
+histogramdd(input, bins, *, range=None, weight=None, density=False, out=None) -> (Tensor, Tensor[])
+
+Computes a multi-dimensional histogram of the values in a tensor.
+
+Interprets the elements of an input tensor whose innermost dimension has size N
+as a collection of N-dimensional points. Maps each of the points into a set of
+N-dimensional bins and returns the number of points (or total weight) in each bin.
+
+:attr:`input` must be a tensor with at least 2 dimensions.
+If input has shape (M, N), each of its M rows defines a point in N-dimensional space.
+If input has three or more dimensions, all but the last dimension are flattened.
+
+Each dimension is independently associated with its own strictly increasing sequence
+of bin edges. Bin edges may be specified explicitly by passing a sequence of 1D
+tensors. Alternatively, bin edges may be constructed automatically by passing a
+sequence of integers specifying the number of equal-width bins in each dimension.
+
+For each N-dimensional point in input:
+    - Each of its coordinates is binned independently among the bin edges
+        corresponding to its dimension
+    - Binning results are combined to identify the N-dimensional bin (if any)
+        into which the point falls
+    - If the point falls into a bin, the bin's count (or total weight) is incremented
+    - Points which do not fall into any bin do not contribute to the output
+
+:attr:`bins` can be a sequence of N 1D tensors, a sequence of N ints, or a single int.
+
+If :attr:`bins` is a sequence of N 1D tensors, it explicitly specifies the N sequences
+of bin edges. Each 1D tensor should contain a strictly increasing sequence with at
+least one element. A sequence of K bin edges defines K-1 bins, explicitly specifying
+the left and right edges of all bins. Every bin is exclusive of its left edge. Only
+the rightmost bin is inclusive of its right edge.
+
+If :attr:`bins` is a sequence of N ints, it specifies the number of equal-width bins
+in each dimension. By default, the leftmost and rightmost bin edges in each dimension
+are determined by the minimum and maximum elements of the input tensor in the
+corresponding dimension. The :attr:`range` argument can be provided to manually
+specify the leftmost and rightmost bin edges in each dimension.
+
+If :attr:`bins` is an int, it specifies the number of equal-width bins for all dimensions.
+
+.. note::
+    See also :func:`torch.histogram`, which specifically computes 1D histograms.
+    While :func:`torch.histogramdd` infers the dimensionality of its bins and
+    binned values from the shape of :attr:`input`, :func:`torch.histogram`
+    accepts and flattens :attr:`input` of any shape.
+
+Args:
+    {input}
+    bins: Tensor[], int[], or int.
+            If Tensor[], defines the sequences of bin edges.
+            If int[], defines the number of equal-width bins in each dimension.
+            If int, defines the number of equal-width bins for all dimensions.
+Keyword args:
+    range (sequence of float): Defines the leftmost and rightmost bin edges
+                                in each dimension.
+    weight (Tensor): By default, each value in the input has weight 1. If a weight
+                        tensor is passed, each N-dimensional coordinate in input
+                        contributes its associated weight towards its bin's result.
+                        The weight tensor should have the same shape as the :attr:`input`
+                        tensor excluding its innermost dimension N.
+    density (bool): If False (default), the result will contain the count (or total weight)
+                    in each bin. If True, each count (weight) is divided by the total count
+                    (total weight), then divided by the volume of its associated bin.
+Returns:
+    hist (Tensor): N-dimensional Tensor containing the values of the histogram.
+    bin_edges(Tensor[]): sequence of N 1D Tensors containing the bin edges.
+
+Example::
+    >>> torch.histogramdd(torch.tensor([[0., 1.], [1., 0.], [2., 0.], [2., 2.]]), bins=[3, 3],
+    ...                   weight=torch.tensor([1., 2., 4., 8.]))
+        torch.return_types.histogramdd(
+            hist=tensor([[0., 1., 0.],
+                         [2., 0., 0.],
+                         [4., 0., 8.]]),
+            bin_edges=(tensor([0.0000, 0.6667, 1.3333, 2.0000]),
+                       tensor([0.0000, 0.6667, 1.3333, 2.0000])))
+
+    >>> torch.histogramdd(torch.tensor([[0., 0.], [1., 1.], [2., 2.]]), bins=[2, 2],
+    ...                   range=[0., 1., 0., 1.], density=True)
+        torch.return_types.histogramdd(
+           hist=tensor([[2., 0.],
+                        [0., 2.]]),
+           bin_edges=(tensor([0.0000, 0.5000, 1.0000]),
+                      tensor([0.0000, 0.5000, 1.0000])))
+
+""".format(
+        **common_args
+    ),
+)
+# TODO: Fix via https://github.com/pytorch/pytorch/issues/75798
+torch.histogramdd.__module__ = "torch"
+
+add_docstr(
+    torch.hypot,
+    r"""
+hypot(input, other, *, out=None) -> Tensor
+
+Given the legs of a right triangle, return its hypotenuse.
+
+.. math::
+    \text{out}_{i} = \sqrt{\text{input}_{i}^{2} + \text{other}_{i}^{2}}
+
+The shapes of ``input`` and ``other`` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+"""
+    + r"""
+Args:
+    input (Tensor): the first input tensor
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0]))
+    tensor([5.0000, 5.6569, 6.4031])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.i0,
+    r"""
+i0(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.i0`.
+""",
+)
+
+add_docstr(
+    torch.igamma,
+    r"""
+igamma(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.gammainc`.
+""",
+)
+
+add_docstr(
+    torch.igammac,
+    r"""
+igammac(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.gammaincc`.
+""",
+)
+
+add_docstr(
+    torch.index_select,
+    r"""
+index_select(input, dim, index, *, out=None) -> Tensor
+
+Returns a new tensor which indexes the :attr:`input` tensor along dimension
+:attr:`dim` using the entries in :attr:`index` which is a `LongTensor`.
+
+The returned tensor has the same number of dimensions as the original tensor
+(:attr:`input`).  The :attr:`dim`\ th dimension has the same size as the length
+of :attr:`index`; other dimensions have the same size as in the original tensor.
+
+.. note:: The returned tensor does **not** use the same storage as the original
+          tensor.  If :attr:`out` has a different shape than expected, we
+          silently change it to the correct shape, reallocating the underlying
+          storage if necessary.
+
+Args:
+    {input}
+    dim (int): the dimension in which we index
+    index (IntTensor or LongTensor): the 1-D tensor containing the indices to index
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.randn(3, 4)
+    >>> x
+    tensor([[ 0.1427,  0.0231, -0.5414, -1.0009],
+            [-0.4664,  0.2647, -0.1228, -1.1068],
+            [-1.1734, -0.6571,  0.7230, -0.6004]])
+    >>> indices = torch.tensor([0, 2])
+    >>> torch.index_select(x, 0, indices)
+    tensor([[ 0.1427,  0.0231, -0.5414, -1.0009],
+            [-1.1734, -0.6571,  0.7230, -0.6004]])
+    >>> torch.index_select(x, 1, indices)
+    tensor([[ 0.1427, -0.5414],
+            [-0.4664, -0.1228],
+            [-1.1734,  0.7230]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.inverse,
+    r"""
+inverse(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.linalg.inv`
+""",
+)
+
+add_docstr(
+    torch.isin,
+    r"""
+isin(elements, test_elements, *, assume_unique=False, invert=False) -> Tensor
+
+Tests if each element of :attr:`elements` is in :attr:`test_elements`. Returns
+a boolean tensor of the same shape as :attr:`elements` that is True for elements
+in :attr:`test_elements` and False otherwise.
+
+.. note::
+    One of :attr:`elements` or :attr:`test_elements` can be a scalar, but not both.
+
+Args:
+    elements (Tensor or Scalar): Input elements
+    test_elements (Tensor or Scalar): Values against which to test for each input element
+    assume_unique (bool, optional): If True, assumes both :attr:`elements` and
+        :attr:`test_elements` contain unique elements, which can speed up the
+        calculation. Default: False
+    invert (bool, optional): If True, inverts the boolean return tensor, resulting in True
+        values for elements *not* in :attr:`test_elements`. Default: False
+
+Returns:
+    A boolean tensor of the same shape as :attr:`elements` that is True for elements in
+    :attr:`test_elements` and False otherwise
+
+Example:
+    >>> torch.isin(torch.tensor([[1, 2], [3, 4]]), torch.tensor([2, 3]))
+    tensor([[False,  True],
+            [ True, False]])
+""",
+)
+
+add_docstr(
+    torch.isinf,
+    r"""
+isinf(input) -> Tensor
+
+Tests if each element of :attr:`input` is infinite
+(positive or negative infinity) or not.
+
+.. note::
+    Complex values are infinite when their real or imaginary part is
+    infinite.
+
+Args:
+    {input}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is infinite and False elsewhere
+
+Example::
+
+    >>> torch.isinf(torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')]))
+    tensor([False,  True,  False,  True,  False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.isposinf,
+    r"""
+isposinf(input, *, out=None) -> Tensor
+Tests if each element of :attr:`input` is positive infinity or not.
+
+Args:
+  {input}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> a = torch.tensor([-float('inf'), float('inf'), 1.2])
+    >>> torch.isposinf(a)
+    tensor([False,  True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.isneginf,
+    r"""
+isneginf(input, *, out=None) -> Tensor
+Tests if each element of :attr:`input` is negative infinity or not.
+
+Args:
+  {input}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> a = torch.tensor([-float('inf'), float('inf'), 1.2])
+    >>> torch.isneginf(a)
+    tensor([ True, False, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.isclose,
+    r"""
+isclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False) -> Tensor
+
+Returns a new tensor with boolean elements representing if each element of
+:attr:`input` is "close" to the corresponding element of :attr:`other`.
+Closeness is defined as:
+
+.. math::
+    \lvert \text{input} - \text{other} \rvert \leq \texttt{atol} + \texttt{rtol} \times \lvert \text{other} \rvert
+"""
+    + r"""
+
+where :attr:`input` and :attr:`other` are finite. Where :attr:`input`
+and/or :attr:`other` are nonfinite they are close if and only if
+they are equal, with NaNs being considered equal to each other when
+:attr:`equal_nan` is True.
+
+Args:
+    input (Tensor): first tensor to compare
+    other (Tensor): second tensor to compare
+    atol (float, optional): absolute tolerance. Default: 1e-08
+    rtol (float, optional): relative tolerance. Default: 1e-05
+    equal_nan (bool, optional): if ``True``, then two ``NaN`` s will be considered equal. Default: ``False``
+
+Examples::
+
+    >>> torch.isclose(torch.tensor((1., 2, 3)), torch.tensor((1 + 1e-10, 3, 4)))
+    tensor([ True, False, False])
+    >>> torch.isclose(torch.tensor((float('inf'), 4)), torch.tensor((float('inf'), 6)), rtol=.5)
+    tensor([True, True])
+""",
+)
+
+add_docstr(
+    torch.isfinite,
+    r"""
+isfinite(input) -> Tensor
+
+Returns a new tensor with boolean elements representing if each element is `finite` or not.
+
+Real values are finite when they are not NaN, negative infinity, or infinity.
+Complex values are finite when both their real and imaginary parts are finite.
+
+Args:
+    {input}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is finite and False elsewhere
+
+Example::
+
+    >>> torch.isfinite(torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')]))
+    tensor([True,  False,  True,  False,  False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.isnan,
+    r"""
+isnan(input) -> Tensor
+
+Returns a new tensor with boolean elements representing if each element of :attr:`input`
+is NaN or not. Complex values are considered NaN when either their real
+and/or imaginary part is NaN.
+
+Arguments:
+    {input}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is NaN and False elsewhere
+
+Example::
+
+    >>> torch.isnan(torch.tensor([1, float('nan'), 2]))
+    tensor([False, True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.isreal,
+    r"""
+isreal(input) -> Tensor
+
+Returns a new tensor with boolean elements representing if each element of :attr:`input` is real-valued or not.
+All real-valued types are considered real. Complex values are considered real when their imaginary part is 0.
+
+Arguments:
+    {input}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is real and False elsewhere
+
+Example::
+
+    >>> torch.isreal(torch.tensor([1, 1+1j, 2+0j]))
+    tensor([True, False, True])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_floating_point,
+    r"""
+is_floating_point(input) -> (bool)
+
+Returns True if the data type of :attr:`input` is a floating point data type i.e.,
+one of ``torch.float64``, ``torch.float32``, ``torch.float16``, and ``torch.bfloat16``.
+
+Args:
+    {input}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_complex,
+    r"""
+is_complex(input) -> (bool)
+
+Returns True if the data type of :attr:`input` is a complex data type i.e.,
+one of ``torch.complex64``, and ``torch.complex128``.
+
+Args:
+    {input}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_grad_enabled,
+    r"""
+is_grad_enabled() -> (bool)
+
+Returns True if grad mode is currently enabled.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_inference_mode_enabled,
+    r"""
+is_inference_mode_enabled() -> (bool)
+
+Returns True if inference mode is currently enabled.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_inference,
+    r"""
+is_inference(input) -> (bool)
+
+Returns True if :attr:`input` is an inference tensor.
+
+A non-view tensor is an inference tensor if and only if it was
+allocated during inference mode. A view tensor is an inference
+tensor if and only if the tensor it is a view of is an inference tensor.
+
+For details on inference mode please see
+`Inference Mode <https://pytorch.org/cppdocs/notes/inference_mode.html>`_.
+
+Args:
+    {input}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_conj,
+    r"""
+is_conj(input) -> (bool)
+
+Returns True if the :attr:`input` is a conjugated tensor, i.e. its conjugate bit is set to `True`.
+
+Args:
+    {input}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.is_nonzero,
+    r"""
+is_nonzero(input) -> (bool)
+
+Returns True if the :attr:`input` is a single element tensor which is not equal to zero
+after type conversions.
+i.e. not equal to ``torch.tensor([0.])`` or ``torch.tensor([0])`` or
+``torch.tensor([False])``.
+Throws a ``RuntimeError`` if ``torch.numel() != 1`` (even in case
+of sparse tensors).
+
+Args:
+    {input}
+
+Examples::
+
+    >>> torch.is_nonzero(torch.tensor([0.]))
+    False
+    >>> torch.is_nonzero(torch.tensor([1.5]))
+    True
+    >>> torch.is_nonzero(torch.tensor([False]))
+    False
+    >>> torch.is_nonzero(torch.tensor([3]))
+    True
+    >>> torch.is_nonzero(torch.tensor([1, 3, 5]))
+    Traceback (most recent call last):
+    ...
+    RuntimeError: bool value of Tensor with more than one value is ambiguous
+    >>> torch.is_nonzero(torch.tensor([]))
+    Traceback (most recent call last):
+    ...
+    RuntimeError: bool value of Tensor with no values is ambiguous
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.kron,
+    r"""
+kron(input, other, *, out=None) -> Tensor
+
+Computes the Kronecker product, denoted by :math:`\otimes`, of :attr:`input` and :attr:`other`.
+
+If :attr:`input` is a :math:`(a_0 \times a_1 \times \dots \times a_n)` tensor and :attr:`other` is a
+:math:`(b_0 \times b_1 \times \dots \times b_n)` tensor, the result will be a
+:math:`(a_0*b_0 \times a_1*b_1 \times \dots \times a_n*b_n)` tensor with the following entries:
+
+.. math::
+    (\text{input} \otimes \text{other})_{k_0, k_1, \dots, k_n} =
+        \text{input}_{i_0, i_1, \dots, i_n} * \text{other}_{j_0, j_1, \dots, j_n},
+
+where :math:`k_t = i_t * b_t + j_t` for :math:`0 \leq t \leq n`.
+If one tensor has fewer dimensions than the other it is unsqueezed until it has the same number of dimensions.
+
+Supports real-valued and complex-valued inputs.
+
+.. note::
+    This function generalizes the typical definition of the Kronecker product for two matrices to two tensors,
+    as described above. When :attr:`input` is a :math:`(m \times n)` matrix and :attr:`other` is a
+    :math:`(p \times q)` matrix, the result will be a :math:`(p*m \times q*n)` block matrix:
+
+    .. math::
+        \mathbf{A} \otimes \mathbf{B}=\begin{bmatrix}
+        a_{11} \mathbf{B} & \cdots & a_{1 n} \mathbf{B} \\
+        \vdots & \ddots & \vdots \\
+        a_{m 1} \mathbf{B} & \cdots & a_{m n} \mathbf{B} \end{bmatrix}
+
+    where :attr:`input` is :math:`\mathbf{A}` and :attr:`other` is :math:`\mathbf{B}`.
+
+Arguments:
+    input (Tensor)
+    other (Tensor)
+
+Keyword args:
+    out (Tensor, optional): The output tensor. Ignored if ``None``. Default: ``None``
+
+Examples::
+
+    >>> mat1 = torch.eye(2)
+    >>> mat2 = torch.ones(2, 2)
+    >>> torch.kron(mat1, mat2)
+    tensor([[1., 1., 0., 0.],
+            [1., 1., 0., 0.],
+            [0., 0., 1., 1.],
+            [0., 0., 1., 1.]])
+
+    >>> mat1 = torch.eye(2)
+    >>> mat2 = torch.arange(1, 5).reshape(2, 2)
+    >>> torch.kron(mat1, mat2)
+    tensor([[1., 2., 0., 0.],
+            [3., 4., 0., 0.],
+            [0., 0., 1., 2.],
+            [0., 0., 3., 4.]])
+""",
+)
+
+add_docstr(
+    torch.kthvalue,
+    r"""
+kthvalue(input, k, dim=None, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+
+Returns a namedtuple ``(values, indices)`` where ``values`` is the :attr:`k` th
+smallest element of each row of the :attr:`input` tensor in the given dimension
+:attr:`dim`. And ``indices`` is the index location of each element found.
+
+If :attr:`dim` is not given, the last dimension of the `input` is chosen.
+
+If :attr:`keepdim` is ``True``, both the :attr:`values` and :attr:`indices` tensors
+are the same size as :attr:`input`, except in the dimension :attr:`dim` where
+they are of size 1. Otherwise, :attr:`dim` is squeezed
+(see :func:`torch.squeeze`), resulting in both the :attr:`values` and
+:attr:`indices` tensors having 1 fewer dimension than the :attr:`input` tensor.
+
+.. note::
+    When :attr:`input` is a CUDA tensor and there are multiple valid
+    :attr:`k` th values, this function may nondeterministically return
+    :attr:`indices` for any of them.
+
+Args:
+    {input}
+    k (int): k for the k-th smallest element
+    dim (int, optional): the dimension to find the kth value along
+    {keepdim}
+
+Keyword args:
+    out (tuple, optional): the output tuple of (Tensor, LongTensor)
+                           can be optionally given to be used as output buffers
+
+Example::
+
+    >>> x = torch.arange(1., 6.)
+    >>> x
+    tensor([ 1.,  2.,  3.,  4.,  5.])
+    >>> torch.kthvalue(x, 4)
+    torch.return_types.kthvalue(values=tensor(4.), indices=tensor(3))
+
+    >>> x=torch.arange(1.,7.).resize_(2,3)
+    >>> x
+    tensor([[ 1.,  2.,  3.],
+            [ 4.,  5.,  6.]])
+    >>> torch.kthvalue(x, 2, 0, True)
+    torch.return_types.kthvalue(values=tensor([[4., 5., 6.]]), indices=tensor([[1, 1, 1]]))
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.lcm,
+    r"""
+lcm(input, other, *, out=None) -> Tensor
+
+Computes the element-wise least common multiple (LCM) of :attr:`input` and :attr:`other`.
+
+Both :attr:`input` and :attr:`other` must have integer types.
+
+.. note::
+    This defines :math:`lcm(0, 0) = 0` and :math:`lcm(0, a) = 0`.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([5, 10, 15])
+    >>> b = torch.tensor([3, 4, 5])
+    >>> torch.lcm(a, b)
+    tensor([15, 20, 15])
+    >>> c = torch.tensor([3])
+    >>> torch.lcm(a, c)
+    tensor([15, 30, 15])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.ldexp,
+    r"""
+ldexp(input, other, *, out=None) -> Tensor
+
+Multiplies :attr:`input` by 2 ** :attr:`other`.
+
+.. math::
+    \text{{out}}_i = \text{{input}}_i * 2^\text{{other}}_i
+"""
+    + r"""
+
+Typically this function is used to construct floating point numbers by multiplying
+mantissas in :attr:`input` with integral powers of two created from the exponents
+in :attr:`other`.
+
+Args:
+    {input}
+    other (Tensor): a tensor of exponents, typically integers.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.ldexp(torch.tensor([1.]), torch.tensor([1]))
+    tensor([2.])
+    >>> torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4]))
+    tensor([ 2.,  4.,  8., 16.])
+
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.le,
+    r"""
+le(input, other, *, out=None) -> Tensor
+
+Computes :math:`\text{input} \leq \text{other}` element-wise.
+"""
+    + r"""
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or Scalar): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is less than or equal to
+    :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.le(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[True, False], [True, True]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.less_equal,
+    r"""
+less_equal(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.le`.
+""",
+)
+
+add_docstr(
+    torch.lerp,
+    r"""
+lerp(input, end, weight, *, out=None)
+
+Does a linear interpolation of two tensors :attr:`start` (given by :attr:`input`) and :attr:`end` based
+on a scalar or tensor :attr:`weight` and returns the resulting :attr:`out` tensor.
+
+.. math::
+    \text{out}_i = \text{start}_i + \text{weight}_i \times (\text{end}_i - \text{start}_i)
+"""
+    + r"""
+The shapes of :attr:`start` and :attr:`end` must be
+:ref:`broadcastable <broadcasting-semantics>`. If :attr:`weight` is a tensor, then
+the shapes of :attr:`weight`, :attr:`start`, and :attr:`end` must be :ref:`broadcastable <broadcasting-semantics>`.
+
+Args:
+    input (Tensor): the tensor with the starting points
+    end (Tensor): the tensor with the ending points
+    weight (float or tensor): the weight for the interpolation formula
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> start = torch.arange(1., 5.)
+    >>> end = torch.empty(4).fill_(10)
+    >>> start
+    tensor([ 1.,  2.,  3.,  4.])
+    >>> end
+    tensor([ 10.,  10.,  10.,  10.])
+    >>> torch.lerp(start, end, 0.5)
+    tensor([ 5.5000,  6.0000,  6.5000,  7.0000])
+    >>> torch.lerp(start, end, torch.full_like(start, 0.5))
+    tensor([ 5.5000,  6.0000,  6.5000,  7.0000])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.lgamma,
+    r"""
+lgamma(input, *, out=None) -> Tensor
+
+Computes the natural logarithm of the absolute value of the gamma function on :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \ln |\Gamma(\text{input}_{i})|
+"""
+    + """
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.arange(0.5, 2, 0.5)
+    >>> torch.lgamma(a)
+    tensor([ 0.5724,  0.0000, -0.1208])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.linspace,
+    r"""
+linspace(start, end, steps, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Creates a one-dimensional tensor of size :attr:`steps` whose values are evenly
+spaced from :attr:`start` to :attr:`end`, inclusive. That is, the value are:
+
+.. math::
+    (\text{start},
+    \text{start} + \frac{\text{end} - \text{start}}{\text{steps} - 1},
+    \ldots,
+    \text{start} + (\text{steps} - 2) * \frac{\text{end} - \text{start}}{\text{steps} - 1},
+    \text{end})
+"""
+    + """
+
+From PyTorch 1.11 linspace requires the steps argument. Use steps=100 to restore the previous behavior.
+
+Args:
+    start (float or Tensor): the starting value for the set of points. If `Tensor`, it must be 0-dimensional
+    end (float or Tensor): the ending value for the set of points. If `Tensor`, it must be 0-dimensional
+    steps (int): size of the constructed tensor
+
+Keyword arguments:
+    {out}
+    dtype (torch.dtype, optional): the data type to perform the computation in.
+        Default: if None, uses the global default dtype (see torch.get_default_dtype())
+        when both :attr:`start` and :attr:`end` are real,
+        and corresponding complex dtype when either is complex.
+    {layout}
+    {device}
+    {requires_grad}
+
+
+Example::
+
+    >>> torch.linspace(3, 10, steps=5)
+    tensor([  3.0000,   4.7500,   6.5000,   8.2500,  10.0000])
+    >>> torch.linspace(-10, 10, steps=5)
+    tensor([-10.,  -5.,   0.,   5.,  10.])
+    >>> torch.linspace(start=-10, end=10, steps=5)
+    tensor([-10.,  -5.,   0.,   5.,  10.])
+    >>> torch.linspace(start=-10, end=10, steps=1)
+    tensor([-10.])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.log,
+    r"""
+log(input, *, out=None) -> Tensor
+
+Returns a new tensor with the natural logarithm of the elements
+of :attr:`input`.
+
+.. math::
+    y_{i} = \log_{e} (x_{i})
+"""
+    + r"""
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.rand(5) * 5
+    >>> a
+    tensor([4.7767, 4.3234, 1.2156, 0.2411, 4.5739])
+    >>> torch.log(a)
+    tensor([ 1.5637,  1.4640,  0.1952, -1.4226,  1.5204])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.log10,
+    r"""
+log10(input, *, out=None) -> Tensor
+
+Returns a new tensor with the logarithm to the base 10 of the elements
+of :attr:`input`.
+
+.. math::
+    y_{i} = \log_{10} (x_{i})
+"""
+    + r"""
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.rand(5)
+    >>> a
+    tensor([ 0.5224,  0.9354,  0.7257,  0.1301,  0.2251])
+
+
+    >>> torch.log10(a)
+    tensor([-0.2820, -0.0290, -0.1392, -0.8857, -0.6476])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.log1p,
+    r"""
+log1p(input, *, out=None) -> Tensor
+
+Returns a new tensor with the natural logarithm of (1 + :attr:`input`).
+
+.. math::
+    y_i = \log_{e} (x_i + 1)
+"""
+    + r"""
+.. note:: This function is more accurate than :func:`torch.log` for small
+          values of :attr:`input`
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(5)
+    >>> a
+    tensor([-1.0090, -0.9923,  1.0249, -0.5372,  0.2492])
+    >>> torch.log1p(a)
+    tensor([    nan, -4.8653,  0.7055, -0.7705,  0.2225])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.log2,
+    r"""
+log2(input, *, out=None) -> Tensor
+
+Returns a new tensor with the logarithm to the base 2 of the elements
+of :attr:`input`.
+
+.. math::
+    y_{i} = \log_{2} (x_{i})
+"""
+    + r"""
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.rand(5)
+    >>> a
+    tensor([ 0.8419,  0.8003,  0.9971,  0.5287,  0.0490])
+
+
+    >>> torch.log2(a)
+    tensor([-0.2483, -0.3213, -0.0042, -0.9196, -4.3504])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logaddexp,
+    r"""
+logaddexp(input, other, *, out=None) -> Tensor
+
+Logarithm of the sum of exponentiations of the inputs.
+
+Calculates pointwise :math:`\log\left(e^x + e^y\right)`. This function is useful
+in statistics where the calculated probabilities of events may be so small as to
+exceed the range of normal floating point numbers. In such cases the logarithm
+of the calculated probability is stored. This function allows adding
+probabilities stored in such a fashion.
+
+This op should be disambiguated with :func:`torch.logsumexp` which performs a
+reduction on a single tensor.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1.0, -2, -3]))
+    tensor([-0.3069, -0.6867, -0.8731])
+    >>> torch.logaddexp(torch.tensor([-100.0, -200, -300]), torch.tensor([-1.0, -2, -3]))
+    tensor([-1., -2., -3.])
+    >>> torch.logaddexp(torch.tensor([1.0, 2000, 30000]), torch.tensor([-1.0, -2, -3]))
+    tensor([1.1269e+00, 2.0000e+03, 3.0000e+04])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logaddexp2,
+    r"""
+logaddexp2(input, other, *, out=None) -> Tensor
+
+Logarithm of the sum of exponentiations of the inputs in base-2.
+
+Calculates pointwise :math:`\log_2\left(2^x + 2^y\right)`. See
+:func:`torch.logaddexp` for more details.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword arguments:
+    {out}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.xlogy,
+    r"""
+xlogy(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.xlogy`.
+""",
+)
+
+add_docstr(
+    torch.logical_and,
+    r"""
+logical_and(input, other, *, out=None) -> Tensor
+
+Computes the element-wise logical AND of the given input tensors. Zeros are treated as ``False`` and nonzeros are
+treated as ``True``.
+
+Args:
+    {input}
+    other (Tensor): the tensor to compute AND with
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.logical_and(torch.tensor([True, False, True]), torch.tensor([True, False, False]))
+    tensor([ True, False, False])
+    >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
+    >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
+    >>> torch.logical_and(a, b)
+    tensor([False, False,  True, False])
+    >>> torch.logical_and(a.double(), b.double())
+    tensor([False, False,  True, False])
+    >>> torch.logical_and(a.double(), b)
+    tensor([False, False,  True, False])
+    >>> torch.logical_and(a, b, out=torch.empty(4, dtype=torch.bool))
+    tensor([False, False,  True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logical_not,
+    r"""
+logical_not(input, *, out=None) -> Tensor
+
+Computes the element-wise logical NOT of the given input tensor. If not specified, the output tensor will have the bool
+dtype. If the input tensor is not a bool tensor, zeros are treated as ``False`` and non-zeros are treated as ``True``.
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.logical_not(torch.tensor([True, False]))
+    tensor([False,  True])
+    >>> torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8))
+    tensor([ True, False, False])
+    >>> torch.logical_not(torch.tensor([0., 1.5, -10.], dtype=torch.double))
+    tensor([ True, False, False])
+    >>> torch.logical_not(torch.tensor([0., 1., -10.], dtype=torch.double), out=torch.empty(3, dtype=torch.int16))
+    tensor([1, 0, 0], dtype=torch.int16)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logical_or,
+    r"""
+logical_or(input, other, *, out=None) -> Tensor
+
+Computes the element-wise logical OR of the given input tensors. Zeros are treated as ``False`` and nonzeros are
+treated as ``True``.
+
+Args:
+    {input}
+    other (Tensor): the tensor to compute OR with
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.logical_or(torch.tensor([True, False, True]), torch.tensor([True, False, False]))
+    tensor([ True, False,  True])
+    >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
+    >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
+    >>> torch.logical_or(a, b)
+    tensor([ True,  True,  True, False])
+    >>> torch.logical_or(a.double(), b.double())
+    tensor([ True,  True,  True, False])
+    >>> torch.logical_or(a.double(), b)
+    tensor([ True,  True,  True, False])
+    >>> torch.logical_or(a, b, out=torch.empty(4, dtype=torch.bool))
+    tensor([ True,  True,  True, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logical_xor,
+    r"""
+logical_xor(input, other, *, out=None) -> Tensor
+
+Computes the element-wise logical XOR of the given input tensors. Zeros are treated as ``False`` and nonzeros are
+treated as ``True``.
+
+Args:
+    {input}
+    other (Tensor): the tensor to compute XOR with
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.logical_xor(torch.tensor([True, False, True]), torch.tensor([True, False, False]))
+    tensor([False, False,  True])
+    >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
+    >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
+    >>> torch.logical_xor(a, b)
+    tensor([ True,  True, False, False])
+    >>> torch.logical_xor(a.double(), b.double())
+    tensor([ True,  True, False, False])
+    >>> torch.logical_xor(a.double(), b)
+    tensor([ True,  True, False, False])
+    >>> torch.logical_xor(a, b, out=torch.empty(4, dtype=torch.bool))
+    tensor([ True,  True, False, False])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logspace,
+    """
+logspace(start, end, steps, base=10.0, *, \
+         out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+
+Creates a one-dimensional tensor of size :attr:`steps` whose values are evenly
+spaced from :math:`{{\text{{base}}}}^{{\text{{start}}}}` to
+:math:`{{\text{{base}}}}^{{\text{{end}}}}`, inclusive, on a logarithmic scale
+with base :attr:`base`. That is, the values are:
+
+.. math::
+    (\text{base}^{\text{start}},
+    \text{base}^{(\text{start} + \frac{\text{end} - \text{start}}{ \text{steps} - 1})},
+    \ldots,
+    \text{base}^{(\text{start} + (\text{steps} - 2) * \frac{\text{end} - \text{start}}{ \text{steps} - 1})},
+    \text{base}^{\text{end}})
+"""
+    + """
+
+
+From PyTorch 1.11 logspace requires the steps argument. Use steps=100 to restore the previous behavior.
+
+Args:
+    start (float or Tensor): the starting value for the set of points. If `Tensor`, it must be 0-dimensional
+    end (float or Tensor): the ending value for the set of points. If `Tensor`, it must be 0-dimensional
+    steps (int): size of the constructed tensor
+    base (float, optional): base of the logarithm function. Default: ``10.0``.
+
+Keyword arguments:
+    {out}
+    dtype (torch.dtype, optional): the data type to perform the computation in.
+        Default: if None, uses the global default dtype (see torch.get_default_dtype())
+        when both :attr:`start` and :attr:`end` are real,
+        and corresponding complex dtype when either is complex.
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.logspace(start=-10, end=10, steps=5)
+    tensor([ 1.0000e-10,  1.0000e-05,  1.0000e+00,  1.0000e+05,  1.0000e+10])
+    >>> torch.logspace(start=0.1, end=1.0, steps=5)
+    tensor([  1.2589,   2.1135,   3.5481,   5.9566,  10.0000])
+    >>> torch.logspace(start=0.1, end=1.0, steps=1)
+    tensor([1.2589])
+    >>> torch.logspace(start=2, end=2, steps=1, base=2)
+    tensor([4.0])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.logsumexp,
+    r"""
+logsumexp(input, dim, keepdim=False, *, out=None)
+
+Returns the log of summed exponentials of each row of the :attr:`input`
+tensor in the given dimension :attr:`dim`. The computation is numerically
+stabilized.
+
+For summation index :math:`j` given by `dim` and other indices :math:`i`, the result is
+
+    .. math::
+        \text{{logsumexp}}(x)_{{i}} = \log \sum_j \exp(x_{{ij}})
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+    {keepdim}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(3, 3)
+    >>> torch.logsumexp(a, 1)
+    tensor([1.4907, 1.0593, 1.5696])
+    >>> torch.dist(torch.logsumexp(a, 1), torch.log(torch.sum(torch.exp(a), 1)))
+    tensor(1.6859e-07)
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.lt,
+    r"""
+lt(input, other, *, out=None) -> Tensor
+
+Computes :math:`\text{input} < \text{other}` element-wise.
+"""
+    + r"""
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or float): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is less than :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.lt(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[False, False], [True, False]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.lu_unpack,
+    r"""
+lu_unpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True, *, out=None) -> (Tensor, Tensor, Tensor)
+
+Unpacks the LU decomposition returned by :func:`~linalg.lu_factor` into the `P, L, U` matrices.
+
+.. seealso::
+
+    :func:`~linalg.lu` returns the matrices from the LU decomposition. Its gradient formula is more efficient
+    than that of doing :func:`~linalg.lu_factor` followed by :func:`~linalg.lu_unpack`.
+
+Args:
+    LU_data (Tensor): the packed LU factorization data
+    LU_pivots (Tensor): the packed LU factorization pivots
+    unpack_data (bool): flag indicating if the data should be unpacked.
+                        If ``False``, then the returned ``L`` and ``U`` are empty tensors.
+                        Default: ``True``
+    unpack_pivots (bool): flag indicating if the pivots should be unpacked into a permutation matrix ``P``.
+                          If ``False``, then the returned ``P`` is  an empty tensor.
+                          Default: ``True``
+
+Keyword args:
+    out (tuple, optional): output tuple of three tensors. Ignored if `None`.
+
+Returns:
+    A namedtuple ``(P, L, U)``
+
+Examples::
+
+    >>> A = torch.randn(2, 3, 3)
+    >>> LU, pivots = torch.linalg.lu_factor(A)
+    >>> P, L, U = torch.lu_unpack(LU, pivots)
+    >>> # We can recover A from the factorization
+    >>> A_ = P @ L @ U
+    >>> torch.allclose(A, A_)
+    True
+
+    >>> # LU factorization of a rectangular matrix:
+    >>> A = torch.randn(2, 3, 2)
+    >>> LU, pivots = torch.linalg.lu_factor(A)
+    >>> P, L, U = torch.lu_unpack(LU, pivots)
+    >>> # P, L, U are the same as returned by linalg.lu
+    >>> P_, L_, U_ = torch.linalg.lu(A)
+    >>> torch.allclose(P, P_) and torch.allclose(L, L_) and torch.allclose(U, U_)
+    True
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.less,
+    r"""
+less(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.lt`.
+""",
+)
+
+add_docstr(
+    torch.lu_solve,
+    r"""
+lu_solve(b, LU_data, LU_pivots, *, out=None) -> Tensor
+
+Returns the LU solve of the linear system :math:`Ax = b` using the partially pivoted
+LU factorization of A from :func:`~linalg.lu_factor`.
+
+This function supports ``float``, ``double``, ``cfloat`` and ``cdouble`` dtypes for :attr:`input`.
+
+.. warning::
+
+    :func:`torch.lu_solve` is deprecated in favor of :func:`torch.linalg.lu_solve`.
+    :func:`torch.lu_solve` will be removed in a future PyTorch release.
+    ``X = torch.lu_solve(B, LU, pivots)`` should be replaced with
+
+    .. code:: python
+
+        X = linalg.lu_solve(LU, pivots, B)
+
+Arguments:
+    b (Tensor): the RHS tensor of size :math:`(*, m, k)`, where :math:`*`
+                is zero or more batch dimensions.
+    LU_data (Tensor): the pivoted LU factorization of A from :meth:`~linalg.lu_factor` of size :math:`(*, m, m)`,
+                       where :math:`*` is zero or more batch dimensions.
+    LU_pivots (IntTensor): the pivots of the LU factorization from :meth:`~linalg.lu_factor` of size :math:`(*, m)`,
+                           where :math:`*` is zero or more batch dimensions.
+                           The batch dimensions of :attr:`LU_pivots` must be equal to the batch dimensions of
+                           :attr:`LU_data`.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> A = torch.randn(2, 3, 3)
+    >>> b = torch.randn(2, 3, 1)
+    >>> LU, pivots = torch.linalg.lu_factor(A)
+    >>> x = torch.lu_solve(b, LU, pivots)
+    >>> torch.dist(A @ x, b)
+    tensor(1.00000e-07 *
+           2.8312)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.masked_select,
+    r"""
+masked_select(input, mask, *, out=None) -> Tensor
+
+Returns a new 1-D tensor which indexes the :attr:`input` tensor according to
+the boolean mask :attr:`mask` which is a `BoolTensor`.
+
+The shapes of the :attr:`mask` tensor and the :attr:`input` tensor don't need
+to match, but they must be :ref:`broadcastable <broadcasting-semantics>`.
+
+.. note:: The returned tensor does **not** use the same storage
+          as the original tensor
+
+Args:
+    {input}
+    mask  (BoolTensor): the tensor containing the binary mask to index with
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.randn(3, 4)
+    >>> x
+    tensor([[ 0.3552, -2.3825, -0.8297,  0.3477],
+            [-1.2035,  1.2252,  0.5002,  0.6248],
+            [ 0.1307, -2.0608,  0.1244,  2.0139]])
+    >>> mask = x.ge(0.5)
+    >>> mask
+    tensor([[False, False, False, False],
+            [False, True, True, True],
+            [False, False, False, True]])
+    >>> torch.masked_select(x, mask)
+    tensor([ 1.2252,  0.5002,  0.6248,  2.0139])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.matrix_power,
+    r"""
+matrix_power(input, n, *, out=None) -> Tensor
+
+Alias for :func:`torch.linalg.matrix_power`
+""",
+)
+
+add_docstr(
+    torch.matrix_exp,
+    r"""
+matrix_exp(A) -> Tensor
+
+Alias for :func:`torch.linalg.matrix_exp`.
+""",
+)
+
+add_docstr(
+    torch.max,
+    r"""
+max(input) -> Tensor
+
+Returns the maximum value of all elements in the ``input`` tensor.
+
+.. warning::
+    This function produces deterministic (sub)gradients unlike ``max(dim=0)``
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[ 0.6763,  0.7445, -2.2369]])
+    >>> torch.max(a)
+    tensor(0.7445)
+
+.. function:: max(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+   :noindex:
+
+Returns a namedtuple ``(values, indices)`` where ``values`` is the maximum
+value of each row of the :attr:`input` tensor in the given dimension
+:attr:`dim`. And ``indices`` is the index location of each maximum value found
+(argmax).
+
+If ``keepdim`` is ``True``, the output tensors are of the same size
+as ``input`` except in the dimension ``dim`` where they are of size 1.
+Otherwise, ``dim`` is squeezed (see :func:`torch.squeeze`), resulting
+in the output tensors having 1 fewer dimension than ``input``.
+
+.. note:: If there are multiple maximal values in a reduced row then
+          the indices of the first maximal value are returned.
+
+Args:
+    {input}
+    {dim}
+    {keepdim} Default: ``False``.
+
+Keyword args:
+    out (tuple, optional): the result tuple of two output tensors (max, max_indices)
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[-1.2360, -0.2942, -0.1222,  0.8475],
+            [ 1.1949, -1.1127, -2.2379, -0.6702],
+            [ 1.5717, -0.9207,  0.1297, -1.8768],
+            [-0.6172,  1.0036, -0.6060, -0.2432]])
+    >>> torch.max(a, 1)
+    torch.return_types.max(values=tensor([0.8475, 1.1949, 1.5717, 1.0036]), indices=tensor([3, 0, 0, 1]))
+
+.. function:: max(input, other, *, out=None) -> Tensor
+   :noindex:
+
+See :func:`torch.maximum`.
+
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.maximum,
+    r"""
+maximum(input, other, *, out=None) -> Tensor
+
+Computes the element-wise maximum of :attr:`input` and :attr:`other`.
+
+.. note::
+    If one of the elements being compared is a NaN, then that element is returned.
+    :func:`maximum` is not supported for tensors with complex dtypes.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor((1, 2, -1))
+    >>> b = torch.tensor((3, 0, 4))
+    >>> torch.maximum(a, b)
+    tensor([3, 2, 4])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.fmax,
+    r"""
+fmax(input, other, *, out=None) -> Tensor
+
+Computes the element-wise maximum of :attr:`input` and :attr:`other`.
+
+This is like :func:`torch.maximum` except it handles NaNs differently:
+if exactly one of the two elements being compared is a NaN then the non-NaN element is taken as the maximum.
+Only if both elements are NaN is NaN propagated.
+
+This function is a wrapper around C++'s ``std::fmax`` and is similar to NumPy's ``fmax`` function.
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer and floating-point inputs.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([9.7, float('nan'), 3.1, float('nan')])
+    >>> b = torch.tensor([-2.2, 0.5, float('nan'), float('nan')])
+    >>> torch.fmax(a, b)
+    tensor([9.7000, 0.5000, 3.1000,    nan])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.amax,
+    r"""
+amax(input, dim, keepdim=False, *, out=None) -> Tensor
+
+Returns the maximum value of each slice of the :attr:`input` tensor in the given
+dimension(s) :attr:`dim`.
+
+.. note::
+    The difference between ``max``/``min`` and ``amax``/``amin`` is:
+        - ``amax``/``amin`` supports reducing on multiple dimensions,
+        - ``amax``/``amin`` does not return indices,
+        - ``amax``/``amin`` evenly distributes gradient between equal values,
+          while ``max(dim)``/``min(dim)`` propagates gradient only to a single
+          index in the source tensor.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.8177,  1.4878, -0.2491,  0.9130],
+            [-0.7158,  1.1775,  2.0992,  0.4817],
+            [-0.0053,  0.0164, -1.3738, -0.0507],
+            [ 1.9700,  1.1106, -1.0318, -1.0816]])
+    >>> torch.amax(a, 1)
+    tensor([1.4878, 2.0992, 0.0164, 1.9700])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.argmax,
+    r"""
+argmax(input) -> LongTensor
+
+Returns the indices of the maximum value of all elements in the :attr:`input` tensor.
+
+This is the second value returned by :meth:`torch.max`. See its
+documentation for the exact semantics of this method.
+
+.. note:: If there are multiple maximal values then the indices of the first maximal value are returned.
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 1.3398,  0.2663, -0.2686,  0.2450],
+            [-0.7401, -0.8805, -0.3402, -1.1936],
+            [ 0.4907, -1.3948, -1.0691, -0.3132],
+            [-1.6092,  0.5419, -0.2993,  0.3195]])
+    >>> torch.argmax(a)
+    tensor(0)
+
+.. function:: argmax(input, dim, keepdim=False) -> LongTensor
+   :noindex:
+
+Returns the indices of the maximum values of a tensor across a dimension.
+
+This is the second value returned by :meth:`torch.max`. See its
+documentation for the exact semantics of this method.
+
+Args:
+    {input}
+    {dim} If ``None``, the argmax of the flattened input is returned.
+    {keepdim}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 1.3398,  0.2663, -0.2686,  0.2450],
+            [-0.7401, -0.8805, -0.3402, -1.1936],
+            [ 0.4907, -1.3948, -1.0691, -0.3132],
+            [-1.6092,  0.5419, -0.2993,  0.3195]])
+    >>> torch.argmax(a, dim=1)
+    tensor([ 0,  2,  0,  1])
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.argwhere,
+    r"""
+argwhere(input) -> Tensor
+
+Returns a tensor containing the indices of all non-zero elements of
+:attr:`input`.  Each row in the result contains the indices of a non-zero
+element in :attr:`input`. The result is sorted lexicographically, with
+the last index changing the fastest (C-style).
+
+If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor
+:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of
+non-zero elements in the :attr:`input` tensor.
+
+.. note::
+    This function is similar to NumPy's `argwhere`.
+
+    When :attr:`input` is on CUDA, this function causes host-device synchronization.
+
+Args:
+    {input}
+
+Example::
+
+    >>> t = torch.tensor([1, 0, 1])
+    >>> torch.argwhere(t)
+    tensor([[0],
+            [2]])
+    >>> t = torch.tensor([[1, 0, 1], [0, 1, 1]])
+    >>> torch.argwhere(t)
+    tensor([[0, 0],
+            [0, 2],
+            [1, 1],
+            [1, 2]])
+""",
+)
+
+add_docstr(
+    torch.mean,
+    r"""
+mean(input, *, dtype=None) -> Tensor
+
+Returns the mean value of all elements in the :attr:`input` tensor. Input must be floating point or complex.
+
+Args:
+    input (Tensor):
+      the input tensor, either of floating point or complex dtype
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[ 0.2294, -0.5481,  1.3288]])
+    >>> torch.mean(a)
+    tensor(0.3367)
+
+.. function:: mean(input, dim, keepdim=False, *, dtype=None, out=None) -> Tensor
+   :noindex:
+
+Returns the mean value of each row of the :attr:`input` tensor in the given
+dimension :attr:`dim`. If :attr:`dim` is a list of dimensions,
+reduce over all of them.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    {dtype}
+    {out}
+
+.. seealso::
+
+    :func:`torch.nanmean` computes the mean value of `non-NaN` elements.
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[-0.3841,  0.6320,  0.4254, -0.7384],
+            [-0.9644,  1.0131, -0.6549, -1.4279],
+            [-0.2951, -1.3350, -0.7694,  0.5600],
+            [ 1.0842, -0.9580,  0.3623,  0.2343]])
+    >>> torch.mean(a, 1)
+    tensor([-0.0163, -0.5085, -0.4599,  0.1807])
+    >>> torch.mean(a, 1, True)
+    tensor([[-0.0163],
+            [-0.5085],
+            [-0.4599],
+            [ 0.1807]])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.nanmean,
+    r"""
+nanmean(input, dim=None, keepdim=False, *, dtype=None, out=None) -> Tensor
+
+Computes the mean of all `non-NaN` elements along the specified dimensions.
+
+This function is identical to :func:`torch.mean` when there are no `NaN` values
+in the :attr:`input` tensor. In the presence of `NaN`, :func:`torch.mean` will
+propagate the `NaN` to the output whereas :func:`torch.nanmean` will ignore the
+`NaN` values (`torch.nanmean(a)` is equivalent to `torch.mean(a[~a.isnan()])`).
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+    {keepdim}
+
+Keyword args:
+    {dtype}
+    {out}
+
+.. seealso::
+
+    :func:`torch.mean` computes the mean value, propagating `NaN`.
+
+Example::
+
+    >>> x = torch.tensor([[torch.nan, 1, 2], [1, 2, 3]])
+    >>> x.mean()
+    tensor(nan)
+    >>> x.nanmean()
+    tensor(1.8000)
+    >>> x.mean(dim=0)
+    tensor([   nan, 1.5000, 2.5000])
+    >>> x.nanmean(dim=0)
+    tensor([1.0000, 1.5000, 2.5000])
+
+    # If all elements in the reduced dimensions are NaN then the result is NaN
+    >>> torch.tensor([torch.nan]).nanmean()
+    tensor(nan)
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.median,
+    r"""
+median(input) -> Tensor
+
+Returns the median of the values in :attr:`input`.
+
+.. note::
+    The median is not unique for :attr:`input` tensors with an even number
+    of elements. In this case the lower of the two medians is returned. To
+    compute the mean of both medians, use :func:`torch.quantile` with ``q=0.5`` instead.
+
+.. warning::
+    This function produces deterministic (sub)gradients unlike ``median(dim=0)``
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[ 1.5219, -1.5212,  0.2202]])
+    >>> torch.median(a)
+    tensor(0.2202)
+
+.. function:: median(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+   :noindex:
+
+Returns a namedtuple ``(values, indices)`` where ``values`` contains the median of each row of :attr:`input`
+in the dimension :attr:`dim`, and ``indices`` contains the index of the median values found in the dimension :attr:`dim`.
+
+By default, :attr:`dim` is the last dimension of the :attr:`input` tensor.
+
+If :attr:`keepdim` is ``True``, the output tensors are of the same size
+as :attr:`input` except in the dimension :attr:`dim` where they are of size 1.
+Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
+the outputs tensor having 1 fewer dimension than :attr:`input`.
+
+.. note::
+    The median is not unique for :attr:`input` tensors with an even number
+    of elements in the dimension :attr:`dim`. In this case the lower of the
+    two medians is returned. To compute the mean of both medians in
+    :attr:`input`, use :func:`torch.quantile` with ``q=0.5`` instead.
+
+.. warning::
+    ``indices`` does not necessarily contain the first occurrence of each
+    median value found, unless it is unique.
+    The exact implementation details are device-specific.
+    Do not expect the same result when run on CPU and GPU in general.
+    For the same reason do not expect the gradients to be deterministic.
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    out ((Tensor, Tensor), optional): The first tensor will be populated with the median values and the second
+                                      tensor, which must have dtype long, with their indices in the dimension
+                                      :attr:`dim` of :attr:`input`.
+
+Example::
+
+    >>> a = torch.randn(4, 5)
+    >>> a
+    tensor([[ 0.2505, -0.3982, -0.9948,  0.3518, -1.3131],
+            [ 0.3180, -0.6993,  1.0436,  0.0438,  0.2270],
+            [-0.2751,  0.7303,  0.2192,  0.3321,  0.2488],
+            [ 1.0778, -1.9510,  0.7048,  0.4742, -0.7125]])
+    >>> torch.median(a, 1)
+    torch.return_types.median(values=tensor([-0.3982,  0.2270,  0.2488,  0.4742]), indices=tensor([1, 4, 4, 3]))
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.nanmedian,
+    r"""
+nanmedian(input) -> Tensor
+
+Returns the median of the values in :attr:`input`, ignoring ``NaN`` values.
+
+This function is identical to :func:`torch.median` when there are no ``NaN`` values in :attr:`input`.
+When :attr:`input` has one or more ``NaN`` values, :func:`torch.median` will always return ``NaN``,
+while this function will return the median of the non-``NaN`` elements in :attr:`input`.
+If all the elements in :attr:`input` are ``NaN`` it will also return ``NaN``.
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.tensor([1, float('nan'), 3, 2])
+    >>> a.median()
+    tensor(nan)
+    >>> a.nanmedian()
+    tensor(2.)
+
+.. function:: nanmedian(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+   :noindex:
+
+Returns a namedtuple ``(values, indices)`` where ``values`` contains the median of each row of :attr:`input`
+in the dimension :attr:`dim`, ignoring ``NaN`` values, and ``indices`` contains the index of the median values
+found in the dimension :attr:`dim`.
+
+This function is identical to :func:`torch.median` when there are no ``NaN`` values in a reduced row. When a reduced row has
+one or more ``NaN`` values, :func:`torch.median` will always reduce it to ``NaN``, while this function will reduce it to the
+median of the non-``NaN`` elements. If all the elements in a reduced row are ``NaN`` then it will be reduced to ``NaN``, too.
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    out ((Tensor, Tensor), optional): The first tensor will be populated with the median values and the second
+                                      tensor, which must have dtype long, with their indices in the dimension
+                                      :attr:`dim` of :attr:`input`.
+
+Example::
+
+    >>> a = torch.tensor([[2, 3, 1], [float('nan'), 1, float('nan')]])
+    >>> a
+    tensor([[2., 3., 1.],
+            [nan, 1., nan]])
+    >>> a.median(0)
+    torch.return_types.median(values=tensor([nan, 1., nan]), indices=tensor([1, 1, 1]))
+    >>> a.nanmedian(0)
+    torch.return_types.nanmedian(values=tensor([2., 1., 1.]), indices=tensor([0, 1, 0]))
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.quantile,
+    r"""
+quantile(input, q, dim=None, keepdim=False, *, interpolation='linear', out=None) -> Tensor
+
+Computes the q-th quantiles of each row of the :attr:`input` tensor along the dimension :attr:`dim`.
+
+To compute the quantile, we map q in [0, 1] to the range of indices [0, n] to find the location
+of the quantile in the sorted input. If the quantile lies between two data points ``a < b`` with
+indices ``i`` and ``j`` in the sorted order, result is computed according to the given
+:attr:`interpolation` method as follows:
+
+- ``linear``: ``a + (b - a) * fraction``, where ``fraction`` is the fractional part of the computed quantile index.
+- ``lower``: ``a``.
+- ``higher``: ``b``.
+- ``nearest``: ``a`` or ``b``, whichever's index is closer to the computed quantile index (rounding down for .5 fractions).
+- ``midpoint``: ``(a + b) / 2``.
+
+If :attr:`q` is a 1D tensor, the first dimension of the output represents the quantiles and has size
+equal to the size of :attr:`q`, the remaining dimensions are what remains from the reduction.
+
+.. note::
+    By default :attr:`dim` is ``None`` resulting in the :attr:`input` tensor being flattened before computation.
+
+Args:
+    {input}
+    q (float or Tensor): a scalar or 1D tensor of values in the range [0, 1].
+    {dim}
+    {keepdim}
+
+Keyword arguments:
+    interpolation (str): interpolation method to use when the desired quantile lies between two data points.
+                            Can be ``linear``, ``lower``, ``higher``, ``midpoint`` and ``nearest``.
+                            Default is ``linear``.
+    {out}
+
+Example::
+
+    >>> a = torch.randn(2, 3)
+    >>> a
+    tensor([[ 0.0795, -1.2117,  0.9765],
+            [ 1.1707,  0.6706,  0.4884]])
+    >>> q = torch.tensor([0.25, 0.5, 0.75])
+    >>> torch.quantile(a, q, dim=1, keepdim=True)
+    tensor([[[-0.5661],
+            [ 0.5795]],
+
+            [[ 0.0795],
+            [ 0.6706]],
+
+            [[ 0.5280],
+            [ 0.9206]]])
+    >>> torch.quantile(a, q, dim=1, keepdim=True).shape
+    torch.Size([3, 2, 1])
+    >>> a = torch.arange(4.)
+    >>> a
+    tensor([0., 1., 2., 3.])
+    >>> torch.quantile(a, 0.6, interpolation='linear')
+    tensor(1.8000)
+    >>> torch.quantile(a, 0.6, interpolation='lower')
+    tensor(1.)
+    >>> torch.quantile(a, 0.6, interpolation='higher')
+    tensor(2.)
+    >>> torch.quantile(a, 0.6, interpolation='midpoint')
+    tensor(1.5000)
+    >>> torch.quantile(a, 0.6, interpolation='nearest')
+    tensor(2.)
+    >>> torch.quantile(a, 0.4, interpolation='nearest')
+    tensor(1.)
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.nanquantile,
+    r"""
+nanquantile(input, q, dim=None, keepdim=False, *, interpolation='linear', out=None) -> Tensor
+
+This is a variant of :func:`torch.quantile` that "ignores" ``NaN`` values,
+computing the quantiles :attr:`q` as if ``NaN`` values in :attr:`input` did
+not exist. If all values in a reduced row are ``NaN`` then the quantiles for
+that reduction will be ``NaN``. See the documentation for :func:`torch.quantile`.
+
+Args:
+    {input}
+    q (float or Tensor): a scalar or 1D tensor of quantile values in the range [0, 1]
+    {dim}
+    {keepdim}
+
+Keyword arguments:
+    interpolation (str): interpolation method to use when the desired quantile lies between two data points.
+                            Can be ``linear``, ``lower``, ``higher``, ``midpoint`` and ``nearest``.
+                            Default is ``linear``.
+    {out}
+
+Example::
+
+    >>> t = torch.tensor([float('nan'), 1, 2])
+    >>> t.quantile(0.5)
+    tensor(nan)
+    >>> t.nanquantile(0.5)
+    tensor(1.5000)
+    >>> t = torch.tensor([[float('nan'), float('nan')], [1, 2]])
+    >>> t
+    tensor([[nan, nan],
+            [1., 2.]])
+    >>> t.nanquantile(0.5, dim=0)
+    tensor([1., 2.])
+    >>> t.nanquantile(0.5, dim=1)
+    tensor([   nan, 1.5000])
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.min,
+    r"""
+min(input) -> Tensor
+
+Returns the minimum value of all elements in the :attr:`input` tensor.
+
+.. warning::
+    This function produces deterministic (sub)gradients unlike ``min(dim=0)``
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[ 0.6750,  1.0857,  1.7197]])
+    >>> torch.min(a)
+    tensor(0.6750)
+
+.. function:: min(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+   :noindex:
+
+Returns a namedtuple ``(values, indices)`` where ``values`` is the minimum
+value of each row of the :attr:`input` tensor in the given dimension
+:attr:`dim`. And ``indices`` is the index location of each minimum value found
+(argmin).
+
+If :attr:`keepdim` is ``True``, the output tensors are of the same size as
+:attr:`input` except in the dimension :attr:`dim` where they are of size 1.
+Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
+the output tensors having 1 fewer dimension than :attr:`input`.
+
+.. note:: If there are multiple minimal values in a reduced row then
+          the indices of the first minimal value are returned.
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    out (tuple, optional): the tuple of two output tensors (min, min_indices)
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[-0.6248,  1.1334, -1.1899, -0.2803],
+            [-1.4644, -0.2635, -0.3651,  0.6134],
+            [ 0.2457,  0.0384,  1.0128,  0.7015],
+            [-0.1153,  2.9849,  2.1458,  0.5788]])
+    >>> torch.min(a, 1)
+    torch.return_types.min(values=tensor([-1.1899, -1.4644,  0.0384, -0.1153]), indices=tensor([2, 0, 1, 0]))
+
+.. function:: min(input, other, *, out=None) -> Tensor
+   :noindex:
+
+See :func:`torch.minimum`.
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.minimum,
+    r"""
+minimum(input, other, *, out=None) -> Tensor
+
+Computes the element-wise minimum of :attr:`input` and :attr:`other`.
+
+.. note::
+    If one of the elements being compared is a NaN, then that element is returned.
+    :func:`minimum` is not supported for tensors with complex dtypes.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor((1, 2, -1))
+    >>> b = torch.tensor((3, 0, 4))
+    >>> torch.minimum(a, b)
+    tensor([1, 0, -1])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.fmin,
+    r"""
+fmin(input, other, *, out=None) -> Tensor
+
+Computes the element-wise minimum of :attr:`input` and :attr:`other`.
+
+This is like :func:`torch.minimum` except it handles NaNs differently:
+if exactly one of the two elements being compared is a NaN then the non-NaN element is taken as the minimum.
+Only if both elements are NaN is NaN propagated.
+
+This function is a wrapper around C++'s ``std::fmin`` and is similar to NumPy's ``fmin`` function.
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer and floating-point inputs.
+
+Args:
+    {input}
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([2.2, float('nan'), 2.1, float('nan')])
+    >>> b = torch.tensor([-9.3, 0.1, float('nan'), float('nan')])
+    >>> torch.fmin(a, b)
+    tensor([-9.3000, 0.1000, 2.1000,    nan])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.amin,
+    r"""
+amin(input, dim, keepdim=False, *, out=None) -> Tensor
+
+Returns the minimum value of each slice of the :attr:`input` tensor in the given
+dimension(s) :attr:`dim`.
+
+.. note::
+    The difference between ``max``/``min`` and ``amax``/``amin`` is:
+        - ``amax``/``amin`` supports reducing on multiple dimensions,
+        - ``amax``/``amin`` does not return indices,
+        - ``amax``/``amin`` evenly distributes gradient between equal values,
+          while ``max(dim)``/``min(dim)`` propagates gradient only to a single
+          index in the source tensor.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.6451, -0.4866,  0.2987, -1.3312],
+            [-0.5744,  1.2980,  1.8397, -0.2713],
+            [ 0.9128,  0.9214, -1.7268, -0.2995],
+            [ 0.9023,  0.4853,  0.9075, -1.6165]])
+    >>> torch.amin(a, 1)
+    tensor([-1.3312, -0.5744, -1.7268, -1.6165])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.aminmax,
+    r"""
+aminmax(input, *, dim=None, keepdim=False, out=None) -> (Tensor min, Tensor max)
+
+Computes the minimum and maximum values of the :attr:`input` tensor.
+
+Args:
+    input (Tensor):
+        The input tensor
+
+Keyword Args:
+    dim (Optional[int]):
+        The dimension along which to compute the values. If `None`,
+        computes the values over the entire :attr:`input` tensor.
+        Default is `None`.
+    keepdim (bool):
+        If `True`, the reduced dimensions will be kept in the output
+        tensor as dimensions with size 1 for broadcasting, otherwise
+        they will be removed, as if calling (:func:`torch.squeeze`).
+        Default is `False`.
+    out (Optional[Tuple[Tensor, Tensor]]):
+        Optional tensors on which to write the result. Must have the same
+        shape and dtype as the expected output.
+        Default is `None`.
+
+Returns:
+    A named tuple `(min, max)` containing the minimum and maximum values.
+
+Raises:
+    RuntimeError
+        If any of the dimensions to compute the values over has size 0.
+
+.. note::
+    NaN values are propagated to the output if at least one value is NaN.
+
+.. seealso::
+    :func:`torch.amin` computes just the minimum value
+    :func:`torch.amax` computes just the maximum value
+
+Example::
+
+    >>> torch.aminmax(torch.tensor([1, -3, 5]))
+    torch.return_types.aminmax(
+    min=tensor(-3),
+    max=tensor(5))
+
+    >>> # aminmax propagates NaNs
+    >>> torch.aminmax(torch.tensor([1, -3, 5, torch.nan]))
+    torch.return_types.aminmax(
+    min=tensor(nan),
+    max=tensor(nan))
+
+    >>> t = torch.arange(10).view(2, 5)
+    >>> t
+    tensor([[0, 1, 2, 3, 4],
+            [5, 6, 7, 8, 9]])
+    >>> t.aminmax(dim=0, keepdim=True)
+    torch.return_types.aminmax(
+    min=tensor([[0, 1, 2, 3, 4]]),
+    max=tensor([[5, 6, 7, 8, 9]]))
+""",
+)
+
+add_docstr(
+    torch.argmin,
+    r"""
+argmin(input, dim=None, keepdim=False) -> LongTensor
+
+Returns the indices of the minimum value(s) of the flattened tensor or along a dimension
+
+This is the second value returned by :meth:`torch.min`. See its
+documentation for the exact semantics of this method.
+
+.. note:: If there are multiple minimal values then the indices of the first minimal value are returned.
+
+Args:
+    {input}
+    {dim} If ``None``, the argmin of the flattened input is returned.
+    {keepdim}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.1139,  0.2254, -0.1381,  0.3687],
+            [ 1.0100, -1.1975, -0.0102, -0.4732],
+            [-0.9240,  0.1207, -0.7506, -1.0213],
+            [ 1.7809, -1.2960,  0.9384,  0.1438]])
+    >>> torch.argmin(a)
+    tensor(13)
+    >>> torch.argmin(a, dim=1)
+    tensor([ 2,  1,  3,  1])
+    >>> torch.argmin(a, dim=1, keepdim=True)
+    tensor([[2],
+            [1],
+            [3],
+            [1]])
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.mm,
+    r"""
+mm(input, mat2, *, out=None) -> Tensor
+
+Performs a matrix multiplication of the matrices :attr:`input` and :attr:`mat2`.
+
+If :attr:`input` is a :math:`(n \times m)` tensor, :attr:`mat2` is a
+:math:`(m \times p)` tensor, :attr:`out` will be a :math:`(n \times p)` tensor.
+
+.. note:: This function does not :ref:`broadcast <broadcasting-semantics>`.
+          For broadcasting matrix products, see :func:`torch.matmul`.
+
+Supports strided and sparse 2-D tensors as inputs, autograd with
+respect to strided inputs.
+
+This operation has support for arguments with :ref:`sparse layouts<sparse-docs>`.
+If :attr:`out` is provided it's layout will be used. Otherwise, the result
+layout will be deduced from that of :attr:`input`.
+
+{sparse_beta_warning}
+
+{tf32_note}
+
+{rocm_fp16_note}
+
+Args:
+    input (Tensor): the first matrix to be matrix multiplied
+    mat2 (Tensor): the second matrix to be matrix multiplied
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> mat1 = torch.randn(2, 3)
+    >>> mat2 = torch.randn(3, 3)
+    >>> torch.mm(mat1, mat2)
+    tensor([[ 0.4851,  0.5037, -0.3633],
+            [-0.0760, -3.6705,  2.4784]])
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes
+    ),
+)
+
+add_docstr(
+    torch.hspmm,
+    r"""
+hspmm(mat1, mat2, *, out=None) -> Tensor
+
+Performs a matrix multiplication of a :ref:`sparse COO matrix
+<sparse-coo-docs>` :attr:`mat1` and a strided matrix :attr:`mat2`. The
+result is a (1 + 1)-dimensional :ref:`hybrid COO matrix
+<sparse-hybrid-coo-docs>`.
+
+Args:
+    mat1 (Tensor): the first sparse matrix to be matrix multiplied
+    mat2 (Tensor): the second strided matrix to be matrix multiplied
+
+Keyword args:
+    {out}
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.matmul,
+    r"""
+matmul(input, other, *, out=None) -> Tensor
+
+Matrix product of two tensors.
+
+The behavior depends on the dimensionality of the tensors as follows:
+
+- If both tensors are 1-dimensional, the dot product (scalar) is returned.
+- If both arguments are 2-dimensional, the matrix-matrix product is returned.
+- If the first argument is 1-dimensional and the second argument is 2-dimensional,
+  a 1 is prepended to its dimension for the purpose of the matrix multiply.
+  After the matrix multiply, the prepended dimension is removed.
+- If the first argument is 2-dimensional and the second argument is 1-dimensional,
+  the matrix-vector product is returned.
+- If both arguments are at least 1-dimensional and at least one argument is
+  N-dimensional (where N > 2), then a batched matrix multiply is returned.  If the first
+  argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the
+  batched matrix multiply and removed after.  If the second argument is 1-dimensional, a
+  1 is appended to its dimension for the purpose of the batched matrix multiple and removed after.
+  The non-matrix (i.e. batch) dimensions are :ref:`broadcasted <broadcasting-semantics>` (and thus
+  must be broadcastable).  For example, if :attr:`input` is a
+  :math:`(j \times 1 \times n \times n)` tensor and :attr:`other` is a :math:`(k \times n \times n)`
+  tensor, :attr:`out` will be a :math:`(j \times k \times n \times n)` tensor.
+
+  Note that the broadcasting logic only looks at the batch dimensions when determining if the inputs
+  are broadcastable, and not the matrix dimensions. For example, if :attr:`input` is a
+  :math:`(j \times 1 \times n \times m)` tensor and :attr:`other` is a :math:`(k \times m \times p)`
+  tensor, these inputs are valid for broadcasting even though the final two dimensions (i.e. the
+  matrix dimensions) are different. :attr:`out` will be a :math:`(j \times k \times n \times p)` tensor.
+
+This operation has support for arguments with :ref:`sparse layouts<sparse-docs>`. In particular the
+matrix-matrix (both arguments 2-dimensional) supports sparse arguments with the same restrictions
+as :func:`torch.mm`
+
+{sparse_beta_warning}
+
+{tf32_note}
+
+{rocm_fp16_note}
+
+.. note::
+
+    The 1-dimensional dot product version of this function does not support an :attr:`out` parameter.
+
+Arguments:
+    input (Tensor): the first tensor to be multiplied
+    other (Tensor): the second tensor to be multiplied
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> # vector x vector
+    >>> tensor1 = torch.randn(3)
+    >>> tensor2 = torch.randn(3)
+    >>> torch.matmul(tensor1, tensor2).size()
+    torch.Size([])
+    >>> # matrix x vector
+    >>> tensor1 = torch.randn(3, 4)
+    >>> tensor2 = torch.randn(4)
+    >>> torch.matmul(tensor1, tensor2).size()
+    torch.Size([3])
+    >>> # batched matrix x broadcasted vector
+    >>> tensor1 = torch.randn(10, 3, 4)
+    >>> tensor2 = torch.randn(4)
+    >>> torch.matmul(tensor1, tensor2).size()
+    torch.Size([10, 3])
+    >>> # batched matrix x batched matrix
+    >>> tensor1 = torch.randn(10, 3, 4)
+    >>> tensor2 = torch.randn(10, 4, 5)
+    >>> torch.matmul(tensor1, tensor2).size()
+    torch.Size([10, 3, 5])
+    >>> # batched matrix x broadcasted matrix
+    >>> tensor1 = torch.randn(10, 3, 4)
+    >>> tensor2 = torch.randn(4, 5)
+    >>> torch.matmul(tensor1, tensor2).size()
+    torch.Size([10, 3, 5])
+
+""".format(
+        **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes
+    ),
+)
+
+add_docstr(
+    torch.mode,
+    r"""
+mode(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor)
+
+Returns a namedtuple ``(values, indices)`` where ``values`` is the mode
+value of each row of the :attr:`input` tensor in the given dimension
+:attr:`dim`, i.e. a value which appears most often
+in that row, and ``indices`` is the index location of each mode value found.
+
+By default, :attr:`dim` is the last dimension of the :attr:`input` tensor.
+
+If :attr:`keepdim` is ``True``, the output tensors are of the same size as
+:attr:`input` except in the dimension :attr:`dim` where they are of size 1.
+Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
+in the output tensors having 1 fewer dimension than :attr:`input`.
+
+.. note:: This function is not defined for ``torch.cuda.Tensor`` yet.
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    out (tuple, optional): the result tuple of two output tensors (values, indices)
+
+Example::
+
+    >>> b = torch.tensor(
+           [[0, 0, 0, 2, 0, 0, 2],
+            [0, 3, 0, 0, 2, 0, 1],
+            [2, 2, 2, 0, 0, 0, 3],
+            [2, 2, 3, 0, 1, 1, 0],
+            [1, 1, 0, 0, 2, 0, 2]])
+    >>> torch.mode(b, 0)
+    torch.return_types.mode(
+    values=tensor([0, 2, 0, 0, 0, 0, 2]),
+    indices=tensor([1, 3, 4, 4, 2, 4, 4]))
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.mul,
+    r"""
+mul(input, other, *, out=None) -> Tensor
+
+Multiplies :attr:`input` by :attr:`other`.
+
+
+.. math::
+    \text{out}_i = \text{input}_i \times \text{other}_i
+"""
+    + r"""
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer, float, and complex inputs.
+
+Args:
+    {input}
+    other (Tensor or Number) - the tensor or number to multiply input by.
+
+Keyword args:
+    {out}
+
+Examples::
+
+    >>> a = torch.randn(3)
+    >>> a
+    tensor([ 0.2015, -0.4255,  2.6087])
+    >>> torch.mul(a, 100)
+    tensor([  20.1494,  -42.5491,  260.8663])
+
+    >>> b = torch.randn(4, 1)
+    >>> b
+    tensor([[ 1.1207],
+            [-0.3137],
+            [ 0.0700],
+            [ 0.8378]])
+    >>> c = torch.randn(1, 4)
+    >>> c
+    tensor([[ 0.5146,  0.1216, -0.5244,  2.2382]])
+    >>> torch.mul(b, c)
+    tensor([[ 0.5767,  0.1363, -0.5877,  2.5083],
+            [-0.1614, -0.0382,  0.1645, -0.7021],
+            [ 0.0360,  0.0085, -0.0367,  0.1567],
+            [ 0.4312,  0.1019, -0.4394,  1.8753]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.multiply,
+    r"""
+multiply(input, other, *, out=None)
+
+Alias for :func:`torch.mul`.
+""",
+)
+
+add_docstr(
+    torch.multinomial,
+    r"""
+multinomial(input, num_samples, replacement=False, *, generator=None, out=None) -> LongTensor
+
+Returns a tensor where each row contains :attr:`num_samples` indices sampled
+from the multinomial (a stricter definition would be multivariate,
+refer to torch.distributions.multinomial.Multinomial for more details)
+probability distribution located in the corresponding row
+of tensor :attr:`input`.
+
+.. note::
+    The rows of :attr:`input` do not need to sum to one (in which case we use
+    the values as weights), but must be non-negative, finite and have
+    a non-zero sum.
+
+Indices are ordered from left to right according to when each was sampled
+(first samples are placed in first column).
+
+If :attr:`input` is a vector, :attr:`out` is a vector of size :attr:`num_samples`.
+
+If :attr:`input` is a matrix with `m` rows, :attr:`out` is an matrix of shape
+:math:`(m \times \text{{num\_samples}})`.
+
+If replacement is ``True``, samples are drawn with replacement.
+
+If not, they are drawn without replacement, which means that when a
+sample index is drawn for a row, it cannot be drawn again for that row.
+
+.. note::
+    When drawn without replacement, :attr:`num_samples` must be lower than
+    number of non-zero elements in :attr:`input` (or the min number of non-zero
+    elements in each row of :attr:`input` if it is a matrix).
+
+Args:
+    input (Tensor): the input tensor containing probabilities
+    num_samples (int): number of samples to draw
+    replacement (bool, optional): whether to draw with replacement or not
+
+Keyword args:
+    {generator}
+    {out}
+
+Example::
+
+    >>> weights = torch.tensor([0, 10, 3, 0], dtype=torch.float) # create a tensor of weights
+    >>> torch.multinomial(weights, 2)
+    tensor([1, 2])
+    >>> torch.multinomial(weights, 4) # ERROR!
+    RuntimeError: invalid argument 2: invalid multinomial distribution (with replacement=False,
+    not enough non-negative category to sample) at ../aten/src/TH/generic/THTensorRandom.cpp:320
+    >>> torch.multinomial(weights, 4, replacement=True)
+    tensor([ 2,  1,  1,  1])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.mv,
+    r"""
+mv(input, vec, *, out=None) -> Tensor
+
+Performs a matrix-vector product of the matrix :attr:`input` and the vector
+:attr:`vec`.
+
+If :attr:`input` is a :math:`(n \times m)` tensor, :attr:`vec` is a 1-D tensor of
+size :math:`m`, :attr:`out` will be 1-D of size :math:`n`.
+
+.. note:: This function does not :ref:`broadcast <broadcasting-semantics>`.
+
+Args:
+    input (Tensor): matrix to be multiplied
+    vec (Tensor): vector to be multiplied
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> mat = torch.randn(2, 3)
+    >>> vec = torch.randn(3)
+    >>> torch.mv(mat, vec)
+    tensor([ 1.0404, -0.6361])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.mvlgamma,
+    r"""
+mvlgamma(input, p, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.multigammaln`.
+""",
+)
+
+add_docstr(
+    torch.movedim,
+    r"""
+movedim(input, source, destination) -> Tensor
+
+Moves the dimension(s) of :attr:`input` at the position(s) in :attr:`source`
+to the position(s) in :attr:`destination`.
+
+Other dimensions of :attr:`input` that are not explicitly moved remain in
+their original order and appear at the positions not specified in :attr:`destination`.
+
+Args:
+    {input}
+    source (int or tuple of ints): Original positions of the dims to move. These must be unique.
+    destination (int or tuple of ints): Destination positions for each of the original dims. These must also be unique.
+
+Examples::
+
+    >>> t = torch.randn(3,2,1)
+    >>> t
+    tensor([[[-0.3362],
+            [-0.8437]],
+
+            [[-0.9627],
+            [ 0.1727]],
+
+            [[ 0.5173],
+            [-0.1398]]])
+    >>> torch.movedim(t, 1, 0).shape
+    torch.Size([2, 3, 1])
+    >>> torch.movedim(t, 1, 0)
+    tensor([[[-0.3362],
+            [-0.9627],
+            [ 0.5173]],
+
+            [[-0.8437],
+            [ 0.1727],
+            [-0.1398]]])
+    >>> torch.movedim(t, (1, 2), (0, 1)).shape
+    torch.Size([2, 1, 3])
+    >>> torch.movedim(t, (1, 2), (0, 1))
+    tensor([[[-0.3362, -0.9627,  0.5173]],
+
+            [[-0.8437,  0.1727, -0.1398]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.moveaxis,
+    r"""
+moveaxis(input, source, destination) -> Tensor
+
+Alias for :func:`torch.movedim`.
+
+This function is equivalent to NumPy's moveaxis function.
+
+Examples::
+
+    >>> t = torch.randn(3,2,1)
+    >>> t
+    tensor([[[-0.3362],
+            [-0.8437]],
+
+            [[-0.9627],
+            [ 0.1727]],
+
+            [[ 0.5173],
+            [-0.1398]]])
+    >>> torch.moveaxis(t, 1, 0).shape
+    torch.Size([2, 3, 1])
+    >>> torch.moveaxis(t, 1, 0)
+    tensor([[[-0.3362],
+            [-0.9627],
+            [ 0.5173]],
+
+            [[-0.8437],
+            [ 0.1727],
+            [-0.1398]]])
+    >>> torch.moveaxis(t, (1, 2), (0, 1)).shape
+    torch.Size([2, 1, 3])
+    >>> torch.moveaxis(t, (1, 2), (0, 1))
+    tensor([[[-0.3362, -0.9627,  0.5173]],
+
+            [[-0.8437,  0.1727, -0.1398]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.swapdims,
+    r"""
+swapdims(input, dim0, dim1) -> Tensor
+
+Alias for :func:`torch.transpose`.
+
+This function is equivalent to NumPy's swapaxes function.
+
+Examples::
+
+    >>> x = torch.tensor([[[0,1],[2,3]],[[4,5],[6,7]]])
+    >>> x
+    tensor([[[0, 1],
+            [2, 3]],
+
+            [[4, 5],
+            [6, 7]]])
+    >>> torch.swapdims(x, 0, 1)
+    tensor([[[0, 1],
+            [4, 5]],
+
+            [[2, 3],
+            [6, 7]]])
+    >>> torch.swapdims(x, 0, 2)
+    tensor([[[0, 4],
+            [2, 6]],
+
+            [[1, 5],
+            [3, 7]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.swapaxes,
+    r"""
+swapaxes(input, axis0, axis1) -> Tensor
+
+Alias for :func:`torch.transpose`.
+
+This function is equivalent to NumPy's swapaxes function.
+
+Examples::
+
+    >>> x = torch.tensor([[[0,1],[2,3]],[[4,5],[6,7]]])
+    >>> x
+    tensor([[[0, 1],
+            [2, 3]],
+
+            [[4, 5],
+            [6, 7]]])
+    >>> torch.swapaxes(x, 0, 1)
+    tensor([[[0, 1],
+            [4, 5]],
+
+            [[2, 3],
+            [6, 7]]])
+    >>> torch.swapaxes(x, 0, 2)
+    tensor([[[0, 4],
+            [2, 6]],
+
+            [[1, 5],
+            [3, 7]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.narrow,
+    r"""
+narrow(input, dim, start, length) -> Tensor
+
+Returns a new tensor that is a narrowed version of :attr:`input` tensor. The
+dimension :attr:`dim` is input from :attr:`start` to ``start + length``. The
+returned tensor and :attr:`input` tensor share the same underlying storage.
+
+Args:
+    input (Tensor): the tensor to narrow
+    dim (int): the dimension along which to narrow
+    start (int or Tensor): index of the element to start the narrowed dimension
+        from. Can be negative, which means indexing from the end of `dim`. If
+        `Tensor`, it must be an 0-dim integral `Tensor` (bools not allowed)
+    length (int): length of the narrowed dimension, must be weakly positive
+
+Example::
+
+    >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    >>> torch.narrow(x, 0, 0, 2)
+    tensor([[ 1,  2,  3],
+            [ 4,  5,  6]])
+    >>> torch.narrow(x, 1, 1, 2)
+    tensor([[ 2,  3],
+            [ 5,  6],
+            [ 8,  9]])
+    >>> torch.narrow(x, -1, torch.tensor(-1), 1)
+    tensor([[3],
+            [6],
+            [9]])
+""",
+)
+
+add_docstr(
+    torch.narrow_copy,
+    r"""
+narrow_copy(input, dim, start, length, *, out=None) -> Tensor
+
+Same as :meth:`Tensor.narrow` except this returns a copy rather
+than shared storage. This is primarily for sparse tensors, which
+do not have a shared-storage narrow method.
+
+Args:
+    input (Tensor): the tensor to narrow
+    dim (int): the dimension along which to narrow
+    start (int): index of the element to start the narrowed dimension from. Can
+        be negative, which means indexing from the end of `dim`
+    length (int): length of the narrowed dimension, must be weakly positive
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    >>> torch.narrow_copy(x, 0, 0, 2)
+    tensor([[ 1,  2,  3],
+            [ 4,  5,  6]])
+    >>> torch.narrow_copy(x, 1, 1, 2)
+    tensor([[ 2,  3],
+            [ 5,  6],
+            [ 8,  9]])
+    >>> s = torch.arange(16).reshape(2, 2, 2, 2).to_sparse(2)
+    >>> torch.narrow_copy(s, 0, 0, 1)
+    tensor(indices=tensor([[0, 0],
+                           [0, 1]]),
+           values=tensor([[[0, 1],
+                           [2, 3]],
+
+                          [[4, 5],
+                           [6, 7]]]),
+           size=(1, 2, 2, 2), nnz=2, layout=torch.sparse_coo)
+
+.. seealso::
+
+        :func:`torch.narrow` for a non copy variant
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.nan_to_num,
+    r"""
+nan_to_num(input, nan=0.0, posinf=None, neginf=None, *, out=None) -> Tensor
+
+Replaces :literal:`NaN`, positive infinity, and negative infinity values in :attr:`input`
+with the values specified by :attr:`nan`, :attr:`posinf`, and :attr:`neginf`, respectively.
+By default, :literal:`NaN`\ s are replaced with zero, positive infinity is replaced with the
+greatest finite value representable by :attr:`input`'s dtype, and negative infinity
+is replaced with the least finite value representable by :attr:`input`'s dtype.
+
+Args:
+    {input}
+    nan (Number, optional): the value to replace :literal:`NaN`\s with. Default is zero.
+    posinf (Number, optional): if a Number, the value to replace positive infinity values with.
+        If None, positive infinity values are replaced with the greatest finite value representable by :attr:`input`'s dtype.
+        Default is None.
+    neginf (Number, optional): if a Number, the value to replace negative infinity values with.
+        If None, negative infinity values are replaced with the lowest finite value representable by :attr:`input`'s dtype.
+        Default is None.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.tensor([float('nan'), float('inf'), -float('inf'), 3.14])
+    >>> torch.nan_to_num(x)
+    tensor([ 0.0000e+00,  3.4028e+38, -3.4028e+38,  3.1400e+00])
+    >>> torch.nan_to_num(x, nan=2.0)
+    tensor([ 2.0000e+00,  3.4028e+38, -3.4028e+38,  3.1400e+00])
+    >>> torch.nan_to_num(x, nan=2.0, posinf=1.0)
+    tensor([ 2.0000e+00,  1.0000e+00, -3.4028e+38,  3.1400e+00])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.ne,
+    r"""
+ne(input, other, *, out=None) -> Tensor
+
+Computes :math:`\text{input} \neq \text{other}` element-wise.
+"""
+    + r"""
+
+The second argument can be a number or a tensor whose shape is
+:ref:`broadcastable <broadcasting-semantics>` with the first argument.
+
+Args:
+    input (Tensor): the tensor to compare
+    other (Tensor or float): the tensor or value to compare
+
+Keyword args:
+    {out}
+
+Returns:
+    A boolean tensor that is True where :attr:`input` is not equal to :attr:`other` and False elsewhere
+
+Example::
+
+    >>> torch.ne(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]]))
+    tensor([[False, True], [True, False]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.not_equal,
+    r"""
+not_equal(input, other, *, out=None) -> Tensor
+
+Alias for :func:`torch.ne`.
+""",
+)
+
+add_docstr(
+    torch.neg,
+    r"""
+neg(input, *, out=None) -> Tensor
+
+Returns a new tensor with the negative of the elements of :attr:`input`.
+
+.. math::
+    \text{out} = -1 \times \text{input}
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(5)
+    >>> a
+    tensor([ 0.0090, -0.2262, -0.0682, -0.2866,  0.3940])
+    >>> torch.neg(a)
+    tensor([-0.0090,  0.2262,  0.0682,  0.2866, -0.3940])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.negative,
+    r"""
+negative(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.neg`
+""",
+)
+
+add_docstr(
+    torch.nextafter,
+    r"""
+nextafter(input, other, *, out=None) -> Tensor
+
+Return the next floating-point value after :attr:`input` towards :attr:`other`, elementwise.
+
+The shapes of ``input`` and ``other`` must be
+:ref:`broadcastable <broadcasting-semantics>`.
+
+Args:
+    input (Tensor): the first input tensor
+    other (Tensor): the second input tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> eps = torch.finfo(torch.float32).eps
+    >>> torch.nextafter(torch.tensor([1.0, 2.0]), torch.tensor([2.0, 1.0])) == torch.tensor([eps + 1, 2 - eps])
+    tensor([True, True])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.nonzero,
+    r"""
+nonzero(input, *, out=None, as_tuple=False) -> LongTensor or tuple of LongTensors
+
+.. note::
+    :func:`torch.nonzero(..., as_tuple=False) <torch.nonzero>` (default) returns a
+    2-D tensor where each row is the index for a nonzero value.
+
+    :func:`torch.nonzero(..., as_tuple=True) <torch.nonzero>` returns a tuple of 1-D
+    index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]``
+    gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor
+    contains nonzero indices for a certain dimension.
+
+    See below for more details on the two behaviors.
+
+    When :attr:`input` is on CUDA, :func:`torch.nonzero() <torch.nonzero>` causes
+    host-device synchronization.
+
+**When** :attr:`as_tuple` **is** ``False`` **(default)**:
+
+Returns a tensor containing the indices of all non-zero elements of
+:attr:`input`.  Each row in the result contains the indices of a non-zero
+element in :attr:`input`. The result is sorted lexicographically, with
+the last index changing the fastest (C-style).
+
+If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor
+:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of
+non-zero elements in the :attr:`input` tensor.
+
+**When** :attr:`as_tuple` **is** ``True``:
+
+Returns a tuple of 1-D tensors, one for each dimension in :attr:`input`,
+each containing the indices (in that dimension) of all non-zero elements of
+:attr:`input` .
+
+If :attr:`input` has :math:`n` dimensions, then the resulting tuple contains :math:`n`
+tensors of size :math:`z`, where :math:`z` is the total number of
+non-zero elements in the :attr:`input` tensor.
+
+As a special case, when :attr:`input` has zero dimensions and a nonzero scalar
+value, it is treated as a one-dimensional tensor with one element.
+
+Args:
+    {input}
+
+Keyword args:
+    out (LongTensor, optional): the output tensor containing indices
+
+Returns:
+    LongTensor or tuple of LongTensor: If :attr:`as_tuple` is ``False``, the output
+    tensor containing indices. If :attr:`as_tuple` is ``True``, one 1-D tensor for
+    each dimension, containing the indices of each nonzero element along that
+    dimension.
+
+Example::
+
+    >>> torch.nonzero(torch.tensor([1, 1, 1, 0, 1]))
+    tensor([[ 0],
+            [ 1],
+            [ 2],
+            [ 4]])
+    >>> torch.nonzero(torch.tensor([[0.6, 0.0, 0.0, 0.0],
+    ...                             [0.0, 0.4, 0.0, 0.0],
+    ...                             [0.0, 0.0, 1.2, 0.0],
+    ...                             [0.0, 0.0, 0.0,-0.4]]))
+    tensor([[ 0,  0],
+            [ 1,  1],
+            [ 2,  2],
+            [ 3,  3]])
+    >>> torch.nonzero(torch.tensor([1, 1, 1, 0, 1]), as_tuple=True)
+    (tensor([0, 1, 2, 4]),)
+    >>> torch.nonzero(torch.tensor([[0.6, 0.0, 0.0, 0.0],
+    ...                             [0.0, 0.4, 0.0, 0.0],
+    ...                             [0.0, 0.0, 1.2, 0.0],
+    ...                             [0.0, 0.0, 0.0,-0.4]]), as_tuple=True)
+    (tensor([0, 1, 2, 3]), tensor([0, 1, 2, 3]))
+    >>> torch.nonzero(torch.tensor(5), as_tuple=True)
+    (tensor([0]),)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.normal,
+    r"""
+normal(mean, std, *, generator=None, out=None) -> Tensor
+
+Returns a tensor of random numbers drawn from separate normal distributions
+whose mean and standard deviation are given.
+
+The :attr:`mean` is a tensor with the mean of
+each output element's normal distribution
+
+The :attr:`std` is a tensor with the standard deviation of
+each output element's normal distribution
+
+The shapes of :attr:`mean` and :attr:`std` don't need to match, but the
+total number of elements in each tensor need to be the same.
+
+.. note:: When the shapes do not match, the shape of :attr:`mean`
+          is used as the shape for the returned output tensor
+
+.. note:: When :attr:`std` is a CUDA tensor, this function synchronizes
+          its device with the CPU.
+
+Args:
+    mean (Tensor): the tensor of per-element means
+    std (Tensor): the tensor of per-element standard deviations
+
+Keyword args:
+    {generator}
+    {out}
+
+Example::
+
+    >>> torch.normal(mean=torch.arange(1., 11.), std=torch.arange(1, 0, -0.1))
+    tensor([  1.0425,   3.5672,   2.7969,   4.2925,   4.7229,   6.2134,
+              8.0505,   8.1408,   9.0563,  10.0566])
+
+.. function:: normal(mean=0.0, std, *, out=None) -> Tensor
+   :noindex:
+
+Similar to the function above, but the means are shared among all drawn
+elements.
+
+Args:
+    mean (float, optional): the mean for all distributions
+    std (Tensor): the tensor of per-element standard deviations
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.normal(mean=0.5, std=torch.arange(1., 6.))
+    tensor([-1.2793, -1.0732, -2.0687,  5.1177, -1.2303])
+
+.. function:: normal(mean, std=1.0, *, out=None) -> Tensor
+   :noindex:
+
+Similar to the function above, but the standard deviations are shared among
+all drawn elements.
+
+Args:
+    mean (Tensor): the tensor of per-element means
+    std (float, optional): the standard deviation for all distributions
+
+Keyword args:
+    out (Tensor, optional): the output tensor
+
+Example::
+
+    >>> torch.normal(mean=torch.arange(1., 6.))
+    tensor([ 1.1552,  2.6148,  2.6535,  5.8318,  4.2361])
+
+.. function:: normal(mean, std, size, *, out=None) -> Tensor
+   :noindex:
+
+Similar to the function above, but the means and standard deviations are shared
+among all drawn elements. The resulting tensor has size given by :attr:`size`.
+
+Args:
+    mean (float): the mean for all distributions
+    std (float): the standard deviation for all distributions
+    size (int...): a sequence of integers defining the shape of the output tensor.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.normal(2, 3, size=(1, 4))
+    tensor([[-1.3987, -1.9544,  3.6048,  0.7909]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.numel,
+    r"""
+numel(input) -> int
+
+Returns the total number of elements in the :attr:`input` tensor.
+
+Args:
+    {input}
+
+Example::
+
+    >>> a = torch.randn(1, 2, 3, 4, 5)
+    >>> torch.numel(a)
+    120
+    >>> a = torch.zeros(4,4)
+    >>> torch.numel(a)
+    16
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.ones,
+    r"""
+ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a tensor filled with the scalar value `1`, with the shape defined
+by the variable argument :attr:`size`.
+
+Args:
+    size (int...): a sequence of integers defining the shape of the output tensor.
+        Can be a variable number of arguments or a collection like a list or tuple.
+
+Keyword arguments:
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.ones(2, 3)
+    tensor([[ 1.,  1.,  1.],
+            [ 1.,  1.,  1.]])
+
+    >>> torch.ones(5)
+    tensor([ 1.,  1.,  1.,  1.,  1.])
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.ones_like,
+    r"""
+ones_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor filled with the scalar value `1`, with the same size as
+:attr:`input`. ``torch.ones_like(input)`` is equivalent to
+``torch.ones(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+.. warning::
+    As of 0.4, this function does not support an :attr:`out` keyword. As an alternative,
+    the old ``torch.ones_like(input, out=output)`` is equivalent to
+    ``torch.ones(input.size(), out=output)``.
+
+Args:
+    {input}
+
+Keyword arguments:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+Example::
+
+    >>> input = torch.empty(2, 3)
+    >>> torch.ones_like(input)
+    tensor([[ 1.,  1.,  1.],
+            [ 1.,  1.,  1.]])
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.orgqr,
+    r"""
+orgqr(input, tau) -> Tensor
+
+Alias for :func:`torch.linalg.householder_product`.
+""",
+)
+
+add_docstr(
+    torch.ormqr,
+    r"""
+ormqr(input, tau, other, left=True, transpose=False, *, out=None) -> Tensor
+
+Computes the matrix-matrix multiplication of a product of Householder matrices with a general matrix.
+
+Multiplies a :math:`m \times n` matrix `C` (given by :attr:`other`) with a matrix `Q`,
+where `Q` is represented using Householder reflectors `(input, tau)`.
+See `Representation of Orthogonal or Unitary Matrices`_ for further details.
+
+If :attr:`left` is `True` then `op(Q)` times `C` is computed, otherwise the result is `C` times `op(Q)`.
+When :attr:`left` is `True`, the implicit matrix `Q` has size :math:`m \times m`.
+It has size :math:`n \times n` otherwise.
+If :attr:`transpose` is `True` then `op` is the conjugate transpose operation, otherwise it's a no-op.
+
+Supports inputs of float, double, cfloat and cdouble dtypes.
+Also supports batched inputs, and, if the input is batched, the output is batched with the same dimensions.
+
+.. seealso::
+        :func:`torch.geqrf` can be used to form the Householder representation `(input, tau)` of matrix `Q`
+        from the QR decomposition.
+
+.. note::
+        This function supports backward but it is only fast when ``(input, tau)`` do not require gradients
+        and/or ``tau.size(-1)`` is very small.
+        ``
+
+Args:
+    input (Tensor): tensor of shape `(*, mn, k)` where `*` is zero or more batch dimensions
+                    and `mn` equals to `m` or `n` depending on the :attr:`left`.
+    tau (Tensor): tensor of shape `(*, min(mn, k))` where `*` is zero or more batch dimensions.
+    other (Tensor): tensor of shape `(*, m, n)` where `*` is zero or more batch dimensions.
+    left (bool): controls the order of multiplication.
+    transpose (bool): controls whether the matrix `Q` is conjugate transposed or not.
+
+Keyword args:
+    out (Tensor, optional): the output Tensor. Ignored if `None`. Default: `None`.
+
+.. _Representation of Orthogonal or Unitary Matrices:
+    https://www.netlib.org/lapack/lug/node128.html
+""",
+)
+
+add_docstr(
+    torch.permute,
+    r"""
+permute(input, dims) -> Tensor
+
+Returns a view of the original tensor :attr:`input` with its dimensions permuted.
+
+Args:
+    {input}
+    dims (tuple of int): The desired ordering of dimensions
+
+Example:
+    >>> x = torch.randn(2, 3, 5)
+    >>> x.size()
+    torch.Size([2, 3, 5])
+    >>> torch.permute(x, (2, 0, 1)).size()
+    torch.Size([5, 2, 3])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.poisson,
+    r"""
+poisson(input, generator=None) -> Tensor
+
+Returns a tensor of the same size as :attr:`input` with each element
+sampled from a Poisson distribution with rate parameter given by the corresponding
+element in :attr:`input` i.e.,
+
+.. math::
+    \text{{out}}_i \sim \text{{Poisson}}(\text{{input}}_i)
+
+:attr:`input` must be non-negative.
+
+Args:
+    input (Tensor): the input tensor containing the rates of the Poisson distribution
+
+Keyword args:
+    {generator}
+
+Example::
+
+    >>> rates = torch.rand(4, 4) * 5  # rate parameter between 0 and 5
+    >>> torch.poisson(rates)
+    tensor([[9., 1., 3., 5.],
+            [8., 6., 6., 0.],
+            [0., 4., 5., 3.],
+            [2., 1., 4., 2.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.polygamma,
+    r"""
+polygamma(n, input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.polygamma`.
+""",
+)
+
+add_docstr(
+    torch.positive,
+    r"""
+positive(input) -> Tensor
+
+Returns :attr:`input`.
+Throws a runtime error if :attr:`input` is a bool tensor.
+"""
+    + r"""
+Args:
+    {input}
+
+Example::
+
+    >>> t = torch.randn(5)
+    >>> t
+    tensor([ 0.0090, -0.2262, -0.0682, -0.2866,  0.3940])
+    >>> torch.positive(t)
+    tensor([ 0.0090, -0.2262, -0.0682, -0.2866,  0.3940])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.pow,
+    r"""
+pow(input, exponent, *, out=None) -> Tensor
+
+Takes the power of each element in :attr:`input` with :attr:`exponent` and
+returns a tensor with the result.
+
+:attr:`exponent` can be either a single ``float`` number or a `Tensor`
+with the same number of elements as :attr:`input`.
+
+When :attr:`exponent` is a scalar value, the operation applied is:
+
+.. math::
+    \text{out}_i = x_i ^ \text{exponent}
+
+When :attr:`exponent` is a tensor, the operation applied is:
+
+.. math::
+    \text{out}_i = x_i ^ {\text{exponent}_i}
+"""
+    + r"""
+When :attr:`exponent` is a tensor, the shapes of :attr:`input`
+and :attr:`exponent` must be :ref:`broadcastable <broadcasting-semantics>`.
+
+Args:
+    {input}
+    exponent (float or tensor): the exponent value
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.4331,  1.2475,  0.6834, -0.2791])
+    >>> torch.pow(a, 2)
+    tensor([ 0.1875,  1.5561,  0.4670,  0.0779])
+    >>> exp = torch.arange(1., 5.)
+
+    >>> a = torch.arange(1., 5.)
+    >>> a
+    tensor([ 1.,  2.,  3.,  4.])
+    >>> exp
+    tensor([ 1.,  2.,  3.,  4.])
+    >>> torch.pow(a, exp)
+    tensor([   1.,    4.,   27.,  256.])
+
+.. function:: pow(self, exponent, *, out=None) -> Tensor
+   :noindex:
+
+:attr:`self` is a scalar ``float`` value, and :attr:`exponent` is a tensor.
+The returned tensor :attr:`out` is of the same shape as :attr:`exponent`
+
+The operation applied is:
+
+.. math::
+    \text{{out}}_i = \text{{self}} ^ {{\text{{exponent}}_i}}
+
+Args:
+    self (float): the scalar base value for the power operation
+    exponent (Tensor): the exponent tensor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> exp = torch.arange(1., 5.)
+    >>> base = 2
+    >>> torch.pow(base, exp)
+    tensor([  2.,   4.,   8.,  16.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.float_power,
+    r"""
+float_power(input, exponent, *, out=None) -> Tensor
+
+Raises :attr:`input` to the power of :attr:`exponent`, elementwise, in double precision.
+If neither input is complex returns a ``torch.float64`` tensor,
+and if one or more inputs is complex returns a ``torch.complex128`` tensor.
+
+.. note::
+    This function always computes in double precision, unlike :func:`torch.pow`,
+    which implements more typical :ref:`type promotion <type-promotion-doc>`.
+    This is useful when the computation needs to be performed in a wider or more precise dtype,
+    or the results of the computation may contain fractional values not representable in the input dtypes,
+    like when an integer base is raised to a negative integer exponent.
+
+Args:
+    input (Tensor or Number): the base value(s)
+    exponent (Tensor or Number): the exponent value(s)
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randint(10, (4,))
+    >>> a
+    tensor([6, 4, 7, 1])
+    >>> torch.float_power(a, 2)
+    tensor([36., 16., 49.,  1.], dtype=torch.float64)
+
+    >>> a = torch.arange(1, 5)
+    >>> a
+    tensor([ 1,  2,  3,  4])
+    >>> exp = torch.tensor([2, -3, 4, -5])
+    >>> exp
+    tensor([ 2, -3,  4, -5])
+    >>> torch.float_power(a, exp)
+    tensor([1.0000e+00, 1.2500e-01, 8.1000e+01, 9.7656e-04], dtype=torch.float64)
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.prod,
+    r"""
+prod(input, *, dtype=None) -> Tensor
+
+Returns the product of all elements in the :attr:`input` tensor.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[-0.8020,  0.5428, -1.5854]])
+    >>> torch.prod(a)
+    tensor(0.6902)
+
+.. function:: prod(input, dim, keepdim=False, *, dtype=None) -> Tensor
+   :noindex:
+
+Returns the product of each row of the :attr:`input` tensor in the given
+dimension :attr:`dim`.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+    {keepdim}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.randn(4, 2)
+    >>> a
+    tensor([[ 0.5261, -0.3837],
+            [ 1.1857, -0.2498],
+            [-1.1646,  0.0705],
+            [ 1.1131, -1.0629]])
+    >>> torch.prod(a, 1)
+    tensor([-0.2018, -0.2962, -0.0821, -1.1831])
+""".format(
+        **single_dim_common
+    ),
+)
+
+add_docstr(
+    torch.promote_types,
+    r"""
+promote_types(type1, type2) -> dtype
+
+Returns the :class:`torch.dtype` with the smallest size and scalar kind that is
+not smaller nor of lower kind than either `type1` or `type2`. See type promotion
+:ref:`documentation <type-promotion-doc>` for more information on the type
+promotion logic.
+
+Args:
+    type1 (:class:`torch.dtype`)
+    type2 (:class:`torch.dtype`)
+
+Example::
+
+    >>> torch.promote_types(torch.int32, torch.float32)
+    torch.float32
+    >>> torch.promote_types(torch.uint8, torch.long)
+    torch.long
+""",
+)
+
+add_docstr(
+    torch.qr,
+    r"""
+qr(input, some=True, *, out=None) -> (Tensor, Tensor)
+
+Computes the QR decomposition of a matrix or a batch of matrices :attr:`input`,
+and returns a namedtuple (Q, R) of tensors such that :math:`\text{input} = Q R`
+with :math:`Q` being an orthogonal matrix or batch of orthogonal matrices and
+:math:`R` being an upper triangular matrix or batch of upper triangular matrices.
+
+If :attr:`some` is ``True``, then this function returns the thin (reduced) QR factorization.
+Otherwise, if :attr:`some` is ``False``, this function returns the complete QR factorization.
+
+.. warning::
+
+    :func:`torch.qr` is deprecated in favor of :func:`torch.linalg.qr`
+    and will be removed in a future PyTorch release. The boolean parameter :attr:`some` has been
+    replaced with a string parameter :attr:`mode`.
+
+    ``Q, R = torch.qr(A)`` should be replaced with
+
+    .. code:: python
+
+        Q, R = torch.linalg.qr(A)
+
+    ``Q, R = torch.qr(A, some=False)`` should be replaced with
+
+    .. code:: python
+
+        Q, R = torch.linalg.qr(A, mode="complete")
+
+.. warning::
+          If you plan to backpropagate through QR, note that the current backward implementation
+          is only well-defined when the first :math:`\min(input.size(-1), input.size(-2))`
+          columns of :attr:`input` are linearly independent.
+          This behavior will probably change once QR supports pivoting.
+
+.. note:: This function uses LAPACK for CPU inputs and MAGMA for CUDA inputs,
+          and may produce different (valid) decompositions on different device types
+          or different platforms.
+
+Args:
+    input (Tensor): the input tensor of size :math:`(*, m, n)` where `*` is zero or more
+                batch dimensions consisting of matrices of dimension :math:`m \times n`.
+    some (bool, optional): Set to ``True`` for reduced QR decomposition and ``False`` for
+                complete QR decomposition. If `k = min(m, n)` then:
+
+                  * ``some=True`` : returns `(Q, R)` with dimensions (m, k), (k, n) (default)
+
+                  * ``'some=False'``: returns `(Q, R)` with dimensions (m, m), (m, n)
+
+Keyword args:
+    out (tuple, optional): tuple of `Q` and `R` tensors.
+                The dimensions of `Q` and `R` are detailed in the description of :attr:`some` above.
+
+Example::
+
+    >>> a = torch.tensor([[12., -51, 4], [6, 167, -68], [-4, 24, -41]])
+    >>> q, r = torch.qr(a)
+    >>> q
+    tensor([[-0.8571,  0.3943,  0.3314],
+            [-0.4286, -0.9029, -0.0343],
+            [ 0.2857, -0.1714,  0.9429]])
+    >>> r
+    tensor([[ -14.0000,  -21.0000,   14.0000],
+            [   0.0000, -175.0000,   70.0000],
+            [   0.0000,    0.0000,  -35.0000]])
+    >>> torch.mm(q, r).round()
+    tensor([[  12.,  -51.,    4.],
+            [   6.,  167.,  -68.],
+            [  -4.,   24.,  -41.]])
+    >>> torch.mm(q.t(), q).round()
+    tensor([[ 1.,  0.,  0.],
+            [ 0.,  1., -0.],
+            [ 0., -0.,  1.]])
+    >>> a = torch.randn(3, 4, 5)
+    >>> q, r = torch.qr(a, some=False)
+    >>> torch.allclose(torch.matmul(q, r), a)
+    True
+    >>> torch.allclose(torch.matmul(q.mT, q), torch.eye(5))
+    True
+""",
+)
+
+add_docstr(
+    torch.rad2deg,
+    r"""
+rad2deg(input, *, out=None) -> Tensor
+
+Returns a new tensor with each of the elements of :attr:`input`
+converted from angles in radians to degrees.
+
+Args:
+    {input}
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]])
+    >>> torch.rad2deg(a)
+    tensor([[ 180.0233, -180.0233],
+            [ 359.9894, -359.9894],
+            [  89.9544,  -89.9544]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.deg2rad,
+    r"""
+deg2rad(input, *, out=None) -> Tensor
+
+Returns a new tensor with each of the elements of :attr:`input`
+converted from angles in degrees to radians.
+
+Args:
+    {input}
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])
+    >>> torch.deg2rad(a)
+    tensor([[ 3.1416, -3.1416],
+            [ 6.2832, -6.2832],
+            [ 1.5708, -1.5708]])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.heaviside,
+    r"""
+heaviside(input, values, *, out=None) -> Tensor
+
+Computes the Heaviside step function for each element in :attr:`input`.
+The Heaviside step function is defined as:
+
+.. math::
+    \text{{heaviside}}(input, values) = \begin{cases}
+        0, & \text{if input < 0}\\
+        values, & \text{if input == 0}\\
+        1, & \text{if input > 0}
+    \end{cases}
+"""
+    + r"""
+
+Args:
+    {input}
+    values (Tensor): The values to use where :attr:`input` is zero.
+
+Keyword arguments:
+    {out}
+
+Example::
+
+    >>> input = torch.tensor([-1.5, 0, 2.0])
+    >>> values = torch.tensor([0.5])
+    >>> torch.heaviside(input, values)
+    tensor([0.0000, 0.5000, 1.0000])
+    >>> values = torch.tensor([1.2, -2.0, 3.5])
+    >>> torch.heaviside(input, values)
+    tensor([0., -2., 1.])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.rand,
+    """
+rand(*size, *, generator=None, out=None, dtype=None, layout=torch.strided, device=None, \
+requires_grad=False, pin_memory=False) -> Tensor
+"""
+    + r"""
+Returns a tensor filled with random numbers from a uniform distribution
+on the interval :math:`[0, 1)`
+
+The shape of the tensor is defined by the variable argument :attr:`size`.
+
+Args:
+    size (int...): a sequence of integers defining the shape of the output tensor.
+        Can be a variable number of arguments or a collection like a list or tuple.
+
+Keyword args:
+    {generator}
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+
+Example::
+
+    >>> torch.rand(4)
+    tensor([ 0.5204,  0.2503,  0.3525,  0.5673])
+    >>> torch.rand(2, 3)
+    tensor([[ 0.8237,  0.5781,  0.6879],
+            [ 0.3816,  0.7249,  0.0998]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.rand_like,
+    r"""
+rand_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor with the same size as :attr:`input` that is filled with
+random numbers from a uniform distribution on the interval :math:`[0, 1)`.
+``torch.rand_like(input)`` is equivalent to
+``torch.rand(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.randint,
+    """
+randint(low=0, high, size, \\*, generator=None, out=None, \
+dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a tensor filled with random integers generated uniformly
+between :attr:`low` (inclusive) and :attr:`high` (exclusive).
+
+The shape of the tensor is defined by the variable argument :attr:`size`.
+
+.. note::
+    With the global dtype default (``torch.float32``), this function returns
+    a tensor with dtype ``torch.int64``.
+
+Args:
+    low (int, optional): Lowest integer to be drawn from the distribution. Default: 0.
+    high (int): One above the highest integer to be drawn from the distribution.
+    size (tuple): a tuple defining the shape of the output tensor.
+
+Keyword args:
+    {generator}
+    {out}
+    dtype (`torch.dtype`, optional) - the desired data type of returned tensor. Default: if ``None``,
+        this function returns a tensor with dtype ``torch.int64``.
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.randint(3, 5, (3,))
+    tensor([4, 3, 4])
+
+
+    >>> torch.randint(10, (2, 2))
+    tensor([[0, 2],
+            [5, 5]])
+
+
+    >>> torch.randint(3, 10, (2, 2))
+    tensor([[4, 5],
+            [6, 7]])
+
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.randint_like,
+    """
+randint_like(input, low=0, high, \\*, dtype=None, layout=torch.strided, device=None, requires_grad=False, \
+memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor with the same shape as Tensor :attr:`input` filled with
+random integers generated uniformly between :attr:`low` (inclusive) and
+:attr:`high` (exclusive).
+
+.. note:
+    With the global dtype default (``torch.float32``), this function returns
+    a tensor with dtype ``torch.int64``.
+
+Args:
+    {input}
+    low (int, optional): Lowest integer to be drawn from the distribution. Default: 0.
+    high (int): One above the highest integer to be drawn from the distribution.
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.randn,
+    """
+randn(*size, *, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, \
+pin_memory=False) -> Tensor
+"""
+    + r"""
+
+Returns a tensor filled with random numbers from a normal distribution
+with mean `0` and variance `1` (also called the standard normal
+distribution).
+
+.. math::
+    \text{{out}}_{{i}} \sim \mathcal{{N}}(0, 1)
+
+For complex dtypes, the tensor is i.i.d. sampled from a `complex normal distribution`_ with zero mean and
+unit variance as
+
+.. math::
+    \text{{out}}_{{i}} \sim \mathcal{{CN}}(0, 1)
+
+This is equivalent to separately sampling the real :math:`(\operatorname{{Re}})` and imaginary
+:math:`(\operatorname{{Im}})` part of :math:`\text{{out}}_i` as
+
+.. math::
+    \operatorname{{Re}}(\text{{out}}_{{i}}) \sim \mathcal{{N}}(0, \frac{{1}}{{2}}),\quad
+    \operatorname{{Im}}(\text{{out}}_{{i}}) \sim \mathcal{{N}}(0, \frac{{1}}{{2}})
+
+The shape of the tensor is defined by the variable argument :attr:`size`.
+
+
+Args:
+    size (int...): a sequence of integers defining the shape of the output tensor.
+        Can be a variable number of arguments or a collection like a list or tuple.
+
+Keyword args:
+    {generator}
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+
+Example::
+
+    >>> torch.randn(4)
+    tensor([-2.1436,  0.9966,  2.3426, -0.6366])
+    >>> torch.randn(2, 3)
+    tensor([[ 1.5954,  2.8929, -1.0923],
+            [ 1.1719, -0.4709, -0.1996]])
+
+.. _complex normal distribution: https://en.wikipedia.org/wiki/Complex_normal_distribution
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.randn_like,
+    r"""
+randn_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor with the same size as :attr:`input` that is filled with
+random numbers from a normal distribution with mean 0 and variance 1. Please refer to :func:`torch.randn` for the
+sampling process of complex dtypes. ``torch.randn_like(input)`` is equivalent to
+``torch.randn(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.randperm,
+    """
+randperm(n, *, generator=None, out=None, dtype=torch.int64,layout=torch.strided, \
+device=None, requires_grad=False, pin_memory=False) -> Tensor
+"""
+    + r"""
+Returns a random permutation of integers from ``0`` to ``n - 1``.
+
+Args:
+    n (int): the upper bound (exclusive)
+
+Keyword args:
+    {generator}
+    {out}
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: ``torch.int64``.
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+
+Example::
+
+    >>> torch.randperm(4)
+    tensor([2, 1, 0, 3])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.tensor,
+    r"""
+tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False) -> Tensor
+
+Constructs a tensor with no autograd history (also known as a "leaf tensor", see :doc:`/notes/autograd`) by copying :attr:`data`.
+
+.. warning::
+
+    When working with tensors prefer using :func:`torch.Tensor.clone`,
+    :func:`torch.Tensor.detach`, and :func:`torch.Tensor.requires_grad_` for
+    readability. Letting `t` be a tensor, ``torch.tensor(t)`` is equivalent to
+    ``t.clone().detach()``, and ``torch.tensor(t, requires_grad=True)``
+    is equivalent to ``t.clone().detach().requires_grad_(True)``.
+
+.. seealso::
+
+    :func:`torch.as_tensor` preserves autograd history and avoids copies where possible.
+    :func:`torch.from_numpy` creates a tensor that shares storage with a NumPy array.
+
+Args:
+    {data}
+
+Keyword args:
+    {dtype}
+    device (:class:`torch.device`, optional): the device of the constructed tensor. If None and data is a tensor
+        then the device of data is used. If None and data is not a tensor then
+        the result tensor is constructed on the current device.
+    {requires_grad}
+    {pin_memory}
+
+
+Example::
+
+    >>> torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
+    tensor([[ 0.1000,  1.2000],
+            [ 2.2000,  3.1000],
+            [ 4.9000,  5.2000]])
+
+    >>> torch.tensor([0, 1])  # Type inference on data
+    tensor([ 0,  1])
+
+    >>> torch.tensor([[0.11111, 0.222222, 0.3333333]],
+    ...              dtype=torch.float64,
+    ...              device=torch.device('cuda:0'))  # creates a double tensor on a CUDA device
+    tensor([[ 0.1111,  0.2222,  0.3333]], dtype=torch.float64, device='cuda:0')
+
+    >>> torch.tensor(3.14159)  # Create a zero-dimensional (scalar) tensor
+    tensor(3.1416)
+
+    >>> torch.tensor([])  # Create an empty tensor (of size (0,))
+    tensor([])
+""".format(
+        **factory_data_common_args
+    ),
+)
+
+add_docstr(
+    torch.range,
+    r"""
+range(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a 1-D tensor of size :math:`\left\lfloor \frac{\text{end} - \text{start}}{\text{step}} \right\rfloor + 1`
+with values from :attr:`start` to :attr:`end` with step :attr:`step`. Step is
+the gap between two values in the tensor.
+
+.. math::
+    \text{out}_{i+1} = \text{out}_i + \text{step}.
+"""
+    + r"""
+.. warning::
+    This function is deprecated and will be removed in a future release because its behavior is inconsistent with
+    Python's range builtin. Instead, use :func:`torch.arange`, which produces values in [start, end).
+
+Args:
+    start (float): the starting value for the set of points. Default: ``0``.
+    end (float): the ending value for the set of points
+    step (float): the gap between each pair of adjacent points. Default: ``1``.
+
+Keyword args:
+    {out}
+    {dtype} If `dtype` is not given, infer the data type from the other input
+        arguments. If any of `start`, `end`, or `stop` are floating-point, the
+        `dtype` is inferred to be the default dtype, see
+        :meth:`~torch.get_default_dtype`. Otherwise, the `dtype` is inferred to
+        be `torch.int64`.
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.range(1, 4)
+    tensor([ 1.,  2.,  3.,  4.])
+    >>> torch.range(1, 4, 0.5)
+    tensor([ 1.0000,  1.5000,  2.0000,  2.5000,  3.0000,  3.5000,  4.0000])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.arange,
+    r"""
+arange(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a 1-D tensor of size :math:`\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil`
+with values from the interval ``[start, end)`` taken with common difference
+:attr:`step` beginning from `start`.
+
+Note that non-integer :attr:`step` is subject to floating point rounding errors when
+comparing against :attr:`end`; to avoid inconsistency, we advise subtracting a small epsilon from :attr:`end`
+in such cases.
+
+.. math::
+    \text{out}_{{i+1}} = \text{out}_{i} + \text{step}
+"""
+    + r"""
+Args:
+    start (Number): the starting value for the set of points. Default: ``0``.
+    end (Number): the ending value for the set of points
+    step (Number): the gap between each pair of adjacent points. Default: ``1``.
+
+Keyword args:
+    {out}
+    {dtype} If `dtype` is not given, infer the data type from the other input
+        arguments. If any of `start`, `end`, or `stop` are floating-point, the
+        `dtype` is inferred to be the default dtype, see
+        :meth:`~torch.get_default_dtype`. Otherwise, the `dtype` is inferred to
+        be `torch.int64`.
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.arange(5)
+    tensor([ 0,  1,  2,  3,  4])
+    >>> torch.arange(1, 4)
+    tensor([ 1,  2,  3])
+    >>> torch.arange(1, 2.5, 0.5)
+    tensor([ 1.0000,  1.5000,  2.0000])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.ravel,
+    r"""
+ravel(input) -> Tensor
+
+Return a contiguous flattened tensor. A copy is made only if needed.
+
+Args:
+    {input}
+
+Example::
+
+    >>> t = torch.tensor([[[1, 2],
+    ...                    [3, 4]],
+    ...                   [[5, 6],
+    ...                    [7, 8]]])
+    >>> torch.ravel(t)
+    tensor([1, 2, 3, 4, 5, 6, 7, 8])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.remainder,
+    r"""
+remainder(input, other, *, out=None) -> Tensor
+
+Computes
+`Python's modulus operation <https://docs.python.org/3/reference/expressions.html#binary-arithmetic-operations>`_
+entrywise.  The result has the same sign as the divisor :attr:`other` and its absolute value
+is less than that of :attr:`other`.
+
+It may also be defined in terms of :func:`torch.div` as
+
+.. code:: python
+
+    torch.remainder(a, b) == a - a.div(b, rounding_mode="floor") * b
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer and float inputs.
+
+.. note::
+    Complex inputs are not supported. In some cases, it is not mathematically
+    possible to satisfy the definition of a modulo operation with complex numbers.
+    See :func:`torch.fmod` for how division by zero is handled.
+
+.. seealso::
+
+    :func:`torch.fmod` which implements C++'s `std::fmod <https://en.cppreference.com/w/cpp/numeric/math/fmod>`_.
+    This one is defined in terms of division rounding towards zero.
+
+Args:
+    input (Tensor or Scalar): the dividend
+    other (Tensor or Scalar): the divisor
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.remainder(torch.tensor([-3., -2, -1, 1, 2, 3]), 2)
+    tensor([ 1.,  0.,  1.,  1.,  0.,  1.])
+    >>> torch.remainder(torch.tensor([1, 2, 3, 4, 5]), -1.5)
+    tensor([ -0.5000, -1.0000,  0.0000, -0.5000, -1.0000 ])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.renorm,
+    r"""
+renorm(input, p, dim, maxnorm, *, out=None) -> Tensor
+
+Returns a tensor where each sub-tensor of :attr:`input` along dimension
+:attr:`dim` is normalized such that the `p`-norm of the sub-tensor is lower
+than the value :attr:`maxnorm`
+
+.. note:: If the norm of a row is lower than `maxnorm`, the row is unchanged
+
+Args:
+    {input}
+    p (float): the power for the norm computation
+    dim (int): the dimension to slice over to get the sub-tensors
+    maxnorm (float): the maximum norm to keep each sub-tensor under
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> x = torch.ones(3, 3)
+    >>> x[1].fill_(2)
+    tensor([ 2.,  2.,  2.])
+    >>> x[2].fill_(3)
+    tensor([ 3.,  3.,  3.])
+    >>> x
+    tensor([[ 1.,  1.,  1.],
+            [ 2.,  2.,  2.],
+            [ 3.,  3.,  3.]])
+    >>> torch.renorm(x, 1, 0, 5)
+    tensor([[ 1.0000,  1.0000,  1.0000],
+            [ 1.6667,  1.6667,  1.6667],
+            [ 1.6667,  1.6667,  1.6667]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.reshape,
+    r"""
+reshape(input, shape) -> Tensor
+
+Returns a tensor with the same data and number of elements as :attr:`input`,
+but with the specified shape. When possible, the returned tensor will be a view
+of :attr:`input`. Otherwise, it will be a copy. Contiguous inputs and inputs
+with compatible strides can be reshaped without copying, but you should not
+depend on the copying vs. viewing behavior.
+
+See :meth:`torch.Tensor.view` on when it is possible to return a view.
+
+A single dimension may be -1, in which case it's inferred from the remaining
+dimensions and the number of elements in :attr:`input`.
+
+Args:
+    input (Tensor): the tensor to be reshaped
+    shape (tuple of int): the new shape
+
+Example::
+
+    >>> a = torch.arange(4.)
+    >>> torch.reshape(a, (2, 2))
+    tensor([[ 0.,  1.],
+            [ 2.,  3.]])
+    >>> b = torch.tensor([[0, 1], [2, 3]])
+    >>> torch.reshape(b, (-1,))
+    tensor([ 0,  1,  2,  3])
+""",
+)
+
+
+add_docstr(
+    torch.result_type,
+    r"""
+result_type(tensor1, tensor2) -> dtype
+
+Returns the :class:`torch.dtype` that would result from performing an arithmetic
+operation on the provided input tensors. See type promotion :ref:`documentation <type-promotion-doc>`
+for more information on the type promotion logic.
+
+Args:
+    tensor1 (Tensor or Number): an input tensor or number
+    tensor2 (Tensor or Number): an input tensor or number
+
+Example::
+
+    >>> torch.result_type(torch.tensor([1, 2], dtype=torch.int), 1.0)
+    torch.float32
+    >>> torch.result_type(torch.tensor([1, 2], dtype=torch.uint8), torch.tensor(1))
+    torch.uint8
+""",
+)
+
+add_docstr(
+    torch.row_stack,
+    r"""
+row_stack(tensors, *, out=None) -> Tensor
+
+Alias of :func:`torch.vstack`.
+""",
+)
+
+add_docstr(
+    torch.round,
+    r"""
+round(input, *, decimals=0, out=None) -> Tensor
+
+Rounds elements of :attr:`input` to the nearest integer.
+
+For integer inputs, follows the array-api convention of returning a
+copy of the input tensor.
+The return type of output is same as that of input's dtype.
+
+.. note::
+    This function implements the "round half to even" to
+    break ties when a number is equidistant from two
+    integers (e.g. `round(2.5)` is 2).
+
+    When the :attr:\`decimals\` argument is specified the
+    algorithm used is similar to NumPy's `around`. This
+    algorithm is fast but inexact and it can easily
+    overflow for low precision dtypes.
+    Eg. `round(tensor([10000], dtype=torch.float16), decimals=3)` is `inf`.
+
+.. seealso::
+    :func:`torch.ceil`, which rounds up.
+    :func:`torch.floor`, which rounds down.
+    :func:`torch.trunc`, which rounds towards zero.
+
+Args:
+    {input}
+    decimals (int): Number of decimal places to round to (default: 0).
+        If decimals is negative, it specifies the number of positions
+        to the left of the decimal point.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> torch.round(torch.tensor((4.7, -2.3, 9.1, -7.7)))
+    tensor([ 5.,  -2.,  9., -8.])
+
+    >>> # Values equidistant from two integers are rounded towards the
+    >>> #   the nearest even value (zero is treated as even)
+    >>> torch.round(torch.tensor([-0.5, 0.5, 1.5, 2.5]))
+    tensor([-0., 0., 2., 2.])
+
+    >>> # A positive decimals argument rounds to the to that decimal place
+    >>> torch.round(torch.tensor([0.1234567]), decimals=3)
+    tensor([0.1230])
+
+    >>> # A negative decimals argument rounds to the left of the decimal
+    >>> torch.round(torch.tensor([1200.1234567]), decimals=-3)
+    tensor([1000.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.rsqrt,
+    r"""
+rsqrt(input, *, out=None) -> Tensor
+
+Returns a new tensor with the reciprocal of the square-root of each of
+the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \frac{1}{\sqrt{\text{input}_{i}}}
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.0370,  0.2970,  1.5420, -0.9105])
+    >>> torch.rsqrt(a)
+    tensor([    nan,  1.8351,  0.8053,     nan])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.scatter,
+    r"""
+scatter(input, dim, index, src) -> Tensor
+
+Out-of-place version of :meth:`torch.Tensor.scatter_`
+""",
+)
+
+add_docstr(
+    torch.scatter_add,
+    r"""
+scatter_add(input, dim, index, src) -> Tensor
+
+Out-of-place version of :meth:`torch.Tensor.scatter_add_`
+""",
+)
+
+add_docstr(
+    torch.scatter_reduce,
+    r"""
+scatter_reduce(input, dim, index, src, reduce, *, include_self=True) -> Tensor
+
+Out-of-place version of :meth:`torch.Tensor.scatter_reduce_`
+""",
+)
+
+add_docstr(
+    torch.select,
+    r"""
+select(input, dim, index) -> Tensor
+
+Slices the :attr:`input` tensor along the selected dimension at the given index.
+This function returns a view of the original tensor with the given dimension removed.
+
+.. note:: If :attr:`input` is a sparse tensor and returning a view of
+          the tensor is not possible, a RuntimeError exception is
+          raised. In this is the case, consider using
+          :func:`torch.select_copy` function.
+
+Args:
+    {input}
+    dim (int): the dimension to slice
+    index (int): the index to select with
+
+.. note::
+
+    :meth:`select` is equivalent to slicing. For example,
+    ``tensor.select(0, index)`` is equivalent to ``tensor[index]`` and
+    ``tensor.select(2, index)`` is equivalent to ``tensor[:,:,index]``.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.select_scatter,
+    r"""
+select_scatter(input, src, dim, index) -> Tensor
+
+Embeds the values of the :attr:`src` tensor into :attr:`input` at the given index.
+This function returns a tensor with fresh storage; it does not create a view.
+
+
+Args:
+    {input}
+    src (Tensor): The tensor to embed into :attr:`input`
+    dim (int): the dimension to insert the slice into.
+    index (int): the index to select with
+
+.. note::
+
+    :attr:`src` must be of the proper size in order to be embedded
+    into :attr:`input`. Specifically, it should have the same shape as
+    ``torch.select(input, dim, index)``
+
+Example::
+
+    >>> a = torch.zeros(2, 2)
+    >>> b = torch.ones(2)
+    >>> a.select_scatter(b, 0, 0)
+    tensor([[1., 1.],
+            [0., 0.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.slice_scatter,
+    r"""
+slice_scatter(input, src, dim=0, start=None, end=None, step=1) -> Tensor
+
+Embeds the values of the :attr:`src` tensor into :attr:`input` at the given
+dimension.
+This function returns a tensor with fresh storage; it does not create a view.
+
+
+Args:
+    {input}
+    src (Tensor): The tensor to embed into :attr:`input`
+    dim (int): the dimension to insert the slice into
+    start (Optional[int]): the start index of where to insert the slice
+    end (Optional[int]): the end index of where to insert the slice
+    step (int): the how many elements to skip in
+
+Example::
+
+    >>> a = torch.zeros(8, 8)
+    >>> b = torch.ones(2, 8)
+    >>> a.slice_scatter(b, start=6)
+    tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0.],
+            [1., 1., 1., 1., 1., 1., 1., 1.],
+            [1., 1., 1., 1., 1., 1., 1., 1.]])
+
+    >>> b = torch.ones(8, 2)
+    >>> a.slice_scatter(b, dim=1, start=2, end=6, step=2)
+    tensor([[0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.],
+            [0., 0., 1., 0., 1., 0., 0., 0.]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.set_flush_denormal,
+    r"""
+set_flush_denormal(mode) -> bool
+
+Disables denormal floating numbers on CPU.
+
+Returns ``True`` if your system supports flushing denormal numbers and it
+successfully configures flush denormal mode.  :meth:`~torch.set_flush_denormal`
+is supported on x86 architectures supporting SSE3 and AArch64 architecture.
+
+Args:
+    mode (bool): Controls whether to enable flush denormal mode or not
+
+Example::
+
+    >>> torch.set_flush_denormal(True)
+    True
+    >>> torch.tensor([1e-323], dtype=torch.float64)
+    tensor([ 0.], dtype=torch.float64)
+    >>> torch.set_flush_denormal(False)
+    True
+    >>> torch.tensor([1e-323], dtype=torch.float64)
+    tensor(9.88131e-324 *
+           [ 1.0000], dtype=torch.float64)
+""",
+)
+
+add_docstr(
+    torch.set_num_threads,
+    r"""
+set_num_threads(int)
+
+Sets the number of threads used for intraop parallelism on CPU.
+
+.. warning::
+    To ensure that the correct number of threads is used, set_num_threads
+    must be called before running eager, JIT or autograd code.
+""",
+)
+
+add_docstr(
+    torch.set_num_interop_threads,
+    r"""
+set_num_interop_threads(int)
+
+Sets the number of threads used for interop parallelism
+(e.g. in JIT interpreter) on CPU.
+
+.. warning::
+    Can only be called once and before any inter-op parallel work
+    is started (e.g. JIT execution).
+""",
+)
+
+add_docstr(
+    torch.sigmoid,
+    r"""
+sigmoid(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.expit`.
+""",
+)
+
+add_docstr(
+    torch.logit,
+    r"""
+logit(input, eps=None, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.logit`.
+""",
+)
+
+add_docstr(
+    torch.sign,
+    r"""
+sign(input, *, out=None) -> Tensor
+
+Returns a new tensor with the signs of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \operatorname{sgn}(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.tensor([0.7, -1.2, 0., 2.3])
+    >>> a
+    tensor([ 0.7000, -1.2000,  0.0000,  2.3000])
+    >>> torch.sign(a)
+    tensor([ 1., -1.,  0.,  1.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.signbit,
+    r"""
+signbit(input, *, out=None) -> Tensor
+
+Tests if each element of :attr:`input` has its sign bit set or not.
+
+Args:
+  {input}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> a = torch.tensor([0.7, -1.2, 0., 2.3])
+    >>> torch.signbit(a)
+    tensor([ False, True,  False,  False])
+    >>> a = torch.tensor([-0.0, 0.0])
+    >>> torch.signbit(a)
+    tensor([ True,  False])
+
+.. note::
+    signbit handles signed zeros, so negative zero (-0) returns True.
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.sgn,
+    r"""
+sgn(input, *, out=None) -> Tensor
+
+This function is an extension of torch.sign() to complex tensors.
+It computes a new tensor whose elements have
+the same angles as the corresponding elements of :attr:`input` and
+absolute values (i.e. magnitudes) of one for complex tensors and
+is equivalent to torch.sign() for non-complex tensors.
+
+.. math::
+    \text{out}_{i} = \begin{cases}
+                    0 & |\text{{input}}_i| == 0 \\
+                    \frac{{\text{{input}}_i}}{|{\text{{input}}_i}|} & \text{otherwise}
+                    \end{cases}
+
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+  {out}
+
+Example::
+
+    >>> t = torch.tensor([3+4j, 7-24j, 0, 1+2j])
+    >>> t.sgn()
+    tensor([0.6000+0.8000j, 0.2800-0.9600j, 0.0000+0.0000j, 0.4472+0.8944j])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.sin,
+    r"""
+sin(input, *, out=None) -> Tensor
+
+Returns a new tensor with the sine of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \sin(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-0.5461,  0.1347, -2.7266, -0.2746])
+    >>> torch.sin(a)
+    tensor([-0.5194,  0.1343, -0.4032, -0.2711])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.sinc,
+    r"""
+sinc(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.special.sinc`.
+""",
+)
+
+add_docstr(
+    torch.sinh,
+    r"""
+sinh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the hyperbolic sine of the elements of
+:attr:`input`.
+
+.. math::
+    \text{out}_{i} = \sinh(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.5380, -0.8632, -0.1265,  0.9399])
+    >>> torch.sinh(a)
+    tensor([ 0.5644, -0.9744, -0.1268,  1.0845])
+
+.. note::
+   When :attr:`input` is on the CPU, the implementation of torch.sinh may use
+   the Sleef library, which rounds very large results to infinity or negative
+   infinity. See `here <https://sleef.org/purec.xhtml>`_ for details.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.sort,
+    r"""
+sort(input, dim=-1, descending=False, stable=False, *, out=None) -> (Tensor, LongTensor)
+
+Sorts the elements of the :attr:`input` tensor along a given dimension
+in ascending order by value.
+
+If :attr:`dim` is not given, the last dimension of the `input` is chosen.
+
+If :attr:`descending` is ``True`` then the elements are sorted in descending
+order by value.
+
+If :attr:`stable` is ``True`` then the sorting routine becomes stable, preserving
+the order of equivalent elements.
+
+A namedtuple of (values, indices) is returned, where the `values` are the
+sorted values and `indices` are the indices of the elements in the original
+`input` tensor.
+
+Args:
+    {input}
+    dim (int, optional): the dimension to sort along
+    descending (bool, optional): controls the sorting order (ascending or descending)
+    stable (bool, optional): makes the sorting routine stable, which guarantees that the order
+       of equivalent elements is preserved.
+
+Keyword args:
+    out (tuple, optional): the output tuple of (`Tensor`, `LongTensor`) that can
+        be optionally given to be used as output buffers
+
+Example::
+
+    >>> x = torch.randn(3, 4)
+    >>> sorted, indices = torch.sort(x)
+    >>> sorted
+    tensor([[-0.2162,  0.0608,  0.6719,  2.3332],
+            [-0.5793,  0.0061,  0.6058,  0.9497],
+            [-0.5071,  0.3343,  0.9553,  1.0960]])
+    >>> indices
+    tensor([[ 1,  0,  2,  3],
+            [ 3,  1,  0,  2],
+            [ 0,  3,  1,  2]])
+
+    >>> sorted, indices = torch.sort(x, 0)
+    >>> sorted
+    tensor([[-0.5071, -0.2162,  0.6719, -0.5793],
+            [ 0.0608,  0.0061,  0.9497,  0.3343],
+            [ 0.6058,  0.9553,  1.0960,  2.3332]])
+    >>> indices
+    tensor([[ 2,  0,  0,  1],
+            [ 0,  1,  1,  2],
+            [ 1,  2,  2,  0]])
+    >>> x = torch.tensor([0, 1] * 9)
+    >>> x.sort()
+    torch.return_types.sort(
+        values=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
+        indices=tensor([ 2, 16,  4,  6, 14,  8,  0, 10, 12,  9, 17, 15, 13, 11,  7,  5,  3,  1]))
+    >>> x.sort(stable=True)
+    torch.return_types.sort(
+        values=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
+        indices=tensor([ 0,  2,  4,  6,  8, 10, 12, 14, 16,  1,  3,  5,  7,  9, 11, 13, 15, 17]))
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.argsort,
+    r"""
+argsort(input, dim=-1, descending=False, stable=False) -> Tensor
+
+Returns the indices that sort a tensor along a given dimension in ascending
+order by value.
+
+This is the second value returned by :meth:`torch.sort`.  See its documentation
+for the exact semantics of this method.
+
+If :attr:`stable` is ``True`` then the sorting routine becomes stable, preserving
+the order of equivalent elements. If ``False``, the relative order of values
+which compare equal is not guaranteed. ``True`` is slower.
+
+Args:
+    {input}
+    dim (int, optional): the dimension to sort along
+    descending (bool, optional): controls the sorting order (ascending or descending)
+    stable (bool, optional): controls the relative order of equivalent elements
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.0785,  1.5267, -0.8521,  0.4065],
+            [ 0.1598,  0.0788, -0.0745, -1.2700],
+            [ 1.2208,  1.0722, -0.7064,  1.2564],
+            [ 0.0669, -0.2318, -0.8229, -0.9280]])
+
+
+    >>> torch.argsort(a, dim=1)
+    tensor([[2, 0, 3, 1],
+            [3, 2, 1, 0],
+            [2, 1, 0, 3],
+            [3, 2, 1, 0]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.msort,
+    r"""
+msort(input, *, out=None) -> Tensor
+
+Sorts the elements of the :attr:`input` tensor along its first dimension
+in ascending order by value.
+
+.. note:: `torch.msort(t)` is equivalent to `torch.sort(t, dim=0)[0]`.
+          See also :func:`torch.sort`.
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> t = torch.randn(3, 4)
+    >>> t
+    tensor([[-0.1321,  0.4370, -1.2631, -1.1289],
+            [-2.0527, -1.1250,  0.2275,  0.3077],
+            [-0.0881, -0.1259, -0.5495,  1.0284]])
+    >>> torch.msort(t)
+    tensor([[-2.0527, -1.1250, -1.2631, -1.1289],
+            [-0.1321, -0.1259, -0.5495,  0.3077],
+            [-0.0881,  0.4370,  0.2275,  1.0284]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_compressed_tensor,
+    r"""sparse_compressed_tensor(compressed_indices, plain_indices, values, size=None, """
+    r"""*, dtype=None, layout=None, device=None, requires_grad=False, check_invariants=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in Compressed Sparse format - CSR,
+CSC, BSR, or BSC - <sparse-compressed-docs>` with specified values at
+the given :attr:`compressed_indices` and :attr:`plain_indices`. Sparse
+matrix multiplication operations in Compressed Sparse format are
+typically faster than that for sparse tensors in COO format. Make you
+have a look at :ref:`the note on the data type of the indices
+<sparse-compressed-docs>`.
+
+{sparse_factory_device_note}
+
+Args:
+    compressed_indices (array_like): (B+1)-dimensional array of size
+        ``(*batchsize, compressed_dim_size + 1)``.  The last element of
+        each batch is the number of non-zero elements or blocks. This
+        tensor encodes the index in ``values`` and ``plain_indices``
+        depending on where the given compressed dimension (row or
+        column) starts. Each successive number in the tensor
+        subtracted by the number before it denotes the number of
+        elements or blocks in a given compressed dimension.
+    plain_indices (array_like): Plain dimension (column or row)
+        co-ordinates of each element or block in values. (B+1)-dimensional
+        tensor with the same length as values.
+
+    values (array_list): Initial values for the tensor. Can be a list,
+        tuple, NumPy ``ndarray``, scalar, and other types.  that
+        represents a (1+K)-dimensional (for CSR and CSC layouts) or
+        (1+2+K)-dimensional tensor (for BSR and BSC layouts) where
+        ``K`` is the number of dense dimensions.
+    size (list, tuple, :class:`torch.Size`, optional): Size of the
+        sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols *
+        blocksize[1], *densesize)`` where ``blocksize[0] ==
+        blocksize[1] == 1`` for CSR and CSC formats. If not provided,
+        the size will be inferred as the minimum size big enough to
+        hold all non-zero elements or blocks.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of
+        returned tensor.  Default: if None, infers data type from
+        :attr:`values`.
+    layout (:class:`torch.layout`, required): the desired layout of
+        returned tensor: :attr:`torch.sparse_csr`,
+        :attr:`torch.sparse_csc`, :attr:`torch.sparse_bsr`, or
+        :attr:`torch.sparse_bsc`.
+    device (:class:`torch.device`, optional): the desired device of
+        returned tensor.  Default: if None, uses the current device
+        for the default tensor type (see
+        :func:`torch.set_default_device`). :attr:`device` will be
+        the CPU for CPU tensor types and the current CUDA device for
+        CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+
+Example::
+    >>> compressed_indices = [0, 2, 4]
+    >>> plain_indices = [0, 1, 0, 1]
+    >>> values = [1, 2, 3, 4]
+    >>> torch.sparse_compressed_tensor(torch.tensor(compressed_indices, dtype=torch.int64),
+    ...                                torch.tensor(plain_indices, dtype=torch.int64),
+    ...                                torch.tensor(values), dtype=torch.double, layout=torch.sparse_csr)
+    tensor(crow_indices=tensor([0, 2, 4]),
+           col_indices=tensor([0, 1, 0, 1]),
+           values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4,
+           dtype=torch.float64, layout=torch.sparse_csr)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_csr_tensor,
+    r"""sparse_csr_tensor(crow_indices, col_indices, values, size=None, """
+    r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in CSR (Compressed Sparse Row) <sparse-csr-docs>` with specified
+values at the given :attr:`crow_indices` and :attr:`col_indices`. Sparse matrix multiplication operations
+in CSR format are typically faster than that for sparse tensors in COO format. Make you have a look
+at :ref:`the note on the data type of the indices <sparse-csr-docs>`.
+
+{sparse_factory_device_note}
+
+Args:
+    crow_indices (array_like): (B+1)-dimensional array of size
+        ``(*batchsize, nrows + 1)``.  The last element of each batch
+        is the number of non-zeros. This tensor encodes the index in
+        values and col_indices depending on where the given row
+        starts. Each successive number in the tensor subtracted by the
+        number before it denotes the number of elements in a given
+        row.
+    col_indices (array_like): Column co-ordinates of each element in
+        values. (B+1)-dimensional tensor with the same length
+        as values.
+    values (array_list): Initial values for the tensor. Can be a list,
+        tuple, NumPy ``ndarray``, scalar, and other types that
+        represents a (1+K)-dimensional tensor where ``K`` is the number
+        of dense dimensions.
+    size (list, tuple, :class:`torch.Size`, optional): Size of the
+        sparse tensor: ``(*batchsize, nrows, ncols, *densesize)``. If
+        not provided, the size will be inferred as the minimum size
+        big enough to hold all non-zero elements.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of
+        returned tensor.  Default: if None, infers data type from
+        :attr:`values`.
+    device (:class:`torch.device`, optional): the desired device of
+        returned tensor.  Default: if None, uses the current device
+        for the default tensor type (see
+        :func:`torch.set_default_device`). :attr:`device` will be
+        the CPU for CPU tensor types and the current CUDA device for
+        CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+
+Example::
+    >>> crow_indices = [0, 2, 4]
+    >>> col_indices = [0, 1, 0, 1]
+    >>> values = [1, 2, 3, 4]
+    >>> torch.sparse_csr_tensor(torch.tensor(crow_indices, dtype=torch.int64),
+    ...                         torch.tensor(col_indices, dtype=torch.int64),
+    ...                         torch.tensor(values), dtype=torch.double)
+    tensor(crow_indices=tensor([0, 2, 4]),
+           col_indices=tensor([0, 1, 0, 1]),
+           values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4,
+           dtype=torch.float64, layout=torch.sparse_csr)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_csc_tensor,
+    r"""sparse_csc_tensor(ccol_indices, row_indices, values, size=None, """
+    r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in CSC (Compressed Sparse Column)
+<sparse-csc-docs>` with specified values at the given
+:attr:`ccol_indices` and :attr:`row_indices`. Sparse matrix
+multiplication operations in CSC format are typically faster than that
+for sparse tensors in COO format. Make you have a look at :ref:`the
+note on the data type of the indices <sparse-csc-docs>`.
+
+{sparse_factory_device_note}
+
+Args:
+    ccol_indices (array_like): (B+1)-dimensional array of size
+        ``(*batchsize, ncols + 1)``.  The last element of each batch
+        is the number of non-zeros. This tensor encodes the index in
+        values and row_indices depending on where the given column
+        starts. Each successive number in the tensor subtracted by the
+        number before it denotes the number of elements in a given
+        column.
+    row_indices (array_like): Row co-ordinates of each element in
+        values. (B+1)-dimensional tensor with the same length as
+        values.
+    values (array_list): Initial values for the tensor. Can be a list,
+        tuple, NumPy ``ndarray``, scalar, and other types that
+        represents a (1+K)-dimensional tensor where ``K`` is the number
+        of dense dimensions.
+    size (list, tuple, :class:`torch.Size`, optional): Size of the
+        sparse tensor: ``(*batchsize, nrows, ncols, *densesize)``. If
+        not provided, the size will be inferred as the minimum size
+        big enough to hold all non-zero elements.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of
+        returned tensor.  Default: if None, infers data type from
+        :attr:`values`.
+    device (:class:`torch.device`, optional): the desired device of
+        returned tensor.  Default: if None, uses the current device
+        for the default tensor type (see
+        :func:`torch.set_default_device`). :attr:`device` will be
+        the CPU for CPU tensor types and the current CUDA device for
+        CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+
+Example::
+    >>> ccol_indices = [0, 2, 4]
+    >>> row_indices = [0, 1, 0, 1]
+    >>> values = [1, 2, 3, 4]
+    >>> torch.sparse_csc_tensor(torch.tensor(ccol_indices, dtype=torch.int64),
+    ...                         torch.tensor(row_indices, dtype=torch.int64),
+    ...                         torch.tensor(values), dtype=torch.double)
+    tensor(ccol_indices=tensor([0, 2, 4]),
+           row_indices=tensor([0, 1, 0, 1]),
+           values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4,
+           dtype=torch.float64, layout=torch.sparse_csc)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_bsr_tensor,
+    r"""sparse_bsr_tensor(crow_indices, col_indices, values, size=None, """
+    r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in BSR (Block Compressed Sparse Row))
+<sparse-bsr-docs>` with specified 2-dimensional blocks at the given
+:attr:`crow_indices` and :attr:`col_indices`. Sparse matrix
+multiplication operations in BSR format are typically faster than that
+for sparse tensors in COO format. Make you have a look at :ref:`the
+note on the data type of the indices <sparse-bsr-docs>`.
+
+{sparse_factory_device_note}
+
+Args:
+    crow_indices (array_like): (B+1)-dimensional array of size
+        ``(*batchsize, nrowblocks + 1)``.  The last element of each
+        batch is the number of non-zeros. This tensor encodes the
+        block index in values and col_indices depending on where the
+        given row block starts. Each successive number in the tensor
+        subtracted by the number before it denotes the number of
+        blocks in a given row.
+    col_indices (array_like): Column block co-ordinates of each block
+        in values. (B+1)-dimensional tensor with the same length as
+        values.
+    values (array_list): Initial values for the tensor. Can be a list,
+        tuple, NumPy ``ndarray``, scalar, and other types that
+        represents a (1 + 2 + K)-dimensional tensor where ``K`` is the
+        number of dense dimensions.
+    size (list, tuple, :class:`torch.Size`, optional): Size of the
+        sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols *
+        blocksize[1], *densesize)`` where ``blocksize ==
+        values.shape[1:3]``. If not provided, the size will be
+        inferred as the minimum size big enough to hold all non-zero
+        blocks.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of
+        returned tensor.  Default: if None, infers data type from
+        :attr:`values`.
+    device (:class:`torch.device`, optional): the desired device of
+        returned tensor.  Default: if None, uses the current device
+        for the default tensor type (see
+        :func:`torch.set_default_device`). :attr:`device` will be
+        the CPU for CPU tensor types and the current CUDA device for
+        CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+
+Example::
+    >>> crow_indices = [0, 1, 2]
+    >>> col_indices = [0, 1]
+    >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
+    >>> torch.sparse_bsr_tensor(torch.tensor(crow_indices, dtype=torch.int64),
+    ...                         torch.tensor(col_indices, dtype=torch.int64),
+    ...                         torch.tensor(values), dtype=torch.double)
+    tensor(crow_indices=tensor([0, 1, 2]),
+           col_indices=tensor([0, 1]),
+           values=tensor([[[1., 2.],
+                           [3., 4.]],
+                          [[5., 6.],
+                           [7., 8.]]]), size=(2, 2), nnz=2, dtype=torch.float64,
+           layout=torch.sparse_bsr)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_bsc_tensor,
+    r"""sparse_bsc_tensor(ccol_indices, row_indices, values, size=None, """
+    r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in BSC (Block Compressed Sparse
+Column)) <sparse-bsc-docs>` with specified 2-dimensional blocks at the
+given :attr:`ccol_indices` and :attr:`row_indices`. Sparse matrix
+multiplication operations in BSC format are typically faster than that
+for sparse tensors in COO format. Make you have a look at :ref:`the
+note on the data type of the indices <sparse-bsc-docs>`.
+
+{sparse_factory_device_note}
+
+Args:
+    ccol_indices (array_like): (B+1)-dimensional array of size
+        ``(*batchsize, ncolblocks + 1)``. The last element of each
+        batch is the number of non-zeros. This tensor encodes the
+        index in values and row_indices depending on where the given
+        column starts. Each successive number in the tensor subtracted
+        by the number before it denotes the number of elements in a
+        given column.
+    row_indices (array_like): Row block co-ordinates of each block in
+        values. (B+1)-dimensional tensor with the same length
+        as values.
+    values (array_list): Initial blocks for the tensor. Can be a list,
+        tuple, NumPy ``ndarray``, and other types that
+        represents a (1 + 2 + K)-dimensional tensor where ``K`` is the
+        number of dense dimensions.
+    size (list, tuple, :class:`torch.Size`, optional): Size of the
+        sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols *
+        blocksize[1], *densesize)`` If not provided, the size will be
+        inferred as the minimum size big enough to hold all non-zero
+        blocks.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of
+        returned tensor.  Default: if None, infers data type from
+        :attr:`values`.
+    device (:class:`torch.device`, optional): the desired device of
+        returned tensor.  Default: if None, uses the current device
+        for the default tensor type (see
+        :func:`torch.set_default_device`). :attr:`device` will be
+        the CPU for CPU tensor types and the current CUDA device for
+        CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+
+Example::
+    >>> ccol_indices = [0, 1, 2]
+    >>> row_indices = [0, 1]
+    >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
+    >>> torch.sparse_bsc_tensor(torch.tensor(ccol_indices, dtype=torch.int64),
+    ...                         torch.tensor(row_indices, dtype=torch.int64),
+    ...                         torch.tensor(values), dtype=torch.double)
+    tensor(ccol_indices=tensor([0, 1, 2]),
+           row_indices=tensor([0, 1]),
+           values=tensor([[[1., 2.],
+                           [3., 4.]],
+                          [[5., 6.],
+                           [7., 8.]]]), size=(2, 2), nnz=2, dtype=torch.float64,
+           layout=torch.sparse_bsc)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sparse_coo_tensor,
+    r"""sparse_coo_tensor(indices, values, size=None, """
+    r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None, is_coalesced=None) -> Tensor
+
+Constructs a :ref:`sparse tensor in COO(rdinate) format
+<sparse-coo-docs>` with specified values at the given
+:attr:`indices`.
+
+.. note::
+
+   This function returns an :ref:`uncoalesced tensor
+   <sparse-uncoalesced-coo-docs>` when :attr:`is_coalesced` is
+   unspecified or ``None``.
+
+{sparse_factory_device_note}
+
+Args:
+    indices (array_like): Initial data for the tensor. Can be a list, tuple,
+        NumPy ``ndarray``, scalar, and other types. Will be cast to a :class:`torch.LongTensor`
+        internally. The indices are the coordinates of the non-zero values in the matrix, and thus
+        should be two-dimensional where the first dimension is the number of tensor dimensions and
+        the second dimension is the number of non-zero values.
+    values (array_like): Initial values for the tensor. Can be a list, tuple,
+        NumPy ``ndarray``, scalar, and other types.
+    size (list, tuple, or :class:`torch.Size`, optional): Size of the sparse tensor. If not
+        provided the size will be inferred as the minimum size big enough to hold all non-zero
+        elements.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: if None, infers data type from :attr:`values`.
+    device (:class:`torch.device`, optional): the desired device of returned tensor.
+        Default: if None, uses the current device for the default tensor type
+        (see :func:`torch.set_default_device`). :attr:`device` will be the CPU
+        for CPU tensor types and the current CUDA device for CUDA tensor types.
+    {requires_grad}
+    {check_invariants}
+    is_coalesced (bool, optional): When``True``, the caller is
+        responsible for providing tensor indices that correspond to a
+        coalesced tensor.  If the :attr:`check_invariants` flag is
+        False, no error will be raised if the prerequisites are not
+        met and this will lead to silently incorrect results. To force
+        coalescion please use :meth:`coalesce` on the resulting
+        Tensor.
+        Default: None: except for trivial cases (e.g. nnz < 2) the
+        resulting Tensor has is_coalesced set to ``False```.
+
+Example::
+
+    >>> i = torch.tensor([[0, 1, 1],
+    ...                   [2, 0, 2]])
+    >>> v = torch.tensor([3, 4, 5], dtype=torch.float32)
+    >>> torch.sparse_coo_tensor(i, v, [2, 4])
+    tensor(indices=tensor([[0, 1, 1],
+                           [2, 0, 2]]),
+           values=tensor([3., 4., 5.]),
+           size=(2, 4), nnz=3, layout=torch.sparse_coo)
+
+    >>> torch.sparse_coo_tensor(i, v)  # Shape inference
+    tensor(indices=tensor([[0, 1, 1],
+                           [2, 0, 2]]),
+           values=tensor([3., 4., 5.]),
+           size=(2, 3), nnz=3, layout=torch.sparse_coo)
+
+    >>> torch.sparse_coo_tensor(i, v, [2, 4],
+    ...                         dtype=torch.float64,
+    ...                         device=torch.device('cuda:0'))
+    tensor(indices=tensor([[0, 1, 1],
+                           [2, 0, 2]]),
+           values=tensor([3., 4., 5.]),
+           device='cuda:0', size=(2, 4), nnz=3, dtype=torch.float64,
+           layout=torch.sparse_coo)
+
+    # Create an empty sparse tensor with the following invariants:
+    #   1. sparse_dim + dense_dim = len(SparseTensor.shape)
+    #   2. SparseTensor._indices().shape = (sparse_dim, nnz)
+    #   3. SparseTensor._values().shape = (nnz, SparseTensor.shape[sparse_dim:])
+    #
+    # For instance, to create an empty sparse tensor with nnz = 0, dense_dim = 0 and
+    # sparse_dim = 1 (hence indices is a 2D tensor of shape = (1, 0))
+    >>> S = torch.sparse_coo_tensor(torch.empty([1, 0]), [], [1])
+    tensor(indices=tensor([], size=(1, 0)),
+           values=tensor([], size=(0,)),
+           size=(1,), nnz=0, layout=torch.sparse_coo)
+
+    # and to create an empty sparse tensor with nnz = 0, dense_dim = 1 and
+    # sparse_dim = 1
+    >>> S = torch.sparse_coo_tensor(torch.empty([1, 0]), torch.empty([0, 2]), [1, 2])
+    tensor(indices=tensor([], size=(1, 0)),
+           values=tensor([], size=(0, 2)),
+           size=(1, 2), nnz=0, layout=torch.sparse_coo)
+
+.. _torch.sparse: https://pytorch.org/docs/stable/sparse.html
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.sqrt,
+    r"""
+sqrt(input, *, out=None) -> Tensor
+
+Returns a new tensor with the square-root of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \sqrt{\text{input}_{i}}
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-2.0755,  1.0226,  0.0831,  0.4806])
+    >>> torch.sqrt(a)
+    tensor([    nan,  1.0112,  0.2883,  0.6933])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.square,
+    r"""
+square(input, *, out=None) -> Tensor
+
+Returns a new tensor with the square of the elements of :attr:`input`.
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-2.0755,  1.0226,  0.0831,  0.4806])
+    >>> torch.square(a)
+    tensor([ 4.3077,  1.0457,  0.0069,  0.2310])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.squeeze,
+    r"""
+squeeze(input, dim=None) -> Tensor
+
+Returns a tensor with all specified dimensions of :attr:`input` of size `1` removed.
+
+For example, if `input` is of shape:
+:math:`(A \times 1 \times B \times C \times 1 \times D)` then the `input.squeeze()`
+will be of shape: :math:`(A \times B \times C \times D)`.
+
+When :attr:`dim` is given, a squeeze operation is done only in the given
+dimension(s). If `input` is of shape: :math:`(A \times 1 \times B)`,
+``squeeze(input, 0)`` leaves the tensor unchanged, but ``squeeze(input, 1)``
+will squeeze the tensor to the shape :math:`(A \times B)`.
+
+.. note:: The returned tensor shares the storage with the input tensor,
+          so changing the contents of one will change the contents of the other.
+
+.. warning:: If the tensor has a batch dimension of size 1, then `squeeze(input)`
+          will also remove the batch dimension, which can lead to unexpected
+          errors. Consider specifying only the dims you wish to be squeezed.
+
+Args:
+    {input}
+    dim (int or tuple of ints, optional): if given, the input will be squeezed
+           only in the specified dimensions.
+
+        .. versionchanged:: 2.0
+           :attr:`dim` now accepts tuples of dimensions.
+
+Example::
+
+    >>> x = torch.zeros(2, 1, 2, 1, 2)
+    >>> x.size()
+    torch.Size([2, 1, 2, 1, 2])
+    >>> y = torch.squeeze(x)
+    >>> y.size()
+    torch.Size([2, 2, 2])
+    >>> y = torch.squeeze(x, 0)
+    >>> y.size()
+    torch.Size([2, 1, 2, 1, 2])
+    >>> y = torch.squeeze(x, 1)
+    >>> y.size()
+    torch.Size([2, 2, 1, 2])
+    >>> y = torch.squeeze(x, (1, 2, 3))
+    torch.Size([2, 2, 2])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.std,
+    r"""
+std(input, dim=None, *, correction=1, keepdim=False, out=None) -> Tensor
+
+Calculates the standard deviation over the dimensions specified by :attr:`dim`.
+:attr:`dim` can be a single dimension, list of dimensions, or ``None`` to
+reduce over all dimensions.
+
+The standard deviation (:math:`\sigma`) is calculated as
+
+.. math:: \sigma = \sqrt{\frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2}
+
+where :math:`x` is the sample set of elements, :math:`\bar{x}` is the
+sample mean, :math:`N` is the number of samples and :math:`\delta N` is
+the :attr:`correction`.
+"""
+    + r"""
+
+{keepdim_details}
+
+Args:
+    {input}
+    {dim}
+
+Keyword args:
+    correction (int): difference between the sample size and sample degrees of freedom.
+        Defaults to `Bessel's correction`_, ``correction=1``.
+
+        .. versionchanged:: 2.0
+            Previously this argument was called ``unbiased`` and was a boolean
+            with ``True`` corresponding to ``correction=1`` and ``False`` being
+            ``correction=0``.
+    {keepdim}
+    {out}
+
+Example:
+
+    >>> a = torch.tensor(
+    ...     [[ 0.2035,  1.2959,  1.8101, -0.4644],
+    ...      [ 1.5027, -0.3270,  0.5905,  0.6538],
+    ...      [-1.5745,  1.3330, -0.5596, -0.6548],
+    ...      [ 0.1264, -0.5080,  1.6420,  0.1992]])
+    >>> torch.std(a, dim=1, keepdim=True)
+    tensor([[1.0311],
+            [0.7477],
+            [1.2204],
+            [0.9087]])
+
+.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction
+
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.std_mean,
+    r"""
+std_mean(input, dim=None, *, correction=1, keepdim=False, out=None) -> (Tensor, Tensor)
+
+Calculates the standard deviation and mean over the dimensions specified by
+:attr:`dim`. :attr:`dim` can be a single dimension, list of dimensions, or
+``None`` to reduce over all dimensions.
+
+The standard deviation (:math:`\sigma`) is calculated as
+
+.. math:: \sigma = \sqrt{\frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2}
+
+where :math:`x` is the sample set of elements, :math:`\bar{x}` is the
+sample mean, :math:`N` is the number of samples and :math:`\delta N` is
+the :attr:`correction`.
+
+"""
+    + r"""
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+
+Keyword args:
+    correction (int): difference between the sample size and sample degrees of freedom.
+        Defaults to `Bessel's correction`_, ``correction=1``.
+
+        .. versionchanged:: 2.0
+            Previously this argument was called ``unbiased`` and was a boolean
+            with ``True`` corresponding to ``correction=1`` and ``False`` being
+            ``correction=0``.
+    {keepdim}
+    {out}
+
+Returns:
+    A tuple (std, mean) containing the standard deviation and mean.
+
+Example:
+
+    >>> a = torch.tensor(
+    ...     [[ 0.2035,  1.2959,  1.8101, -0.4644],
+    ...      [ 1.5027, -0.3270,  0.5905,  0.6538],
+    ...      [-1.5745,  1.3330, -0.5596, -0.6548],
+    ...      [ 0.1264, -0.5080,  1.6420,  0.1992]])
+    >>> torch.std_mean(a, dim=0, keepdim=True)
+    (tensor([[1.2620, 1.0028, 1.0957, 0.6038]]),
+     tensor([[ 0.0645,  0.4485,  0.8707, -0.0665]]))
+
+.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction
+
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.sub,
+    r"""
+sub(input, other, *, alpha=1, out=None) -> Tensor
+
+Subtracts :attr:`other`, scaled by :attr:`alpha`, from :attr:`input`.
+
+.. math::
+    \text{{out}}_i = \text{{input}}_i - \text{{alpha}} \times \text{{other}}_i
+"""
+    + r"""
+
+Supports :ref:`broadcasting to a common shape <broadcasting-semantics>`,
+:ref:`type promotion <type-promotion-doc>`, and integer, float, and complex inputs.
+
+Args:
+    {input}
+    other (Tensor or Number): the tensor or number to subtract from :attr:`input`.
+
+Keyword args:
+    alpha (Number): the multiplier for :attr:`other`.
+    {out}
+
+Example::
+
+    >>> a = torch.tensor((1, 2))
+    >>> b = torch.tensor((0, 1))
+    >>> torch.sub(a, b, alpha=2)
+    tensor([1, 0])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.subtract,
+    r"""
+subtract(input, other, *, alpha=1, out=None) -> Tensor
+
+Alias for :func:`torch.sub`.
+""",
+)
+
+add_docstr(
+    torch.sum,
+    r"""
+sum(input, *, dtype=None) -> Tensor
+
+Returns the sum of all elements in the :attr:`input` tensor.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.randn(1, 3)
+    >>> a
+    tensor([[ 0.1133, -0.9567,  0.2958]])
+    >>> torch.sum(a)
+    tensor(-0.5475)
+
+.. function:: sum(input, dim, keepdim=False, *, dtype=None) -> Tensor
+   :noindex:
+
+Returns the sum of each row of the :attr:`input` tensor in the given
+dimension :attr:`dim`. If :attr:`dim` is a list of dimensions,
+reduce over all of them.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+    {keepdim}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.randn(4, 4)
+    >>> a
+    tensor([[ 0.0569, -0.2475,  0.0737, -0.3429],
+            [-0.2993,  0.9138,  0.9337, -1.6864],
+            [ 0.1132,  0.7892, -0.1003,  0.5688],
+            [ 0.3637, -0.9906, -0.4752, -1.5197]])
+    >>> torch.sum(a, 1)
+    tensor([-0.4598, -0.1381,  1.3708, -2.6217])
+    >>> b = torch.arange(4 * 5 * 6).view(4, 5, 6)
+    >>> torch.sum(b, (2, 1))
+    tensor([  435.,  1335.,  2235.,  3135.])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.nansum,
+    r"""
+nansum(input, *, dtype=None) -> Tensor
+
+Returns the sum of all elements, treating Not a Numbers (NaNs) as zero.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> a = torch.tensor([1., 2., float('nan'), 4.])
+    >>> torch.nansum(a)
+    tensor(7.)
+
+.. function:: nansum(input, dim, keepdim=False, *, dtype=None) -> Tensor
+   :noindex:
+
+Returns the sum of each row of the :attr:`input` tensor in the given
+dimension :attr:`dim`, treating Not a Numbers (NaNs) as zero.
+If :attr:`dim` is a list of dimensions, reduce over all of them.
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+    {keepdim}
+
+Keyword args:
+    {dtype}
+
+Example::
+
+    >>> torch.nansum(torch.tensor([1., float("nan")]))
+    1.0
+    >>> a = torch.tensor([[1, 2], [3., float("nan")]])
+    >>> torch.nansum(a)
+    tensor(6.)
+    >>> torch.nansum(a, dim=0)
+    tensor([4., 2.])
+    >>> torch.nansum(a, dim=1)
+    tensor([3., 3.])
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.svd,
+    r"""
+svd(input, some=True, compute_uv=True, *, out=None) -> (Tensor, Tensor, Tensor)
+
+Computes the singular value decomposition of either a matrix or batch of
+matrices :attr:`input`. The singular value decomposition is represented as a
+namedtuple `(U, S, V)`, such that :attr:`input` :math:`= U \text{diag}(S) V^{\text{H}}`.
+where :math:`V^{\text{H}}` is the transpose of `V` for real inputs,
+and the conjugate transpose of `V` for complex inputs.
+If :attr:`input` is a batch of matrices, then `U`, `S`, and `V` are also
+batched with the same batch dimensions as :attr:`input`.
+
+If :attr:`some` is `True` (default), the method returns the reduced singular
+value decomposition. In this case, if the last two dimensions of :attr:`input` are
+`m` and `n`, then the returned `U` and `V` matrices will contain only
+`min(n, m)` orthonormal columns.
+
+If :attr:`compute_uv` is `False`, the returned `U` and `V` will be
+zero-filled matrices of shape `(m, m)` and `(n, n)`
+respectively, and the same device as :attr:`input`. The argument :attr:`some`
+has no effect when :attr:`compute_uv` is `False`.
+
+Supports :attr:`input` of float, double, cfloat and cdouble data types.
+The dtypes of `U` and `V` are the same as :attr:`input`'s. `S` will
+always be real-valued, even if :attr:`input` is complex.
+
+.. warning::
+
+    :func:`torch.svd` is deprecated in favor of :func:`torch.linalg.svd`
+    and will be removed in a future PyTorch release.
+
+    ``U, S, V = torch.svd(A, some=some, compute_uv=True)`` (default) should be replaced with
+
+    .. code:: python
+
+        U, S, Vh = torch.linalg.svd(A, full_matrices=not some)
+        V = Vh.mH
+
+    ``_, S, _ = torch.svd(A, some=some, compute_uv=False)`` should be replaced with
+
+    .. code:: python
+
+        S = torch.linalg.svdvals(A)
+
+.. note:: Differences with :func:`torch.linalg.svd`:
+
+             * :attr:`some` is the opposite of
+               :func:`torch.linalg.svd`'s :attr:`full_matrices`. Note that
+               default value for both is `True`, so the default behavior is
+               effectively the opposite.
+             * :func:`torch.svd` returns `V`, whereas :func:`torch.linalg.svd` returns
+               `Vh`, that is, :math:`V^{\text{H}}`.
+             * If :attr:`compute_uv` is `False`, :func:`torch.svd` returns zero-filled
+               tensors for `U` and `Vh`, whereas :func:`torch.linalg.svd` returns
+               empty tensors.
+
+.. note:: The singular values are returned in descending order. If :attr:`input` is a batch of matrices,
+          then the singular values of each matrix in the batch are returned in descending order.
+
+.. note:: The `S` tensor can only be used to compute gradients if :attr:`compute_uv` is `True`.
+
+.. note:: When :attr:`some` is `False`, the gradients on `U[..., :, min(m, n):]`
+          and `V[..., :, min(m, n):]` will be ignored in the backward pass, as those vectors
+          can be arbitrary bases of the corresponding subspaces.
+
+.. note:: The implementation of :func:`torch.linalg.svd` on CPU uses LAPACK's routine `?gesdd`
+          (a divide-and-conquer algorithm) instead of `?gesvd` for speed. Analogously,
+          on GPU, it uses cuSOLVER's routines `gesvdj` and `gesvdjBatched` on CUDA 10.1.243
+          and later, and MAGMA's routine `gesdd` on earlier versions of CUDA.
+
+.. note:: The returned `U` will not be contiguous. The matrix (or batch of matrices) will
+          be represented as a column-major matrix (i.e. Fortran-contiguous).
+
+.. warning:: The gradients with respect to `U` and `V` will only be finite when the input does not
+             have zero nor repeated singular values.
+
+.. warning:: If the distance between any two singular values is close to zero, the gradients with respect to
+             `U` and `V` will be numerically unstable, as they depends on
+             :math:`\frac{1}{\min_{i \neq j} \sigma_i^2 - \sigma_j^2}`. The same happens when the matrix
+             has small singular values, as these gradients also depend on `S^{-1}`.
+
+.. warning:: For complex-valued :attr:`input` the singular value decomposition is not unique,
+             as `U` and `V` may be multiplied by an arbitrary phase factor :math:`e^{i \phi}` on every column.
+             The same happens when :attr:`input` has repeated singular values, where one may multiply
+             the columns of the spanning subspace in `U` and `V` by a rotation matrix
+             and `the resulting vectors will span the same subspace`_.
+             Different platforms, like NumPy, or inputs on different device types,
+             may produce different `U` and `V` tensors.
+
+Args:
+    input (Tensor): the input tensor of size `(*, m, n)` where `*` is zero or more
+                    batch dimensions consisting of `(m, n)` matrices.
+    some (bool, optional): controls whether to compute the reduced or full decomposition, and
+                           consequently, the shape of returned `U` and `V`. Default: `True`.
+    compute_uv (bool, optional): controls whether to compute `U` and `V`. Default: `True`.
+
+Keyword args:
+    out (tuple, optional): the output tuple of tensors
+
+Example::
+
+    >>> a = torch.randn(5, 3)
+    >>> a
+    tensor([[ 0.2364, -0.7752,  0.6372],
+            [ 1.7201,  0.7394, -0.0504],
+            [-0.3371, -1.0584,  0.5296],
+            [ 0.3550, -0.4022,  1.5569],
+            [ 0.2445, -0.0158,  1.1414]])
+    >>> u, s, v = torch.svd(a)
+    >>> u
+    tensor([[ 0.4027,  0.0287,  0.5434],
+            [-0.1946,  0.8833,  0.3679],
+            [ 0.4296, -0.2890,  0.5261],
+            [ 0.6604,  0.2717, -0.2618],
+            [ 0.4234,  0.2481, -0.4733]])
+    >>> s
+    tensor([2.3289, 2.0315, 0.7806])
+    >>> v
+    tensor([[-0.0199,  0.8766,  0.4809],
+            [-0.5080,  0.4054, -0.7600],
+            [ 0.8611,  0.2594, -0.4373]])
+    >>> torch.dist(a, torch.mm(torch.mm(u, torch.diag(s)), v.t()))
+    tensor(8.6531e-07)
+    >>> a_big = torch.randn(7, 5, 3)
+    >>> u, s, v = torch.svd(a_big)
+    >>> torch.dist(a_big, torch.matmul(torch.matmul(u, torch.diag_embed(s)), v.mT))
+    tensor(2.6503e-06)
+
+.. _the resulting vectors will span the same subspace:
+       (https://en.wikipedia.org/wiki/Singular_value_decomposition#Singular_values,_singular_vectors,_and_their_relation_to_the_SVD)
+""",
+)
+
+
+add_docstr(
+    torch.t,
+    r"""
+t(input) -> Tensor
+
+Expects :attr:`input` to be <= 2-D tensor and transposes dimensions 0
+and 1.
+
+0-D and 1-D tensors are returned as is. When input is a 2-D tensor this
+is equivalent to ``transpose(input, 0, 1)``.
+
+Args:
+    {input}
+
+Example::
+
+    >>> x = torch.randn(())
+    >>> x
+    tensor(0.1995)
+    >>> torch.t(x)
+    tensor(0.1995)
+    >>> x = torch.randn(3)
+    >>> x
+    tensor([ 2.4320, -0.4608,  0.7702])
+    >>> torch.t(x)
+    tensor([ 2.4320, -0.4608,  0.7702])
+    >>> x = torch.randn(2, 3)
+    >>> x
+    tensor([[ 0.4875,  0.9158, -0.5872],
+            [ 0.3938, -0.6929,  0.6932]])
+    >>> torch.t(x)
+    tensor([[ 0.4875,  0.3938],
+            [ 0.9158, -0.6929],
+            [-0.5872,  0.6932]])
+
+See also :func:`torch.transpose`.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.flip,
+    r"""
+flip(input, dims) -> Tensor
+
+Reverse the order of an n-D tensor along given axis in dims.
+
+.. note::
+    `torch.flip` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.flip`,
+    which returns a view in constant time. Since copying a tensor's data is more work than viewing that data,
+    `torch.flip` is expected to be slower than `np.flip`.
+
+Args:
+    {input}
+    dims (a list or tuple): axis to flip on
+
+Example::
+
+    >>> x = torch.arange(8).view(2, 2, 2)
+    >>> x
+    tensor([[[ 0,  1],
+             [ 2,  3]],
+
+            [[ 4,  5],
+             [ 6,  7]]])
+    >>> torch.flip(x, [0, 1])
+    tensor([[[ 6,  7],
+             [ 4,  5]],
+
+            [[ 2,  3],
+             [ 0,  1]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.fliplr,
+    r"""
+fliplr(input) -> Tensor
+
+Flip tensor in the left/right direction, returning a new tensor.
+
+Flip the entries in each row in the left/right direction.
+Columns are preserved, but appear in a different order than before.
+
+Note:
+    Requires the tensor to be at least 2-D.
+
+.. note::
+    `torch.fliplr` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.fliplr`,
+    which returns a view in constant time. Since copying a tensor's data is more work than viewing that data,
+    `torch.fliplr` is expected to be slower than `np.fliplr`.
+
+Args:
+    input (Tensor): Must be at least 2-dimensional.
+
+Example::
+
+    >>> x = torch.arange(4).view(2, 2)
+    >>> x
+    tensor([[0, 1],
+            [2, 3]])
+    >>> torch.fliplr(x)
+    tensor([[1, 0],
+            [3, 2]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.flipud,
+    r"""
+flipud(input) -> Tensor
+
+Flip tensor in the up/down direction, returning a new tensor.
+
+Flip the entries in each column in the up/down direction.
+Rows are preserved, but appear in a different order than before.
+
+Note:
+    Requires the tensor to be at least 1-D.
+
+.. note::
+    `torch.flipud` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.flipud`,
+    which returns a view in constant time. Since copying a tensor's data is more work than viewing that data,
+    `torch.flipud` is expected to be slower than `np.flipud`.
+
+Args:
+    input (Tensor): Must be at least 1-dimensional.
+
+Example::
+
+    >>> x = torch.arange(4).view(2, 2)
+    >>> x
+    tensor([[0, 1],
+            [2, 3]])
+    >>> torch.flipud(x)
+    tensor([[2, 3],
+            [0, 1]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.roll,
+    r"""
+roll(input, shifts, dims=None) -> Tensor
+
+Roll the tensor :attr:`input` along the given dimension(s). Elements that are
+shifted beyond the last position are re-introduced at the first position. If
+:attr:`dims` is `None`, the tensor will be flattened before rolling and then
+restored to the original shape.
+
+Args:
+    {input}
+    shifts (int or tuple of ints): The number of places by which the elements
+        of the tensor are shifted. If shifts is a tuple, dims must be a tuple of
+        the same size, and each dimension will be rolled by the corresponding
+        value
+    dims (int or tuple of ints): Axis along which to roll
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2)
+    >>> x
+    tensor([[1, 2],
+            [3, 4],
+            [5, 6],
+            [7, 8]])
+    >>> torch.roll(x, 1)
+    tensor([[8, 1],
+            [2, 3],
+            [4, 5],
+            [6, 7]])
+    >>> torch.roll(x, 1, 0)
+    tensor([[7, 8],
+            [1, 2],
+            [3, 4],
+            [5, 6]])
+    >>> torch.roll(x, -1, 0)
+    tensor([[3, 4],
+            [5, 6],
+            [7, 8],
+            [1, 2]])
+    >>> torch.roll(x, shifts=(2, 1), dims=(0, 1))
+    tensor([[6, 5],
+            [8, 7],
+            [2, 1],
+            [4, 3]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.rot90,
+    r"""
+rot90(input, k=1, dims=[0,1]) -> Tensor
+
+Rotate an n-D tensor by 90 degrees in the plane specified by dims axis.
+Rotation direction is from the first towards the second axis if k > 0, and from the second towards the first for k < 0.
+
+Args:
+    {input}
+    k (int): number of times to rotate. Default value is 1
+    dims (a list or tuple): axis to rotate. Default value is [0, 1]
+
+Example::
+
+    >>> x = torch.arange(4).view(2, 2)
+    >>> x
+    tensor([[0, 1],
+            [2, 3]])
+    >>> torch.rot90(x, 1, [0, 1])
+    tensor([[1, 3],
+            [0, 2]])
+
+    >>> x = torch.arange(8).view(2, 2, 2)
+    >>> x
+    tensor([[[0, 1],
+             [2, 3]],
+
+            [[4, 5],
+             [6, 7]]])
+    >>> torch.rot90(x, 1, [1, 2])
+    tensor([[[1, 3],
+             [0, 2]],
+
+            [[5, 7],
+             [4, 6]]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.take,
+    r"""
+take(input, index) -> Tensor
+
+Returns a new tensor with the elements of :attr:`input` at the given indices.
+The input tensor is treated as if it were viewed as a 1-D tensor. The result
+takes the same shape as the indices.
+
+Args:
+    {input}
+    index (LongTensor): the indices into tensor
+
+Example::
+
+    >>> src = torch.tensor([[4, 3, 5],
+    ...                     [6, 7, 8]])
+    >>> torch.take(src, torch.tensor([0, 2, 5]))
+    tensor([ 4,  5,  8])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.take_along_dim,
+    r"""
+take_along_dim(input, indices, dim=None, *, out=None) -> Tensor
+
+Selects values from :attr:`input` at the 1-dimensional indices from :attr:`indices` along the given :attr:`dim`.
+
+If :attr:`dim` is None, the input array is treated as if it has been flattened to 1d.
+
+Functions that return indices along a dimension, like :func:`torch.argmax` and :func:`torch.argsort`,
+are designed to work with this function. See the examples below.
+
+.. note::
+    This function is similar to NumPy's `take_along_axis`.
+    See also :func:`torch.gather`.
+
+Args:
+    {input}
+    indices (tensor): the indices into :attr:`input`. Must have long dtype.
+    dim (int, optional): dimension to select along.
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> t = torch.tensor([[10, 30, 20], [60, 40, 50]])
+    >>> max_idx = torch.argmax(t)
+    >>> torch.take_along_dim(t, max_idx)
+    tensor([60])
+    >>> sorted_idx = torch.argsort(t, dim=1)
+    >>> torch.take_along_dim(t, sorted_idx, dim=1)
+    tensor([[10, 20, 30],
+            [40, 50, 60]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.tan,
+    r"""
+tan(input, *, out=None) -> Tensor
+
+Returns a new tensor with the tangent of the elements of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \tan(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([-1.2027, -1.7687,  0.4412, -1.3856])
+    >>> torch.tan(a)
+    tensor([-2.5930,  4.9859,  0.4722, -5.3366])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.tanh,
+    r"""
+tanh(input, *, out=None) -> Tensor
+
+Returns a new tensor with the hyperbolic tangent of the elements
+of :attr:`input`.
+
+.. math::
+    \text{out}_{i} = \tanh(\text{input}_{i})
+"""
+    + r"""
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 0.8986, -0.7279,  1.1745,  0.2611])
+    >>> torch.tanh(a)
+    tensor([ 0.7156, -0.6218,  0.8257,  0.2553])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    # torch.softmax doc str. Point this to torch.nn.functional.softmax
+    torch.softmax,
+    r"""
+softmax(input, dim, *, dtype=None) -> Tensor
+
+Alias for :func:`torch.nn.functional.softmax`.
+""",
+)
+
+add_docstr(
+    torch.topk,
+    r"""
+topk(input, k, dim=None, largest=True, sorted=True, *, out=None) -> (Tensor, LongTensor)
+
+Returns the :attr:`k` largest elements of the given :attr:`input` tensor along
+a given dimension.
+
+If :attr:`dim` is not given, the last dimension of the `input` is chosen.
+
+If :attr:`largest` is ``False`` then the `k` smallest elements are returned.
+
+A namedtuple of `(values, indices)` is returned with the `values` and
+`indices` of the largest `k` elements of each row of the `input` tensor in the
+given dimension `dim`.
+
+The boolean option :attr:`sorted` if ``True``, will make sure that the returned
+`k` elements are themselves sorted
+
+Args:
+    {input}
+    k (int): the k in "top-k"
+    dim (int, optional): the dimension to sort along
+    largest (bool, optional): controls whether to return largest or
+           smallest elements
+    sorted (bool, optional): controls whether to return the elements
+           in sorted order
+
+Keyword args:
+    out (tuple, optional): the output tuple of (Tensor, LongTensor) that can be
+        optionally given to be used as output buffers
+
+Example::
+
+    >>> x = torch.arange(1., 6.)
+    >>> x
+    tensor([ 1.,  2.,  3.,  4.,  5.])
+    >>> torch.topk(x, 3)
+    torch.return_types.topk(values=tensor([5., 4., 3.]), indices=tensor([4, 3, 2]))
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.trace,
+    r"""
+trace(input) -> Tensor
+
+Returns the sum of the elements of the diagonal of the input 2-D matrix.
+
+Example::
+
+    >>> x = torch.arange(1., 10.).view(3, 3)
+    >>> x
+    tensor([[ 1.,  2.,  3.],
+            [ 4.,  5.,  6.],
+            [ 7.,  8.,  9.]])
+    >>> torch.trace(x)
+    tensor(15.)
+""",
+)
+
+add_docstr(
+    torch.transpose,
+    r"""
+transpose(input, dim0, dim1) -> Tensor
+
+Returns a tensor that is a transposed version of :attr:`input`.
+The given dimensions :attr:`dim0` and :attr:`dim1` are swapped.
+
+If :attr:`input` is a strided tensor then the resulting :attr:`out`
+tensor shares its underlying storage with the :attr:`input` tensor, so
+changing the content of one would change the content of the other.
+
+If :attr:`input` is a :ref:`sparse tensor <sparse-docs>` then the
+resulting :attr:`out` tensor *does not* share the underlying storage
+with the :attr:`input` tensor.
+
+If :attr:`input` is a :ref:`sparse tensor <sparse-docs>` with compressed
+layout (SparseCSR, SparseBSR, SparseCSC or SparseBSC) the arguments
+:attr:`dim0` and :attr:`dim1` must be both batch dimensions, or must
+both be sparse dimensions. The batch dimensions of a sparse tensor are the
+dimensions preceding the sparse dimensions.
+
+.. note::
+    Transpositions which interchange the sparse dimensions of a `SparseCSR`
+    or `SparseCSC` layout tensor will result in the layout changing between
+    the two options. Transposition of the sparse dimensions of a ` SparseBSR`
+    or `SparseBSC` layout tensor will likewise generate a result with the
+    opposite layout.
+
+
+Args:
+    {input}
+    dim0 (int): the first dimension to be transposed
+    dim1 (int): the second dimension to be transposed
+
+Example::
+
+    >>> x = torch.randn(2, 3)
+    >>> x
+    tensor([[ 1.0028, -0.9893,  0.5809],
+            [-0.1669,  0.7299,  0.4942]])
+    >>> torch.transpose(x, 0, 1)
+    tensor([[ 1.0028, -0.1669],
+            [-0.9893,  0.7299],
+            [ 0.5809,  0.4942]])
+
+See also :func:`torch.t`.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.triangular_solve,
+    r"""
+triangular_solve(b, A, upper=True, transpose=False, unitriangular=False, *, out=None) -> (Tensor, Tensor)
+
+Solves a system of equations with a square upper or lower triangular invertible matrix :math:`A`
+and multiple right-hand sides :math:`b`.
+
+In symbols, it solves :math:`AX = b` and assumes :math:`A` is square upper-triangular
+(or lower-triangular if :attr:`upper`\ `= False`) and does not have zeros on the diagonal.
+
+`torch.triangular_solve(b, A)` can take in 2D inputs `b, A` or inputs that are
+batches of 2D matrices. If the inputs are batches, then returns
+batched outputs `X`
+
+If the diagonal of :attr:`A` contains zeros or elements that are very close to zero and
+:attr:`unitriangular`\ `= False` (default) or if the input matrix is badly conditioned,
+the result may contain `NaN` s.
+
+Supports input of float, double, cfloat and cdouble data types.
+
+.. warning::
+
+    :func:`torch.triangular_solve` is deprecated in favor of :func:`torch.linalg.solve_triangular`
+    and will be removed in a future PyTorch release.
+    :func:`torch.linalg.solve_triangular` has its arguments reversed and does not return a
+    copy of one of the inputs.
+
+    ``X = torch.triangular_solve(B, A).solution`` should be replaced with
+
+    .. code:: python
+
+        X = torch.linalg.solve_triangular(A, B)
+
+Args:
+    b (Tensor): multiple right-hand sides of size :math:`(*, m, k)` where
+                :math:`*` is zero of more batch dimensions
+    A (Tensor): the input triangular coefficient matrix of size :math:`(*, m, m)`
+                where :math:`*` is zero or more batch dimensions
+    upper (bool, optional): whether :math:`A` is upper or lower triangular. Default: ``True``.
+    transpose (bool, optional): solves `op(A)X = b` where `op(A) = A^T` if this flag is ``True``,
+                                and `op(A) = A` if it is ``False``. Default: ``False``.
+    unitriangular (bool, optional): whether :math:`A` is unit triangular.
+        If True, the diagonal elements of :math:`A` are assumed to be
+        1 and not referenced from :math:`A`. Default: ``False``.
+
+Keyword args:
+    out ((Tensor, Tensor), optional): tuple of two tensors to write
+        the output to. Ignored if `None`. Default: `None`.
+
+Returns:
+    A namedtuple `(solution, cloned_coefficient)` where `cloned_coefficient`
+    is a clone of :math:`A` and `solution` is the solution :math:`X` to :math:`AX = b`
+    (or whatever variant of the system of equations, depending on the keyword arguments.)
+
+Examples::
+
+    >>> A = torch.randn(2, 2).triu()
+    >>> A
+    tensor([[ 1.1527, -1.0753],
+            [ 0.0000,  0.7986]])
+    >>> b = torch.randn(2, 3)
+    >>> b
+    tensor([[-0.0210,  2.3513, -1.5492],
+            [ 1.5429,  0.7403, -1.0243]])
+    >>> torch.triangular_solve(b, A)
+    torch.return_types.triangular_solve(
+    solution=tensor([[ 1.7841,  2.9046, -2.5405],
+            [ 1.9320,  0.9270, -1.2826]]),
+    cloned_coefficient=tensor([[ 1.1527, -1.0753],
+            [ 0.0000,  0.7986]]))
+""",
+)
+
+add_docstr(
+    torch.tril,
+    r"""
+tril(input, diagonal=0, *, out=None) -> Tensor
+
+Returns the lower triangular part of the matrix (2-D tensor) or batch of matrices
+:attr:`input`, the other elements of the result tensor :attr:`out` are set to 0.
+
+The lower triangular part of the matrix is defined as the elements on and
+below the diagonal.
+
+The argument :attr:`diagonal` controls which diagonal to consider. If
+:attr:`diagonal` = 0, all elements on and below the main diagonal are
+retained. A positive value includes just as many diagonals above the main
+diagonal, and similarly a negative value excludes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` where
+:math:`d_{1}, d_{2}` are the dimensions of the matrix.
+"""
+    + r"""
+Args:
+    {input}
+    diagonal (int, optional): the diagonal to consider
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(3, 3)
+    >>> a
+    tensor([[-1.0813, -0.8619,  0.7105],
+            [ 0.0935,  0.1380,  2.2112],
+            [-0.3409, -0.9828,  0.0289]])
+    >>> torch.tril(a)
+    tensor([[-1.0813,  0.0000,  0.0000],
+            [ 0.0935,  0.1380,  0.0000],
+            [-0.3409, -0.9828,  0.0289]])
+
+    >>> b = torch.randn(4, 6)
+    >>> b
+    tensor([[ 1.2219,  0.5653, -0.2521, -0.2345,  1.2544,  0.3461],
+            [ 0.4785, -0.4477,  0.6049,  0.6368,  0.8775,  0.7145],
+            [ 1.1502,  3.2716, -1.1243, -0.5413,  0.3615,  0.6864],
+            [-0.0614, -0.7344, -1.3164, -0.7648, -1.4024,  0.0978]])
+    >>> torch.tril(b, diagonal=1)
+    tensor([[ 1.2219,  0.5653,  0.0000,  0.0000,  0.0000,  0.0000],
+            [ 0.4785, -0.4477,  0.6049,  0.0000,  0.0000,  0.0000],
+            [ 1.1502,  3.2716, -1.1243, -0.5413,  0.0000,  0.0000],
+            [-0.0614, -0.7344, -1.3164, -0.7648, -1.4024,  0.0000]])
+    >>> torch.tril(b, diagonal=-1)
+    tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
+            [ 0.4785,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
+            [ 1.1502,  3.2716,  0.0000,  0.0000,  0.0000,  0.0000],
+            [-0.0614, -0.7344, -1.3164,  0.0000,  0.0000,  0.0000]])
+""".format(
+        **common_args
+    ),
+)
+
+# docstr is split in two parts to avoid format mis-captureing :math: braces '{}'
+# as common args.
+add_docstr(
+    torch.tril_indices,
+    r"""
+tril_indices(row, col, offset=0, *, dtype=torch.long, device='cpu', layout=torch.strided) -> Tensor
+
+Returns the indices of the lower triangular part of a :attr:`row`-by-
+:attr:`col` matrix in a 2-by-N Tensor, where the first row contains row
+coordinates of all indices and the second row contains column coordinates.
+Indices are ordered based on rows and then columns.
+
+The lower triangular part of the matrix is defined as the elements on and
+below the diagonal.
+
+The argument :attr:`offset` controls which diagonal to consider. If
+:attr:`offset` = 0, all elements on and below the main diagonal are
+retained. A positive value includes just as many diagonals above the main
+diagonal, and similarly a negative value excludes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]`
+where :math:`d_{1}, d_{2}` are the dimensions of the matrix.
+
+.. note::
+    When running on CUDA, ``row * col`` must be less than :math:`2^{59}` to
+    prevent overflow during calculation.
+"""
+    + r"""
+Args:
+    row (``int``): number of rows in the 2-D matrix.
+    col (``int``): number of columns in the 2-D matrix.
+    offset (``int``): diagonal offset from the main diagonal.
+        Default: if not provided, 0.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: if ``None``, ``torch.long``.
+    {device}
+    layout (:class:`torch.layout`, optional): currently only support ``torch.strided``.
+
+Example::
+
+    >>> a = torch.tril_indices(3, 3)
+    >>> a
+    tensor([[0, 1, 1, 2, 2, 2],
+            [0, 0, 1, 0, 1, 2]])
+
+    >>> a = torch.tril_indices(4, 3, -1)
+    >>> a
+    tensor([[1, 2, 2, 3, 3, 3],
+            [0, 0, 1, 0, 1, 2]])
+
+    >>> a = torch.tril_indices(4, 3, 1)
+    >>> a
+    tensor([[0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],
+            [0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.triu,
+    r"""
+triu(input, diagonal=0, *, out=None) -> Tensor
+
+Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices
+:attr:`input`, the other elements of the result tensor :attr:`out` are set to 0.
+
+The upper triangular part of the matrix is defined as the elements on and
+above the diagonal.
+
+The argument :attr:`diagonal` controls which diagonal to consider. If
+:attr:`diagonal` = 0, all elements on and above the main diagonal are
+retained. A positive value excludes just as many diagonals above the main
+diagonal, and similarly a negative value includes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` where
+:math:`d_{1}, d_{2}` are the dimensions of the matrix.
+"""
+    + r"""
+Args:
+    {input}
+    diagonal (int, optional): the diagonal to consider
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(3, 3)
+    >>> a
+    tensor([[ 0.2309,  0.5207,  2.0049],
+            [ 0.2072, -1.0680,  0.6602],
+            [ 0.3480, -0.5211, -0.4573]])
+    >>> torch.triu(a)
+    tensor([[ 0.2309,  0.5207,  2.0049],
+            [ 0.0000, -1.0680,  0.6602],
+            [ 0.0000,  0.0000, -0.4573]])
+    >>> torch.triu(a, diagonal=1)
+    tensor([[ 0.0000,  0.5207,  2.0049],
+            [ 0.0000,  0.0000,  0.6602],
+            [ 0.0000,  0.0000,  0.0000]])
+    >>> torch.triu(a, diagonal=-1)
+    tensor([[ 0.2309,  0.5207,  2.0049],
+            [ 0.2072, -1.0680,  0.6602],
+            [ 0.0000, -0.5211, -0.4573]])
+
+    >>> b = torch.randn(4, 6)
+    >>> b
+    tensor([[ 0.5876, -0.0794, -1.8373,  0.6654,  0.2604,  1.5235],
+            [-0.2447,  0.9556, -1.2919,  1.3378, -0.1768, -1.0857],
+            [ 0.4333,  0.3146,  0.6576, -1.0432,  0.9348, -0.4410],
+            [-0.9888,  1.0679, -1.3337, -1.6556,  0.4798,  0.2830]])
+    >>> torch.triu(b, diagonal=1)
+    tensor([[ 0.0000, -0.0794, -1.8373,  0.6654,  0.2604,  1.5235],
+            [ 0.0000,  0.0000, -1.2919,  1.3378, -0.1768, -1.0857],
+            [ 0.0000,  0.0000,  0.0000, -1.0432,  0.9348, -0.4410],
+            [ 0.0000,  0.0000,  0.0000,  0.0000,  0.4798,  0.2830]])
+    >>> torch.triu(b, diagonal=-1)
+    tensor([[ 0.5876, -0.0794, -1.8373,  0.6654,  0.2604,  1.5235],
+            [-0.2447,  0.9556, -1.2919,  1.3378, -0.1768, -1.0857],
+            [ 0.0000,  0.3146,  0.6576, -1.0432,  0.9348, -0.4410],
+            [ 0.0000,  0.0000, -1.3337, -1.6556,  0.4798,  0.2830]])
+""".format(
+        **common_args
+    ),
+)
+
+# docstr is split in two parts to avoid format mis-capturing :math: braces '{}'
+# as common args.
+add_docstr(
+    torch.triu_indices,
+    r"""
+triu_indices(row, col, offset=0, *, dtype=torch.long, device='cpu', layout=torch.strided) -> Tensor
+
+Returns the indices of the upper triangular part of a :attr:`row` by
+:attr:`col` matrix in a 2-by-N Tensor, where the first row contains row
+coordinates of all indices and the second row contains column coordinates.
+Indices are ordered based on rows and then columns.
+
+The upper triangular part of the matrix is defined as the elements on and
+above the diagonal.
+
+The argument :attr:`offset` controls which diagonal to consider. If
+:attr:`offset` = 0, all elements on and above the main diagonal are
+retained. A positive value excludes just as many diagonals above the main
+diagonal, and similarly a negative value includes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]`
+where :math:`d_{1}, d_{2}` are the dimensions of the matrix.
+
+.. note::
+    When running on CUDA, ``row * col`` must be less than :math:`2^{59}` to
+    prevent overflow during calculation.
+"""
+    + r"""
+Args:
+    row (``int``): number of rows in the 2-D matrix.
+    col (``int``): number of columns in the 2-D matrix.
+    offset (``int``): diagonal offset from the main diagonal.
+        Default: if not provided, 0.
+
+Keyword args:
+    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
+        Default: if ``None``, ``torch.long``.
+    {device}
+    layout (:class:`torch.layout`, optional): currently only support ``torch.strided``.
+
+Example::
+
+    >>> a = torch.triu_indices(3, 3)
+    >>> a
+    tensor([[0, 0, 0, 1, 1, 2],
+            [0, 1, 2, 1, 2, 2]])
+
+    >>> a = torch.triu_indices(4, 3, -1)
+    >>> a
+    tensor([[0, 0, 0, 1, 1, 1, 2, 2, 3],
+            [0, 1, 2, 0, 1, 2, 1, 2, 2]])
+
+    >>> a = torch.triu_indices(4, 3, 1)
+    >>> a
+    tensor([[0, 0, 1],
+            [1, 2, 2]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.true_divide,
+    r"""
+true_divide(dividend, divisor, *, out) -> Tensor
+
+Alias for :func:`torch.div` with ``rounding_mode=None``.
+""",
+)
+
+add_docstr(
+    torch.trunc,
+    r"""
+trunc(input, *, out=None) -> Tensor
+
+Returns a new tensor with the truncated integer values of
+the elements of :attr:`input`.
+
+For integer inputs, follows the array-api convention of returning a
+copy of the input tensor.
+
+Args:
+    {input}
+
+Keyword args:
+    {out}
+
+Example::
+
+    >>> a = torch.randn(4)
+    >>> a
+    tensor([ 3.4742,  0.5466, -0.8008, -0.9079])
+    >>> torch.trunc(a)
+    tensor([ 3.,  0., -0., -0.])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.fake_quantize_per_tensor_affine,
+    r"""
+fake_quantize_per_tensor_affine(input, scale, zero_point, quant_min, quant_max) -> Tensor
+
+Returns a new tensor with the data in :attr:`input` fake quantized using :attr:`scale`,
+:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`.
+
+.. math::
+    \text{output} = (
+        min(
+            \text{quant\_max},
+            max(
+                \text{quant\_min},
+                \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point}
+            )
+        ) - \text{zero\_point}
+    ) \times \text{scale}
+
+Args:
+    input (Tensor): the input value(s), ``torch.float32`` tensor
+    scale (double scalar or ``float32`` Tensor): quantization scale
+    zero_point (int64 scalar or ``int32`` Tensor): quantization zero_point
+    quant_min (int64): lower bound of the quantized domain
+    quant_max (int64): upper bound of the quantized domain
+
+Returns:
+    Tensor: A newly fake_quantized ``torch.float32`` tensor
+
+Example::
+
+    >>> x = torch.randn(4)
+    >>> x
+    tensor([ 0.0552,  0.9730,  0.3973, -1.0780])
+    >>> torch.fake_quantize_per_tensor_affine(x, 0.1, 0, 0, 255)
+    tensor([0.1000, 1.0000, 0.4000, 0.0000])
+    >>> torch.fake_quantize_per_tensor_affine(x, torch.tensor(0.1), torch.tensor(0), 0, 255)
+    tensor([0.1000, 1.0000, 0.4000, 0.0000])
+""",
+)
+
+add_docstr(
+    torch.fake_quantize_per_channel_affine,
+    r"""
+fake_quantize_per_channel_affine(input, scale, zero_point, axis, quant_min, quant_max) -> Tensor
+
+Returns a new tensor with the data in :attr:`input` fake quantized per channel using :attr:`scale`,
+:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`, across the channel specified by :attr:`axis`.
+
+.. math::
+    \text{output} = (
+        min(
+            \text{quant\_max},
+            max(
+                \text{quant\_min},
+                \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point}
+            )
+        ) - \text{zero\_point}
+    ) \times \text{scale}
+
+Args:
+    input (Tensor): the input value(s), in ``torch.float32``
+    scale (Tensor): quantization scale, per channel in ``torch.float32``
+    zero_point (Tensor): quantization zero_point, per channel in ``torch.int32`` or ``torch.half`` or ``torch.float32``
+    axis (int32): channel axis
+    quant_min (int64): lower bound of the quantized domain
+    quant_max (int64): upper bound of the quantized domain
+
+Returns:
+    Tensor: A newly fake_quantized per channel ``torch.float32`` tensor
+
+Example::
+
+    >>> x = torch.randn(2, 2, 2)
+    >>> x
+    tensor([[[-0.2525, -0.0466],
+             [ 0.3491, -0.2168]],
+
+            [[-0.5906,  1.6258],
+             [ 0.6444, -0.0542]]])
+    >>> scales = (torch.randn(2) + 1) * 0.05
+    >>> scales
+    tensor([0.0475, 0.0486])
+    >>> zero_points = torch.zeros(2).to(torch.int32)
+    >>> zero_points
+    tensor([0, 0])
+    >>> torch.fake_quantize_per_channel_affine(x, scales, zero_points, 1, 0, 255)
+    tensor([[[0.0000, 0.0000],
+             [0.3405, 0.0000]],
+
+            [[0.0000, 1.6134],
+            [0.6323, 0.0000]]])
+""",
+)
+
+add_docstr(
+    torch.fix,
+    r"""
+fix(input, *, out=None) -> Tensor
+
+Alias for :func:`torch.trunc`
+""",
+)
+
+add_docstr(
+    torch.unsqueeze,
+    r"""
+unsqueeze(input, dim) -> Tensor
+
+Returns a new tensor with a dimension of size one inserted at the
+specified position.
+
+The returned tensor shares the same underlying data with this tensor.
+
+A :attr:`dim` value within the range ``[-input.dim() - 1, input.dim() + 1)``
+can be used. Negative :attr:`dim` will correspond to :meth:`unsqueeze`
+applied at :attr:`dim` = ``dim + input.dim() + 1``.
+
+Args:
+    {input}
+    dim (int): the index at which to insert the singleton dimension
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3, 4])
+    >>> torch.unsqueeze(x, 0)
+    tensor([[ 1,  2,  3,  4]])
+    >>> torch.unsqueeze(x, 1)
+    tensor([[ 1],
+            [ 2],
+            [ 3],
+            [ 4]])
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.var,
+    r"""
+var(input, dim=None, *, correction=1, keepdim=False, out=None) -> Tensor
+
+Calculates the variance over the dimensions specified by :attr:`dim`. :attr:`dim`
+can be a single dimension, list of dimensions, or ``None`` to reduce over all
+dimensions.
+
+The variance (:math:`\sigma^2`) is calculated as
+
+.. math:: \sigma^2 = \frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2
+
+where :math:`x` is the sample set of elements, :math:`\bar{x}` is the
+sample mean, :math:`N` is the number of samples and :math:`\delta N` is
+the :attr:`correction`.
+"""
+    + r"""
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+
+Keyword args:
+    correction (int): difference between the sample size and sample degrees of freedom.
+        Defaults to `Bessel's correction`_, ``correction=1``.
+
+        .. versionchanged:: 2.0
+            Previously this argument was called ``unbiased`` and was a boolean
+            with ``True`` corresponding to ``correction=1`` and ``False`` being
+            ``correction=0``.
+    {keepdim}
+    {out}
+
+Example:
+
+    >>> a = torch.tensor(
+    ...     [[ 0.2035,  1.2959,  1.8101, -0.4644],
+    ...      [ 1.5027, -0.3270,  0.5905,  0.6538],
+    ...      [-1.5745,  1.3330, -0.5596, -0.6548],
+    ...      [ 0.1264, -0.5080,  1.6420,  0.1992]])
+    >>> torch.var(a, dim=1, keepdim=True)
+    tensor([[1.0631],
+            [0.5590],
+            [1.4893],
+            [0.8258]])
+
+.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction
+
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.var_mean,
+    r"""
+var_mean(input, dim=None, *, correction=1, keepdim=False, out=None) -> (Tensor, Tensor)
+
+Calculates the variance and mean over the dimensions specified by :attr:`dim`.
+:attr:`dim` can be a single dimension, list of dimensions, or ``None`` to
+reduce over all dimensions.
+
+The variance (:math:`\sigma^2`) is calculated as
+
+.. math:: \sigma^2 = \frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2
+
+where :math:`x` is the sample set of elements, :math:`\bar{x}` is the
+sample mean, :math:`N` is the number of samples and :math:`\delta N` is
+the :attr:`correction`.
+"""
+    + r"""
+
+{keepdim_details}
+
+Args:
+    {input}
+    {opt_dim}
+
+Keyword args:
+    correction (int): difference between the sample size and sample degrees of freedom.
+        Defaults to `Bessel's correction`_, ``correction=1``.
+
+        .. versionchanged:: 2.0
+            Previously this argument was called ``unbiased`` and was a boolean
+            with ``True`` corresponding to ``correction=1`` and ``False`` being
+            ``correction=0``.
+    {keepdim}
+    {out}
+
+Returns:
+    A tuple (var, mean) containing the variance and mean.
+
+Example:
+
+    >>> a = torch.tensor(
+    ...     [[ 0.2035,  1.2959,  1.8101, -0.4644],
+    ...      [ 1.5027, -0.3270,  0.5905,  0.6538],
+    ...      [-1.5745,  1.3330, -0.5596, -0.6548],
+    ...      [ 0.1264, -0.5080,  1.6420,  0.1992]])
+    >>> torch.var_mean(a, dim=0, keepdim=True)
+    (tensor([[1.5926, 1.0056, 1.2005, 0.3646]]),
+     tensor([[ 0.0645,  0.4485,  0.8707, -0.0665]]))
+
+.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction
+
+""".format(
+        **multi_dim_common
+    ),
+)
+
+add_docstr(
+    torch.zeros,
+    r"""
+zeros(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Returns a tensor filled with the scalar value `0`, with the shape defined
+by the variable argument :attr:`size`.
+
+Args:
+    size (int...): a sequence of integers defining the shape of the output tensor.
+        Can be a variable number of arguments or a collection like a list or tuple.
+
+Keyword args:
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.zeros(2, 3)
+    tensor([[ 0.,  0.,  0.],
+            [ 0.,  0.,  0.]])
+
+    >>> torch.zeros(5)
+    tensor([ 0.,  0.,  0.,  0.,  0.])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.zeros_like,
+    r"""
+zeros_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor filled with the scalar value `0`, with the same size as
+:attr:`input`. ``torch.zeros_like(input)`` is equivalent to
+``torch.zeros(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+.. warning::
+    As of 0.4, this function does not support an :attr:`out` keyword. As an alternative,
+    the old ``torch.zeros_like(input, out=output)`` is equivalent to
+    ``torch.zeros(input.size(), out=output)``.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+Example::
+
+    >>> input = torch.empty(2, 3)
+    >>> torch.zeros_like(input)
+    tensor([[ 0.,  0.,  0.],
+            [ 0.,  0.,  0.]])
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.empty,
+    """
+empty(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, pin_memory=False, \
+memory_format=torch.contiguous_format) -> Tensor
+
+Returns a tensor filled with uninitialized data. The shape of the tensor is
+defined by the variable argument :attr:`size`.
+
+.. note::
+    If :func:`torch.use_deterministic_algorithms()` and
+    :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to
+    ``True``, the output tensor is initialized to prevent any possible
+    nondeterministic behavior from using the data as an input to an operation.
+    Floating point and complex tensors are filled with NaN, and integer tensors
+    are filled with the maximum value.
+
+Args:
+    size (int...): a sequence of integers defining the shape of the output tensor.
+        Can be a variable number of arguments or a collection like a list or tuple.
+
+Keyword args:
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+    {memory_format}
+
+Example::
+
+    >>> torch.empty((2,3), dtype=torch.int64)
+    tensor([[ 9.4064e+13,  2.8000e+01,  9.3493e+13],
+            [ 7.5751e+18,  7.1428e+18,  7.5955e+18]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.empty_like,
+    r"""
+empty_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor
+
+Returns an uninitialized tensor with the same size as :attr:`input`.
+``torch.empty_like(input)`` is equivalent to
+``torch.empty(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+.. note::
+    If :func:`torch.use_deterministic_algorithms()` and
+    :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to
+    ``True``, the output tensor is initialized to prevent any possible
+    nondeterministic behavior from using the data as an input to an operation.
+    Floating point and complex tensors are filled with NaN, and integer tensors
+    are filled with the maximum value.
+
+Args:
+    {input}
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+
+Example::
+
+    >>> a=torch.empty((2,3), dtype=torch.int32, device = 'cuda')
+    >>> torch.empty_like(a)
+    tensor([[0, 0, 0],
+            [0, 0, 0]], device='cuda:0', dtype=torch.int32)
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.empty_strided,
+    r"""
+empty_strided(size, stride, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False) -> Tensor
+
+Creates a tensor with the specified :attr:`size` and :attr:`stride` and filled with undefined data.
+
+.. warning::
+    If the constructed tensor is "overlapped" (with multiple indices referring to the same element
+    in memory) its behavior is undefined.
+
+.. note::
+    If :func:`torch.use_deterministic_algorithms()` and
+    :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to
+    ``True``, the output tensor is initialized to prevent any possible
+    nondeterministic behavior from using the data as an input to an operation.
+    Floating point and complex tensors are filled with NaN, and integer tensors
+    are filled with the maximum value.
+
+Args:
+    size (tuple of int): the shape of the output tensor
+    stride (tuple of int): the strides of the output tensor
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+
+Example::
+
+    >>> a = torch.empty_strided((2, 3), (1, 2))
+    >>> a
+    tensor([[8.9683e-44, 4.4842e-44, 5.1239e+07],
+            [0.0000e+00, 0.0000e+00, 3.0705e-41]])
+    >>> a.stride()
+    (1, 2)
+    >>> a.size()
+    torch.Size([2, 3])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.empty_permuted,
+    r"""
+empty_permuted(size, physical_layout, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False) -> Tensor
+
+Creates an uninitialized, non-overlapping and dense tensor with the
+specified :attr:`size`, with :attr:`physical_layout` specifying how the
+dimensions are physically laid out in memory (each logical dimension is listed
+from outermost to innermost).  :attr:`physical_layout` is a generalization
+of NCHW/NHWC notation: if each dimension is assigned a number according to
+what order they occur in size (N=0, C=1, H=2, W=3), then NCHW is ``(0, 1, 2, 3)``
+while NHWC is ``(0, 2, 3, 1)``.  Equivalently, the strides of the output
+tensor ``t`` are such that ``t.stride(physical_layout[i]) == contiguous_strides[i]``
+(notably, this function is *not* equivalent to ``torch.empty(size).permute(physical_layout)``).
+
+Unlike :func:`torch.empty_strided`, this is guaranteed to produce a dense
+tensor with no overlaps.  If possible, prefer using this function over
+:func:`torch.empty_strided` or manual use of :func:`torch.as_strided`.
+
+.. note::
+    If :func:`torch.use_deterministic_algorithms()` and
+    :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to
+    ``True``, the output tensor is initialized to prevent any possible
+    nondeterministic behavior from using the data as an input to an operation.
+    Floating point and complex tensors are filled with NaN, and integer tensors
+    are filled with the maximum value.
+
+Args:
+    size (tuple of int): the shape of the output tensor
+    physical_layout (tuple of int): the ordering of dimensions physically in memory
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {pin_memory}
+
+Examples:
+
+    >>> torch.empty((2, 3, 5, 7)).stride()
+    (105, 35, 7, 1)
+    >>> torch.empty_permuted((2, 3, 5, 7), (0, 1, 2, 3)).stride()
+    (105, 35, 7, 1)
+    >>> torch.empty((2, 3, 5, 7), memory_format=torch.channels_last).stride()
+    (105, 1, 21, 3)
+    >>> torch.empty_permuted((2, 3, 5, 7), (0, 2, 3, 1)).stride()
+    (105, 1, 21, 3)
+    >>> torch.empty_permuted((2, 3, 5, 7), (0, 2, 3, 1)).dim_order()
+    (0, 2, 3, 1)
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.full,
+    r"""
+full(size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+
+Creates a tensor of size :attr:`size` filled with :attr:`fill_value`. The
+tensor's dtype is inferred from :attr:`fill_value`.
+
+Args:
+    size (int...): a list, tuple, or :class:`torch.Size` of integers defining the
+        shape of the output tensor.
+    fill_value (Scalar): the value to fill the output tensor with.
+
+Keyword args:
+    {out}
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+
+Example::
+
+    >>> torch.full((2, 3), 3.141592)
+    tensor([[ 3.1416,  3.1416,  3.1416],
+            [ 3.1416,  3.1416,  3.1416]])
+""".format(
+        **factory_common_args
+    ),
+)
+
+add_docstr(
+    torch.full_like,
+    """
+full_like(input, fill_value, \\*, dtype=None, layout=torch.strided, device=None, requires_grad=False, \
+memory_format=torch.preserve_format) -> Tensor
+
+Returns a tensor with the same size as :attr:`input` filled with :attr:`fill_value`.
+``torch.full_like(input, fill_value)`` is equivalent to
+``torch.full(input.size(), fill_value, dtype=input.dtype, layout=input.layout, device=input.device)``.
+
+Args:
+    {input}
+    fill_value: the number to fill the output tensor with.
+
+Keyword args:
+    {dtype}
+    {layout}
+    {device}
+    {requires_grad}
+    {memory_format}
+""".format(
+        **factory_like_common_args
+    ),
+)
+
+add_docstr(
+    torch.det,
+    r"""
+det(input) -> Tensor
+
+Alias for :func:`torch.linalg.det`
+""",
+)
+
+add_docstr(
+    torch.where,
+    r"""
+where(condition, input, other, *, out=None) -> Tensor
+
+Return a tensor of elements selected from either :attr:`input` or :attr:`other`, depending on :attr:`condition`.
+
+The operation is defined as:
+
+.. math::
+    \text{out}_i = \begin{cases}
+        \text{input}_i & \text{if } \text{condition}_i \\
+        \text{other}_i & \text{otherwise} \\
+    \end{cases}
+"""
+    + r"""
+.. note::
+    The tensors :attr:`condition`, :attr:`input`, :attr:`other` must be :ref:`broadcastable <broadcasting-semantics>`.
+
+Arguments:
+    condition (BoolTensor): When True (nonzero), yield input, otherwise yield other
+    input (Tensor or Scalar): value (if :attr:`input` is a scalar) or values selected at indices
+                          where :attr:`condition` is ``True``
+    other (Tensor or Scalar): value (if :attr:`other` is a scalar) or values selected at indices
+                          where :attr:`condition` is ``False``
+
+Keyword args:
+    {out}
+
+Returns:
+    Tensor: A tensor of shape equal to the broadcasted shape of :attr:`condition`, :attr:`input`, :attr:`other`
+
+Example::
+
+    >>> x = torch.randn(3, 2)
+    >>> y = torch.ones(3, 2)
+    >>> x
+    tensor([[-0.4620,  0.3139],
+            [ 0.3898, -0.7197],
+            [ 0.0478, -0.1657]])
+    >>> torch.where(x > 0, 1.0, 0.0)
+    tensor([[0., 1.],
+            [1., 0.],
+            [1., 0.]])
+    >>> torch.where(x > 0, x, y)
+    tensor([[ 1.0000,  0.3139],
+            [ 0.3898,  1.0000],
+            [ 0.0478,  1.0000]])
+    >>> x = torch.randn(2, 2, dtype=torch.double)
+    >>> x
+    tensor([[ 1.0779,  0.0383],
+            [-0.8785, -1.1089]], dtype=torch.float64)
+    >>> torch.where(x > 0, x, 0.)
+    tensor([[1.0779, 0.0383],
+            [0.0000, 0.0000]], dtype=torch.float64)
+
+.. function:: where(condition) -> tuple of LongTensor
+   :noindex:
+
+``torch.where(condition)`` is identical to
+``torch.nonzero(condition, as_tuple=True)``.
+
+.. note::
+    See also :func:`torch.nonzero`.
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.logdet,
+    r"""
+logdet(input) -> Tensor
+
+Calculates log determinant of a square matrix or batches of square matrices.
+
+It returns ``-inf`` if the input has a determinant of zero, and ``NaN`` if it has
+a negative determinant.
+
+.. note::
+    Backward through :meth:`logdet` internally uses SVD results when :attr:`input`
+    is not invertible. In this case, double backward through :meth:`logdet` will
+    be unstable in when :attr:`input` doesn't have distinct singular values. See
+    :func:`torch.linalg.svd` for details.
+
+.. seealso::
+
+        :func:`torch.linalg.slogdet` computes the sign (resp. angle) and natural logarithm of the
+        absolute value of the determinant of real-valued (resp. complex) square matrices.
+
+Arguments:
+    input (Tensor): the input tensor of size ``(*, n, n)`` where ``*`` is zero or more
+                batch dimensions.
+
+Example::
+
+    >>> A = torch.randn(3, 3)
+    >>> torch.det(A)
+    tensor(0.2611)
+    >>> torch.logdet(A)
+    tensor(-1.3430)
+    >>> A
+    tensor([[[ 0.9254, -0.6213],
+             [-0.5787,  1.6843]],
+
+            [[ 0.3242, -0.9665],
+             [ 0.4539, -0.0887]],
+
+            [[ 1.1336, -0.4025],
+             [-0.7089,  0.9032]]])
+    >>> A.det()
+    tensor([1.1990, 0.4099, 0.7386])
+    >>> A.det().log()
+    tensor([ 0.1815, -0.8917, -0.3031])
+""",
+)
+
+add_docstr(
+    torch.slogdet,
+    r"""
+slogdet(input) -> (Tensor, Tensor)
+
+Alias for :func:`torch.linalg.slogdet`
+""",
+)
+
+add_docstr(
+    torch.pinverse,
+    r"""
+pinverse(input, rcond=1e-15) -> Tensor
+
+Alias for :func:`torch.linalg.pinv`
+""",
+)
+
+add_docstr(
+    torch.hann_window,
+    """
+hann_window(window_length, periodic=True, *, dtype=None, \
+layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+Hann window function.
+
+.. math::
+    w[n] = \frac{1}{2}\ \left[1 - \cos \left( \frac{2 \pi n}{N - 1} \right)\right] =
+            \sin^2 \left( \frac{\pi n}{N - 1} \right),
+
+where :math:`N` is the full window size.
+
+The input :attr:`window_length` is a positive integer controlling the
+returned window size. :attr:`periodic` flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in
+above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have
+``torch.hann_window(L, periodic=True)`` equal to
+``torch.hann_window(L + 1, periodic=False)[:-1])``.
+
+.. note::
+    If :attr:`window_length` :math:`=1`, the returned window contains a single value 1.
+"""
+    + r"""
+Arguments:
+    window_length (int): the size of returned window
+    periodic (bool, optional): If True, returns a window to be used as periodic
+        function. If False, return a symmetric window.
+
+Keyword args:
+    {dtype} Only floating point types are supported.
+    layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only
+          ``torch.strided`` (dense layout) is supported.
+    {device}
+    {requires_grad}
+
+Returns:
+    Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.hamming_window,
+    """
+hamming_window(window_length, periodic=True, alpha=0.54, beta=0.46, *, dtype=None, \
+layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+Hamming window function.
+
+.. math::
+    w[n] = \alpha - \beta\ \cos \left( \frac{2 \pi n}{N - 1} \right),
+
+where :math:`N` is the full window size.
+
+The input :attr:`window_length` is a positive integer controlling the
+returned window size. :attr:`periodic` flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in
+above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have
+``torch.hamming_window(L, periodic=True)`` equal to
+``torch.hamming_window(L + 1, periodic=False)[:-1])``.
+
+.. note::
+    If :attr:`window_length` :math:`=1`, the returned window contains a single value 1.
+
+.. note::
+    This is a generalized version of :meth:`torch.hann_window`.
+"""
+    + r"""
+Arguments:
+    window_length (int): the size of returned window
+    periodic (bool, optional): If True, returns a window to be used as periodic
+        function. If False, return a symmetric window.
+    alpha (float, optional): The coefficient :math:`\alpha` in the equation above
+    beta (float, optional): The coefficient :math:`\beta` in the equation above
+
+Keyword args:
+    {dtype} Only floating point types are supported.
+    layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only
+          ``torch.strided`` (dense layout) is supported.
+    {device}
+    {requires_grad}
+
+Returns:
+    Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window.
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.bartlett_window,
+    """
+bartlett_window(window_length, periodic=True, *, dtype=None, \
+layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+Bartlett window function.
+
+.. math::
+    w[n] = 1 - \left| \frac{2n}{N-1} - 1 \right| = \begin{cases}
+        \frac{2n}{N - 1} & \text{if } 0 \leq n \leq \frac{N - 1}{2} \\
+        2 - \frac{2n}{N - 1} & \text{if } \frac{N - 1}{2} < n < N \\
+    \end{cases},
+
+where :math:`N` is the full window size.
+
+The input :attr:`window_length` is a positive integer controlling the
+returned window size. :attr:`periodic` flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in
+above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have
+``torch.bartlett_window(L, periodic=True)`` equal to
+``torch.bartlett_window(L + 1, periodic=False)[:-1])``.
+
+.. note::
+    If :attr:`window_length` :math:`=1`, the returned window contains a single value 1.
+"""
+    + r"""
+Arguments:
+    window_length (int): the size of returned window
+    periodic (bool, optional): If True, returns a window to be used as periodic
+        function. If False, return a symmetric window.
+
+Keyword args:
+    {dtype} Only floating point types are supported.
+    layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only
+          ``torch.strided`` (dense layout) is supported.
+    {device}
+    {requires_grad}
+
+Returns:
+    Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.blackman_window,
+    """
+blackman_window(window_length, periodic=True, *, dtype=None, \
+layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+Blackman window function.
+
+.. math::
+    w[n] = 0.42 - 0.5 \cos \left( \frac{2 \pi n}{N - 1} \right) + 0.08 \cos \left( \frac{4 \pi n}{N - 1} \right)
+
+where :math:`N` is the full window size.
+
+The input :attr:`window_length` is a positive integer controlling the
+returned window size. :attr:`periodic` flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in
+above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have
+``torch.blackman_window(L, periodic=True)`` equal to
+``torch.blackman_window(L + 1, periodic=False)[:-1])``.
+
+.. note::
+    If :attr:`window_length` :math:`=1`, the returned window contains a single value 1.
+"""
+    + r"""
+Arguments:
+    window_length (int): the size of returned window
+    periodic (bool, optional): If True, returns a window to be used as periodic
+        function. If False, return a symmetric window.
+
+Keyword args:
+    {dtype} Only floating point types are supported.
+    layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only
+          ``torch.strided`` (dense layout) is supported.
+    {device}
+    {requires_grad}
+
+Returns:
+    Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.kaiser_window,
+    """
+kaiser_window(window_length, periodic=True, beta=12.0, *, dtype=None, \
+layout=torch.strided, device=None, requires_grad=False) -> Tensor
+"""
+    + r"""
+Computes the Kaiser window with window length :attr:`window_length` and shape parameter :attr:`beta`.
+
+Let I_0 be the zeroth order modified Bessel function of the first kind (see :func:`torch.i0`) and
+``N = L - 1`` if :attr:`periodic` is False and ``L`` if :attr:`periodic` is True,
+where ``L`` is the :attr:`window_length`. This function computes:
+
+.. math::
+    out_i = I_0 \left( \beta \sqrt{1 - \left( {\frac{i - N/2}{N/2}} \right) ^2 } \right) / I_0( \beta )
+
+Calling ``torch.kaiser_window(L, B, periodic=True)`` is equivalent to calling
+``torch.kaiser_window(L + 1, B, periodic=False)[:-1])``.
+The :attr:`periodic` argument is intended as a helpful shorthand
+to produce a periodic window as input to functions like :func:`torch.stft`.
+
+.. note::
+    If :attr:`window_length` is one, then the returned window is a single element tensor containing a one.
+
+"""
+    + r"""
+Args:
+    window_length (int): length of the window.
+    periodic (bool, optional): If True, returns a periodic window suitable for use in spectral analysis.
+        If False, returns a symmetric window suitable for use in filter design.
+    beta (float, optional): shape parameter for the window.
+
+Keyword args:
+    {dtype}
+    layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only
+          ``torch.strided`` (dense layout) is supported.
+    {device}
+    {requires_grad}
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.vander,
+    """
+vander(x, N=None, increasing=False) -> Tensor
+"""
+    + r"""
+Generates a Vandermonde matrix.
+
+The columns of the output matrix are elementwise powers of the input vector :math:`x^{{(N-1)}}, x^{{(N-2)}}, ..., x^0`.
+If increasing is True, the order of the columns is reversed :math:`x^0, x^1, ..., x^{{(N-1)}}`. Such a
+matrix with a geometric progression in each row is named for Alexandre-Theophile Vandermonde.
+
+Arguments:
+    x (Tensor): 1-D input tensor.
+    N (int, optional): Number of columns in the output. If N is not specified,
+        a square array is returned :math:`(N = len(x))`.
+    increasing (bool, optional): Order of the powers of the columns. If True,
+        the powers increase from left to right, if False (the default) they are reversed.
+
+Returns:
+    Tensor: Vandermonde matrix. If increasing is False, the first column is :math:`x^{{(N-1)}}`,
+    the second :math:`x^{{(N-2)}}` and so forth. If increasing is True, the columns
+    are :math:`x^0, x^1, ..., x^{{(N-1)}}`.
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3, 5])
+    >>> torch.vander(x)
+    tensor([[  1,   1,   1,   1],
+            [  8,   4,   2,   1],
+            [ 27,   9,   3,   1],
+            [125,  25,   5,   1]])
+    >>> torch.vander(x, N=3)
+    tensor([[ 1,  1,  1],
+            [ 4,  2,  1],
+            [ 9,  3,  1],
+            [25,  5,  1]])
+    >>> torch.vander(x, N=3, increasing=True)
+    tensor([[ 1,  1,  1],
+            [ 1,  2,  4],
+            [ 1,  3,  9],
+            [ 1,  5, 25]])
+
+""".format(
+        **factory_common_args
+    ),
+)
+
+
+add_docstr(
+    torch.unbind,
+    r"""
+unbind(input, dim=0) -> seq
+
+Removes a tensor dimension.
+
+Returns a tuple of all slices along a given dimension, already without it.
+
+Arguments:
+    input (Tensor): the tensor to unbind
+    dim (int): dimension to remove
+
+Example::
+
+    >>> torch.unbind(torch.tensor([[1, 2, 3],
+    >>>                            [4, 5, 6],
+    >>>                            [7, 8, 9]]))
+    (tensor([1, 2, 3]), tensor([4, 5, 6]), tensor([7, 8, 9]))
+""",
+)
+
+
+add_docstr(
+    torch.combinations,
+    r"""
+combinations(input, r=2, with_replacement=False) -> seq
+
+Compute combinations of length :math:`r` of the given tensor. The behavior is similar to
+python's `itertools.combinations` when `with_replacement` is set to `False`, and
+`itertools.combinations_with_replacement` when `with_replacement` is set to `True`.
+
+Arguments:
+    input (Tensor): 1D vector.
+    r (int, optional): number of elements to combine
+    with_replacement (bool, optional): whether to allow duplication in combination
+
+Returns:
+    Tensor: A tensor equivalent to converting all the input tensors into lists, do
+    `itertools.combinations` or `itertools.combinations_with_replacement` on these
+    lists, and finally convert the resulting list into tensor.
+
+Example::
+
+    >>> a = [1, 2, 3]
+    >>> list(itertools.combinations(a, r=2))
+    [(1, 2), (1, 3), (2, 3)]
+    >>> list(itertools.combinations(a, r=3))
+    [(1, 2, 3)]
+    >>> list(itertools.combinations_with_replacement(a, r=2))
+    [(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
+    >>> tensor_a = torch.tensor(a)
+    >>> torch.combinations(tensor_a)
+    tensor([[1, 2],
+            [1, 3],
+            [2, 3]])
+    >>> torch.combinations(tensor_a, r=3)
+    tensor([[1, 2, 3]])
+    >>> torch.combinations(tensor_a, with_replacement=True)
+    tensor([[1, 1],
+            [1, 2],
+            [1, 3],
+            [2, 2],
+            [2, 3],
+            [3, 3]])
+
+""",
+)
+
+add_docstr(
+    torch.trapezoid,
+    r"""
+trapezoid(y, x=None, *, dx=None, dim=-1) -> Tensor
+
+Computes the `trapezoidal rule <https://en.wikipedia.org/wiki/Trapezoidal_rule>`_ along
+:attr:`dim`. By default the spacing between elements is assumed to be 1, but
+:attr:`dx` can be used to specify a different constant spacing, and :attr:`x` can be
+used to specify arbitrary spacing along :attr:`dim`.
+
+
+Assuming :attr:`y` is a one-dimensional tensor with elements :math:`{y_0, y_1, ..., y_n}`,
+the default computation is
+
+.. math::
+    \begin{aligned}
+        \sum_{i = 1}^{n-1} \frac{1}{2} (y_i + y_{i-1})
+    \end{aligned}
+
+When :attr:`dx` is specified the computation becomes
+
+.. math::
+    \begin{aligned}
+        \sum_{i = 1}^{n-1} \frac{\Delta x}{2} (y_i + y_{i-1})
+    \end{aligned}
+
+effectively multiplying the result by :attr:`dx`. When :attr:`x` is specified,
+assuming :attr:`x` is also a one-dimensional tensor with
+elements :math:`{x_0, x_1, ..., x_n}`, the computation becomes
+
+.. math::
+    \begin{aligned}
+        \sum_{i = 1}^{n-1} \frac{(x_i - x_{i-1})}{2} (y_i + y_{i-1})
+    \end{aligned}
+
+When :attr:`x` and :attr:`y` have the same size, the computation is as described above and no broadcasting is needed.
+The broadcasting behavior of this function is as follows when their sizes are different. For both :attr:`x`
+and :attr:`y`, the function computes the difference between consecutive elements along
+dimension :attr:`dim`. This effectively creates two tensors, `x_diff` and `y_diff`, that have
+the same shape as the original tensors except their lengths along the dimension :attr:`dim` is reduced by 1.
+After that, those two tensors are broadcast together to compute final output as part of the trapezoidal rule.
+See the examples below for details.
+
+.. note::
+    The trapezoidal rule is a technique for approximating the definite integral of a function
+    by averaging its left and right Riemann sums. The approximation becomes more accurate as
+    the resolution of the partition increases.
+
+Arguments:
+    y (Tensor): Values to use when computing the trapezoidal rule.
+    x (Tensor): If specified, defines spacing between values as specified above.
+
+Keyword arguments:
+    dx (float): constant spacing between values. If neither :attr:`x` or :attr:`dx`
+        are specified then this defaults to 1. Effectively multiplies the result by its value.
+    dim (int): The dimension along which to compute the trapezoidal rule.
+        The last (inner-most) dimension by default.
+
+Examples::
+
+    >>> # Computes the trapezoidal rule in 1D, spacing is implicitly 1
+    >>> y = torch.tensor([1, 5, 10])
+    >>> torch.trapezoid(y)
+    tensor(10.5)
+
+    >>> # Computes the same trapezoidal rule directly to verify
+    >>> (1 + 10 + 10) / 2
+    10.5
+
+    >>> # Computes the trapezoidal rule in 1D with constant spacing of 2
+    >>> # NOTE: the result is the same as before, but multiplied by 2
+    >>> torch.trapezoid(y, dx=2)
+    21.0
+
+    >>> # Computes the trapezoidal rule in 1D with arbitrary spacing
+    >>> x = torch.tensor([1, 3, 6])
+    >>> torch.trapezoid(y, x)
+    28.5
+
+    >>> # Computes the same trapezoidal rule directly to verify
+    >>> ((3 - 1) * (1 + 5) + (6 - 3) * (5 + 10)) / 2
+    28.5
+
+    >>> # Computes the trapezoidal rule for each row of a 3x3 matrix
+    >>> y = torch.arange(9).reshape(3, 3)
+    tensor([[0, 1, 2],
+            [3, 4, 5],
+            [6, 7, 8]])
+    >>> torch.trapezoid(y)
+    tensor([ 2., 8., 14.])
+
+    >>> # Computes the trapezoidal rule for each column of the matrix
+    >>> torch.trapezoid(y, dim=0)
+    tensor([ 6., 8., 10.])
+
+    >>> # Computes the trapezoidal rule for each row of a 3x3 ones matrix
+    >>> #   with the same arbitrary spacing
+    >>> y = torch.ones(3, 3)
+    >>> x = torch.tensor([1, 3, 6])
+    >>> torch.trapezoid(y, x)
+    array([5., 5., 5.])
+
+    >>> # Computes the trapezoidal rule for each row of a 3x3 ones matrix
+    >>> #   with different arbitrary spacing per row
+    >>> y = torch.ones(3, 3)
+    >>> x = torch.tensor([[1, 2, 3], [1, 3, 5], [1, 4, 7]])
+    >>> torch.trapezoid(y, x)
+    array([2., 4., 6.])
+""",
+)
+
+add_docstr(
+    torch.trapz,
+    r"""
+trapz(y, x, *, dim=-1) -> Tensor
+
+Alias for :func:`torch.trapezoid`.
+""",
+)
+
+add_docstr(
+    torch.cumulative_trapezoid,
+    r"""
+cumulative_trapezoid(y, x=None, *, dx=None, dim=-1) -> Tensor
+
+Cumulatively computes the `trapezoidal rule <https://en.wikipedia.org/wiki/Trapezoidal_rule>`_
+along :attr:`dim`. By default the spacing between elements is assumed to be 1, but
+:attr:`dx` can be used to specify a different constant spacing, and :attr:`x` can be
+used to specify arbitrary spacing along :attr:`dim`.
+
+For more details, please read :func:`torch.trapezoid`. The difference between :func:`torch.trapezoid`
+and this function is that, :func:`torch.trapezoid` returns a value for each integration,
+where as this function returns a cumulative value for every spacing within the integration. This
+is analogous to how `.sum` returns a value and `.cumsum` returns a cumulative sum.
+
+Arguments:
+    y (Tensor): Values to use when computing the trapezoidal rule.
+    x (Tensor): If specified, defines spacing between values as specified above.
+
+Keyword arguments:
+    dx (float): constant spacing between values. If neither :attr:`x` or :attr:`dx`
+        are specified then this defaults to 1. Effectively multiplies the result by its value.
+    dim (int): The dimension along which to compute the trapezoidal rule.
+        The last (inner-most) dimension by default.
+
+Examples::
+
+    >>> # Cumulatively computes the trapezoidal rule in 1D, spacing is implicitly 1.
+    >>> y = torch.tensor([1, 5, 10])
+    >>> torch.cumulative_trapezoid(y)
+    tensor([3., 10.5])
+
+    >>> # Computes the same trapezoidal rule directly up to each element to verify
+    >>> (1 + 5) / 2
+    3.0
+    >>> (1 + 10 + 10) / 2
+    10.5
+
+    >>> # Cumulatively computes the trapezoidal rule in 1D with constant spacing of 2
+    >>> # NOTE: the result is the same as before, but multiplied by 2
+    >>> torch.cumulative_trapezoid(y, dx=2)
+    tensor([6., 21.])
+
+    >>> # Cumulatively computes the trapezoidal rule in 1D with arbitrary spacing
+    >>> x = torch.tensor([1, 3, 6])
+    >>> torch.cumulative_trapezoid(y, x)
+    tensor([6., 28.5])
+
+    >>> # Computes the same trapezoidal rule directly up to each element to verify
+    >>> ((3 - 1) * (1 + 5)) / 2
+    6.0
+    >>> ((3 - 1) * (1 + 5) + (6 - 3) * (5 + 10)) / 2
+    28.5
+
+    >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 matrix
+    >>> y = torch.arange(9).reshape(3, 3)
+    tensor([[0, 1, 2],
+            [3, 4, 5],
+            [6, 7, 8]])
+    >>> torch.cumulative_trapezoid(y)
+    tensor([[ 0.5,  2.],
+            [ 3.5,  8.],
+            [ 6.5, 14.]])
+
+    >>> # Cumulatively computes the trapezoidal rule for each column of the matrix
+    >>> torch.cumulative_trapezoid(y, dim=0)
+    tensor([[ 1.5,  2.5,  3.5],
+            [ 6.0,  8.0, 10.0]])
+
+    >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 ones matrix
+    >>> #   with the same arbitrary spacing
+    >>> y = torch.ones(3, 3)
+    >>> x = torch.tensor([1, 3, 6])
+    >>> torch.cumulative_trapezoid(y, x)
+    tensor([[2., 5.],
+            [2., 5.],
+            [2., 5.]])
+
+    >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 ones matrix
+    >>> #   with different arbitrary spacing per row
+    >>> y = torch.ones(3, 3)
+    >>> x = torch.tensor([[1, 2, 3], [1, 3, 5], [1, 4, 7]])
+    >>> torch.cumulative_trapezoid(y, x)
+    tensor([[1., 2.],
+            [2., 4.],
+            [3., 6.]])
+""",
+)
+
+add_docstr(
+    torch.repeat_interleave,
+    r"""
+repeat_interleave(input, repeats, dim=None, *, output_size=None) -> Tensor
+
+Repeat elements of a tensor.
+
+.. warning::
+
+    This is different from :meth:`torch.Tensor.repeat` but similar to ``numpy.repeat``.
+
+Args:
+    {input}
+    repeats (Tensor or int): The number of repetitions for each element.
+        repeats is broadcasted to fit the shape of the given axis.
+    dim (int, optional): The dimension along which to repeat values.
+        By default, use the flattened input array, and return a flat output
+        array.
+
+Keyword args:
+    output_size (int, optional): Total output size for the given axis
+        ( e.g. sum of repeats). If given, it will avoid stream synchronization
+        needed to calculate output shape of the tensor.
+
+Returns:
+    Tensor: Repeated tensor which has the same shape as input, except along the given axis.
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3])
+    >>> x.repeat_interleave(2)
+    tensor([1, 1, 2, 2, 3, 3])
+    >>> y = torch.tensor([[1, 2], [3, 4]])
+    >>> torch.repeat_interleave(y, 2)
+    tensor([1, 1, 2, 2, 3, 3, 4, 4])
+    >>> torch.repeat_interleave(y, 3, dim=1)
+    tensor([[1, 1, 1, 2, 2, 2],
+            [3, 3, 3, 4, 4, 4]])
+    >>> torch.repeat_interleave(y, torch.tensor([1, 2]), dim=0)
+    tensor([[1, 2],
+            [3, 4],
+            [3, 4]])
+    >>> torch.repeat_interleave(y, torch.tensor([1, 2]), dim=0, output_size=3)
+    tensor([[1, 2],
+            [3, 4],
+            [3, 4]])
+
+If the `repeats` is `tensor([n1, n2, n3, ...])`, then the output will be
+`tensor([0, 0, ..., 1, 1, ..., 2, 2, ..., ...])` where `0` appears `n1` times,
+`1` appears `n2` times, `2` appears `n3` times, etc.
+
+.. function:: repeat_interleave(repeats, *) -> Tensor
+   :noindex:
+
+Repeats 0 repeats[0] times, 1 repeats[1] times, 2 repeats[2] times, etc.
+
+Args:
+    repeats (Tensor): The number of repetitions for each element.
+
+Returns:
+    Tensor: Repeated tensor of size `sum(repeats)`.
+
+Example::
+
+    >>> torch.repeat_interleave(torch.tensor([1, 2, 3]))
+    tensor([0, 1, 1, 2, 2, 2])
+
+""".format(
+        **common_args
+    ),
+)
+
+add_docstr(
+    torch.tile,
+    r"""
+tile(input, dims) -> Tensor
+
+Constructs a tensor by repeating the elements of :attr:`input`.
+The :attr:`dims` argument specifies the number of repetitions
+in each dimension.
+
+If :attr:`dims` specifies fewer dimensions than :attr:`input` has, then
+ones are prepended to :attr:`dims` until all dimensions are specified.
+For example, if :attr:`input` has shape (8, 6, 4, 2) and :attr:`dims`
+is (2, 2), then :attr:`dims` is treated as (1, 1, 2, 2).
+
+Analogously, if :attr:`input` has fewer dimensions than :attr:`dims`
+specifies, then :attr:`input` is treated as if it were unsqueezed at
+dimension zero until it has as many dimensions as :attr:`dims` specifies.
+For example, if :attr:`input` has shape (4, 2) and :attr:`dims`
+is (3, 3, 2, 2), then :attr:`input` is treated as if it had the
+shape (1, 1, 4, 2).
+
+.. note::
+
+    This function is similar to NumPy's tile function.
+
+Args:
+    input (Tensor): the tensor whose elements to repeat.
+    dims (tuple): the number of repetitions per dimension.
+
+Example::
+
+    >>> x = torch.tensor([1, 2, 3])
+    >>> x.tile((2,))
+    tensor([1, 2, 3, 1, 2, 3])
+    >>> y = torch.tensor([[1, 2], [3, 4]])
+    >>> torch.tile(y, (2, 2))
+    tensor([[1, 2, 1, 2],
+            [3, 4, 3, 4],
+            [1, 2, 1, 2],
+            [3, 4, 3, 4]])
+""",
+)
+
+add_docstr(
+    torch.quantize_per_tensor,
+    r"""
+quantize_per_tensor(input, scale, zero_point, dtype) -> Tensor
+
+Converts a float tensor to a quantized tensor with given scale and zero point.
+
+Arguments:
+    input (Tensor): float tensor or list of tensors to quantize
+    scale (float or Tensor): scale to apply in quantization formula
+    zero_point (int or Tensor): offset in integer value that maps to float zero
+    dtype (:class:`torch.dtype`): the desired data type of returned tensor.
+        Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8``, ``torch.qint32``
+
+Returns:
+    Tensor: A newly quantized tensor or list of quantized tensors.
+
+Example::
+
+    >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), 0.1, 10, torch.quint8)
+    tensor([-1.,  0.,  1.,  2.], size=(4,), dtype=torch.quint8,
+           quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=10)
+    >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), 0.1, 10, torch.quint8).int_repr()
+    tensor([ 0, 10, 20, 30], dtype=torch.uint8)
+    >>> torch.quantize_per_tensor([torch.tensor([-1.0, 0.0]), torch.tensor([-2.0, 2.0])],
+    >>> torch.tensor([0.1, 0.2]), torch.tensor([10, 20]), torch.quint8)
+    (tensor([-1.,  0.], size=(2,), dtype=torch.quint8,
+        quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=10),
+        tensor([-2.,  2.], size=(2,), dtype=torch.quint8,
+        quantization_scheme=torch.per_tensor_affine, scale=0.2, zero_point=20))
+    >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), torch.tensor(0.1), torch.tensor(10), torch.quint8)
+    tensor([-1.,  0.,  1.,  2.], size=(4,), dtype=torch.quint8,
+       quantization_scheme=torch.per_tensor_affine, scale=0.10, zero_point=10)
+""",
+)
+
+add_docstr(
+    torch.quantize_per_tensor_dynamic,
+    r"""
+quantize_per_tensor_dynamic(input, dtype, reduce_range) -> Tensor
+
+Converts a float tensor to a quantized tensor with scale and zero_point calculated
+dynamically based on the input.
+
+Arguments:
+    input (Tensor): float tensor or list of tensors to quantize
+    dtype (:class:`torch.dtype`): the desired data type of returned tensor.
+        Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8``
+    reduce_range (bool): a flag to indicate whether to reduce the range of quantized
+    data by 1 bit, it's required to avoid instruction overflow for some hardwares
+
+Returns:
+    Tensor: A newly (dynamically) quantized tensor
+
+Example::
+
+    >>> t = torch.quantize_per_tensor_dynamic(torch.tensor([-1.0, 0.0, 1.0, 2.0]), torch.quint8, False)
+    >>> print(t)
+    tensor([-1.,  0.,  1.,  2.], size=(4,), dtype=torch.quint8,
+           quantization_scheme=torch.per_tensor_affine, scale=0.011764705882352941,
+           zero_point=85)
+    >>> t.int_repr()
+    tensor([  0,  85, 170, 255], dtype=torch.uint8)
+""",
+)
+
+add_docstr(
+    torch.quantize_per_channel,
+    r"""
+quantize_per_channel(input, scales, zero_points, axis, dtype) -> Tensor
+
+Converts a float tensor to a per-channel quantized tensor with given scales and zero points.
+
+Arguments:
+    input (Tensor): float tensor to quantize
+    scales (Tensor): float 1D tensor of scales to use, size should match ``input.size(axis)``
+    zero_points (int): integer 1D tensor of offset to use, size should match ``input.size(axis)``
+    axis (int): dimension on which apply per-channel quantization
+    dtype (:class:`torch.dtype`): the desired data type of returned tensor.
+        Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8``, ``torch.qint32``
+
+Returns:
+    Tensor: A newly quantized tensor
+
+Example::
+
+    >>> x = torch.tensor([[-1.0, 0.0], [1.0, 2.0]])
+    >>> torch.quantize_per_channel(x, torch.tensor([0.1, 0.01]), torch.tensor([10, 0]), 0, torch.quint8)
+    tensor([[-1.,  0.],
+            [ 1.,  2.]], size=(2, 2), dtype=torch.quint8,
+           quantization_scheme=torch.per_channel_affine,
+           scale=tensor([0.1000, 0.0100], dtype=torch.float64),
+           zero_point=tensor([10,  0]), axis=0)
+    >>> torch.quantize_per_channel(x, torch.tensor([0.1, 0.01]), torch.tensor([10, 0]), 0, torch.quint8).int_repr()
+    tensor([[  0,  10],
+            [100, 200]], dtype=torch.uint8)
+""",
+)
+
+
+add_docstr(
+    torch.quantized_batch_norm,
+    r"""
+quantized_batch_norm(input, weight=None, bias=None, mean, var, eps, output_scale, output_zero_point) -> Tensor
+
+Applies batch normalization on a 4D (NCHW) quantized tensor.
+
+.. math::
+
+        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+Arguments:
+    input (Tensor): quantized tensor
+    weight (Tensor): float tensor that corresponds to the gamma, size C
+    bias (Tensor):  float tensor that corresponds to the beta, size C
+    mean (Tensor): float mean value in batch normalization, size C
+    var (Tensor): float tensor for variance, size C
+    eps (float): a value added to the denominator for numerical stability.
+    output_scale (float): output quantized tensor scale
+    output_zero_point (int): output quantized tensor zero_point
+
+Returns:
+    Tensor: A quantized tensor with batch normalization applied.
+
+Example::
+
+    >>> qx = torch.quantize_per_tensor(torch.rand(2, 2, 2, 2), 1.5, 3, torch.quint8)
+    >>> torch.quantized_batch_norm(qx, torch.ones(2), torch.zeros(2), torch.rand(2), torch.rand(2), 0.00001, 0.2, 2)
+    tensor([[[[-0.2000, -0.2000],
+          [ 1.6000, -0.2000]],
+
+         [[-0.4000, -0.4000],
+          [-0.4000,  0.6000]]],
+
+
+        [[[-0.2000, -0.2000],
+          [-0.2000, -0.2000]],
+
+         [[ 0.6000, -0.4000],
+          [ 0.6000, -0.4000]]]], size=(2, 2, 2, 2), dtype=torch.quint8,
+       quantization_scheme=torch.per_tensor_affine, scale=0.2, zero_point=2)
+""",
+)
+
+
+add_docstr(
+    torch.quantized_max_pool1d,
+    r"""
+quantized_max_pool1d(input, kernel_size, stride=[], padding=0, dilation=1, ceil_mode=False) -> Tensor
+
+Applies a 1D max pooling over an input quantized tensor composed of several input planes.
+
+Arguments:
+    input (Tensor): quantized tensor
+    kernel_size (list of int): the size of the sliding window
+    stride (``list of int``, optional): the stride of the sliding window
+    padding (``list of int``, optional): padding to be added on both sides, must be >= 0 and <= kernel_size / 2
+    dilation (``list of int``, optional): The stride between elements within a sliding window, must be > 0. Default 1
+    ceil_mode (bool, optional):  If True, will use ceil instead of floor to compute the output shape.
+        Defaults to False.
+
+
+Returns:
+    Tensor: A quantized tensor with max_pool1d applied.
+
+Example::
+
+    >>> qx = torch.quantize_per_tensor(torch.rand(2, 2), 1.5, 3, torch.quint8)
+    >>> torch.quantized_max_pool1d(qx, [2])
+    tensor([[0.0000],
+            [1.5000]], size=(2, 1), dtype=torch.quint8,
+        quantization_scheme=torch.per_tensor_affine, scale=1.5, zero_point=3)
+""",
+)
+
+
+add_docstr(
+    torch.quantized_max_pool2d,
+    r"""
+quantized_max_pool2d(input, kernel_size, stride=[], padding=0, dilation=1, ceil_mode=False) -> Tensor
+
+Applies a 2D max pooling over an input quantized tensor composed of several input planes.
+
+Arguments:
+    input (Tensor): quantized tensor
+    kernel_size (``list of int``): the size of the sliding window
+    stride (``list of int``, optional): the stride of the sliding window
+    padding (``list of int``, optional): padding to be added on both sides, must be >= 0 and <= kernel_size / 2
+    dilation (``list of int``, optional): The stride between elements within a sliding window, must be > 0. Default 1
+    ceil_mode (bool, optional):  If True, will use ceil instead of floor to compute the output shape.
+        Defaults to False.
+
+
+Returns:
+    Tensor: A quantized tensor with max_pool2d applied.
+
+Example::
+
+    >>> qx = torch.quantize_per_tensor(torch.rand(2, 2, 2, 2), 1.5, 3, torch.quint8)
+    >>> torch.quantized_max_pool2d(qx, [2,2])
+    tensor([[[[1.5000]],
+
+            [[1.5000]]],
+
+
+            [[[0.0000]],
+
+            [[0.0000]]]], size=(2, 2, 1, 1), dtype=torch.quint8,
+        quantization_scheme=torch.per_tensor_affine, scale=1.5, zero_point=3)
+""",
+)
+
+
+add_docstr(
+    torch.Generator,
+    r"""
+Generator(device='cpu') -> Generator
+
+Creates and returns a generator object that manages the state of the algorithm which
+produces pseudo random numbers. Used as a keyword argument in many :ref:`inplace-random-sampling`
+functions.
+
+Arguments:
+    device (:class:`torch.device`, optional): the desired device for the generator.
+
+Returns:
+    Generator: An torch.Generator object.
+
+Example::
+
+    >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
+    >>> g_cpu = torch.Generator()
+    >>> g_cuda = torch.Generator(device='cuda')
+""",
+)
+
+
+add_docstr(
+    torch.Generator.set_state,
+    r"""
+Generator.set_state(new_state) -> void
+
+Sets the Generator state.
+
+Arguments:
+    new_state (torch.ByteTensor): The desired state.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu_other = torch.Generator()
+    >>> g_cpu.set_state(g_cpu_other.get_state())
+""",
+)
+
+
+add_docstr(
+    torch.Generator.get_state,
+    r"""
+Generator.get_state() -> Tensor
+
+Returns the Generator state as a ``torch.ByteTensor``.
+
+Returns:
+    Tensor: A ``torch.ByteTensor`` which contains all the necessary bits
+    to restore a Generator to a specific point in time.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu.get_state()
+""",
+)
+
+
+add_docstr(
+    torch.Generator.manual_seed,
+    r"""
+Generator.manual_seed(seed) -> Generator
+
+Sets the seed for generating random numbers. Returns a `torch.Generator` object. Any 32-bit integer is a valid seed.
+
+Arguments:
+    seed (int): The desired seed. Value must be within the inclusive range
+        `[-0x8000_0000_0000_0000, 0xffff_ffff_ffff_ffff]`. Otherwise, a RuntimeError
+        is raised. Negative inputs are remapped to positive values with the formula
+        `0xffff_ffff_ffff_ffff + seed`.
+
+Returns:
+    Generator: An torch.Generator object.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu.manual_seed(2147483647)
+""",
+)
+
+
+add_docstr(
+    torch.Generator.initial_seed,
+    r"""
+Generator.initial_seed() -> int
+
+Returns the initial seed for generating random numbers.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu.initial_seed()
+    2147483647
+""",
+)
+
+
+add_docstr(
+    torch.Generator.seed,
+    r"""
+Generator.seed() -> int
+
+Gets a non-deterministic random number from std::random_device or the current
+time and uses it to seed a Generator.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu.seed()
+    1516516984916
+""",
+)
+
+
+add_docstr(
+    torch.Generator.device,
+    r"""
+Generator.device -> device
+
+Gets the current device of the generator.
+
+Example::
+
+    >>> g_cpu = torch.Generator()
+    >>> g_cpu.device
+    device(type='cpu')
+""",
+)
+
+add_docstr(
+    torch._assert_async,
+    r"""
+_assert_async(tensor) -> void
+
+Asynchronously assert that the contents of tensor are nonzero.  For CPU tensors,
+this is equivalent to ``assert tensor`` or ``assert tensor.is_nonzero()``; for
+CUDA tensors, we DO NOT synchronize and you may only find out the assertion
+failed at a later CUDA kernel launch.  Asynchronous assertion can be helpful for
+testing invariants in CUDA tensors without giving up performance.  This function
+is NOT intended to be used for regular error checking, as it will trash your CUDA
+context if the assert fails (forcing you to restart your PyTorch process.)
+
+Args:
+    tensor (Tensor): a one element tensor to test to see if it is nonzero.  Zero
+        elements (including False for boolean tensors) cause an assertion failure
+        to be raised.
+""",
+)
+
+add_docstr(
+    torch.searchsorted,
+    r"""
+searchsorted(sorted_sequence, values, *, out_int32=False, right=False, side=None, out=None, sorter=None) -> Tensor
+
+Find the indices from the *innermost* dimension of :attr:`sorted_sequence` such that, if the
+corresponding values in :attr:`values` were inserted before the indices, when sorted, the order
+of the corresponding *innermost* dimension within :attr:`sorted_sequence` would be preserved.
+Return a new tensor with the same size as :attr:`values`. More formally,
+the returned index satisfies the following rules:
+
+.. list-table::
+   :widths: 12 10 78
+   :header-rows: 1
+
+   * - :attr:`sorted_sequence`
+     - :attr:`right`
+     - *returned index satisfies*
+   * - 1-D
+     - False
+     - ``sorted_sequence[i-1] < values[m][n]...[l][x] <= sorted_sequence[i]``
+   * - 1-D
+     - True
+     - ``sorted_sequence[i-1] <= values[m][n]...[l][x] < sorted_sequence[i]``
+   * - N-D
+     - False
+     - ``sorted_sequence[m][n]...[l][i-1] < values[m][n]...[l][x] <= sorted_sequence[m][n]...[l][i]``
+   * - N-D
+     - True
+     - ``sorted_sequence[m][n]...[l][i-1] <= values[m][n]...[l][x] < sorted_sequence[m][n]...[l][i]``
+
+Args:
+    sorted_sequence (Tensor): N-D or 1-D tensor, containing monotonically increasing sequence on the *innermost*
+                              dimension unless :attr:`sorter` is provided, in which case the sequence does not
+                              need to be sorted
+    values (Tensor or Scalar): N-D tensor or a Scalar containing the search value(s).
+
+Keyword args:
+    out_int32 (bool, optional): indicate the output data type. torch.int32 if True, torch.int64 otherwise.
+                                Default value is False, i.e. default output data type is torch.int64.
+    right (bool, optional): if False, return the first suitable location that is found. If True, return the
+                            last such index. If no suitable index found, return 0 for non-numerical value
+                            (eg. nan, inf) or the size of *innermost* dimension within :attr:`sorted_sequence`
+                            (one pass the last index of the *innermost* dimension). In other words, if False,
+                            gets the lower bound index for each value in :attr:`values` on the corresponding
+                            *innermost* dimension of the :attr:`sorted_sequence`. If True, gets the upper
+                            bound index instead. Default value is False. :attr:`side` does the same and is
+                            preferred. It will error if :attr:`side` is set to "left" while this is True.
+    side (str, optional): the same as :attr:`right` but preferred. "left" corresponds to False for :attr:`right`
+                            and "right" corresponds to True for :attr:`right`. It will error if this is set to
+                            "left" while :attr:`right` is True. Default value is None.
+    out (Tensor, optional): the output tensor, must be the same size as :attr:`values` if provided.
+    sorter (LongTensor, optional): if provided, a tensor matching the shape of the unsorted
+                            :attr:`sorted_sequence` containing a sequence of indices that sort it in the
+                            ascending order on the innermost dimension
+
+
+Example::
+
+    >>> sorted_sequence = torch.tensor([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]])
+    >>> sorted_sequence
+    tensor([[ 1,  3,  5,  7,  9],
+            [ 2,  4,  6,  8, 10]])
+    >>> values = torch.tensor([[3, 6, 9], [3, 6, 9]])
+    >>> values
+    tensor([[3, 6, 9],
+            [3, 6, 9]])
+    >>> torch.searchsorted(sorted_sequence, values)
+    tensor([[1, 3, 4],
+            [1, 2, 4]])
+    >>> torch.searchsorted(sorted_sequence, values, side='right')
+    tensor([[2, 3, 5],
+            [1, 3, 4]])
+
+    >>> sorted_sequence_1d = torch.tensor([1, 3, 5, 7, 9])
+    >>> sorted_sequence_1d
+    tensor([1, 3, 5, 7, 9])
+    >>> torch.searchsorted(sorted_sequence_1d, values)
+    tensor([[1, 3, 4],
+            [1, 3, 4]])
+""",
+)
+
+add_docstr(
+    torch.bucketize,
+    r"""
+bucketize(input, boundaries, *, out_int32=False, right=False, out=None) -> Tensor
+
+Returns the indices of the buckets to which each value in the :attr:`input` belongs, where the
+boundaries of the buckets are set by :attr:`boundaries`. Return a new tensor with the same size
+as :attr:`input`. If :attr:`right` is False (default), then the left boundary is open. Note that
+this behavior is opposite the behavior of
+`numpy.digitize <https://docs.scipy.org/doc/numpy/reference/generated/numpy.digitize.html>`_.
+More formally, the returned index satisfies the following rules:
+
+.. list-table::
+   :widths: 15 85
+   :header-rows: 1
+
+   * - :attr:`right`
+     - *returned index satisfies*
+   * - False
+     - ``boundaries[i-1] < input[m][n]...[l][x] <= boundaries[i]``
+   * - True
+     - ``boundaries[i-1] <= input[m][n]...[l][x] < boundaries[i]``
+
+Args:
+    input (Tensor or Scalar): N-D tensor or a Scalar containing the search value(s).
+    boundaries (Tensor): 1-D tensor, must contain a strictly increasing sequence, or the return value is undefined.
+
+Keyword args:
+    out_int32 (bool, optional): indicate the output data type. torch.int32 if True, torch.int64 otherwise.
+                                Default value is False, i.e. default output data type is torch.int64.
+    right (bool, optional): if False, return the first suitable location that is found. If True, return the
+                            last such index. If no suitable index found, return 0 for non-numerical value
+                            (eg. nan, inf) or the size of :attr:`boundaries` (one pass the last index).
+                            In other words, if False, gets the lower bound index for each value in :attr:`input`
+                            from :attr:`boundaries`. If True, gets the upper bound index instead.
+                            Default value is False.
+    out (Tensor, optional): the output tensor, must be the same size as :attr:`input` if provided.
+
+
+Example::
+
+    >>> boundaries = torch.tensor([1, 3, 5, 7, 9])
+    >>> boundaries
+    tensor([1, 3, 5, 7, 9])
+    >>> v = torch.tensor([[3, 6, 9], [3, 6, 9]])
+    >>> v
+    tensor([[3, 6, 9],
+            [3, 6, 9]])
+    >>> torch.bucketize(v, boundaries)
+    tensor([[1, 3, 4],
+            [1, 3, 4]])
+    >>> torch.bucketize(v, boundaries, right=True)
+    tensor([[2, 3, 5],
+            [2, 3, 5]])
+""",
+)
+
+add_docstr(
+    torch.view_as_real_copy,
+    r"""
+Performs the same operation as :func:`torch.view_as_real`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.view_as_complex_copy,
+    r"""
+Performs the same operation as :func:`torch.view_as_complex`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.as_strided_copy,
+    r"""
+Performs the same operation as :func:`torch.as_strided`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.diagonal_copy,
+    r"""
+Performs the same operation as :func:`torch.diagonal`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.expand_copy,
+    r"""
+Performs the same operation as :func:`torch.expand`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.permute_copy,
+    r"""
+Performs the same operation as :func:`torch.permute`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.select_copy,
+    r"""
+Performs the same operation as :func:`torch.select`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.detach_copy,
+    r"""
+Performs the same operation as :func:`torch.detach`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.slice_copy,
+    r"""
+Performs the same operation as :func:`torch.slice`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.split_copy,
+    r"""
+Performs the same operation as :func:`torch.split`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.split_with_sizes_copy,
+    r"""
+Performs the same operation as :func:`torch.split_with_sizes`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.squeeze_copy,
+    r"""
+Performs the same operation as :func:`torch.squeeze`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.t_copy,
+    r"""
+Performs the same operation as :func:`torch.t`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.transpose_copy,
+    r"""
+Performs the same operation as :func:`torch.transpose`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.unsqueeze_copy,
+    r"""
+Performs the same operation as :func:`torch.unsqueeze`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.indices_copy,
+    r"""
+Performs the same operation as :func:`torch.indices`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.values_copy,
+    r"""
+Performs the same operation as :func:`torch.values`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.crow_indices_copy,
+    r"""
+Performs the same operation as :func:`torch.crow_indices`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.col_indices_copy,
+    r"""
+Performs the same operation as :func:`torch.col_indices`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.unbind_copy,
+    r"""
+Performs the same operation as :func:`torch.unbind`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.view_copy,
+    r"""
+Performs the same operation as :func:`torch.view`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.unfold_copy,
+    r"""
+Performs the same operation as :func:`torch.unfold`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+add_docstr(
+    torch.alias_copy,
+    r"""
+Performs the same operation as :func:`torch.alias`, but all output tensors
+are freshly created instead of aliasing the input.
+""",
+)
+
+for unary_base_func_name in (
+    "exp",
+    "sqrt",
+    "abs",
+    "acos",
+    "asin",
+    "atan",
+    "ceil",
+    "cos",
+    "cosh",
+    "erf",
+    "erfc",
+    "expm1",
+    "floor",
+    "log",
+    "log10",
+    "log1p",
+    "log2",
+    "neg",
+    "tan",
+    "tanh",
+    "sin",
+    "sinh",
+    "round",
+    "lgamma",
+    "frac",
+    "reciprocal",
+    "sigmoid",
+    "trunc",
+    "zero",
+):
+    unary_foreach_func_name = f"_foreach_{unary_base_func_name}"
+    if hasattr(torch, unary_foreach_func_name):
+        add_docstr(
+            getattr(torch, unary_foreach_func_name),
+            rf"""
+{unary_foreach_func_name}(self: List[Tensor]) -> List[Tensor]
+
+Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list.
+            """,
+        )
+    unary_inplace_foreach_func_name = f"{unary_foreach_func_name}_"
+    if hasattr(torch, unary_inplace_foreach_func_name):
+        add_docstr(
+            getattr(torch, unary_inplace_foreach_func_name),
+            rf"""
+{unary_inplace_foreach_func_name}(self: List[Tensor]) -> None
+
+Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list.
+        """,
+        )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62fd410c23eac9297ebd28d0438e77ea0846e1e2
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eef02d728c877202fd7c1ab28055223abdaaaa5e
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3d273ec77be768c084576a22b9267fa7e3d6e043
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce571862b4275063d49d60a259014cb4efcffd4d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py
@@ -0,0 +1,6 @@
+import torch
+from .linear_relu import LinearReLU
+
+__all__ = [
+    'LinearReLU',
+]
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cc55f075a928d33af5591aa94ebbc4336954184a
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..54cc19e3c808f806749d59eecafdfc1189e2c7c3
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d79bdbfe83209f18b17cc8c7b245f322871d6c0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py
@@ -0,0 +1 @@
+from .modules import *  # noqa: F403
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fdd2800e5f21c288be20caf2261428b4309701b4
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5555796e825db3580b37a068e91991100572e951
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..776e0d8e79a400fed6364b9a67bd711a6cf582bc
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d79bdbfe83209f18b17cc8c7b245f322871d6c0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py
@@ -0,0 +1 @@
+from .modules import *  # noqa: F403
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..acbefee3866bbdc2e8fdf9f40edd0d469053470c
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2fa82055e39c506efba2ed15460cfe5049839abd
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d68659c3aba5ffbb8050e492a30d798e8302efa
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c57d1ae9bc51b6270279d81ee8a46cf80148bea
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py
@@ -0,0 +1,411 @@
+import numbers
+from typing import Optional, Tuple
+import warnings
+
+import torch
+from torch import Tensor
+
+"""
+We will recreate all the RNN modules as we require the modules to be decomposed
+into its building blocks to be able to observe.
+"""
+
+__all__ = [
+    "LSTMCell",
+    "LSTM"
+]
+
+class LSTMCell(torch.nn.Module):
+    r"""A quantizable long short-term memory (LSTM) cell.
+
+    For the description and the argument types, please, refer to :class:`~torch.nn.LSTMCell`
+
+    Examples::
+
+        >>> import torch.ao.nn.quantizable as nnqa
+        >>> rnn = nnqa.LSTMCell(10, 20)
+        >>> input = torch.randn(6, 10)
+        >>> hx = torch.randn(3, 20)
+        >>> cx = torch.randn(3, 20)
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx, cx = rnn(input[i], (hx, cx))
+        ...     output.append(hx)
+    """
+    _FLOAT_MODULE = torch.nn.LSTMCell
+
+    def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
+                 device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__()
+        self.input_size = input_dim
+        self.hidden_size = hidden_dim
+        self.bias = bias
+
+        self.igates = torch.nn.Linear(input_dim, 4 * hidden_dim, bias=bias, **factory_kwargs)
+        self.hgates = torch.nn.Linear(hidden_dim, 4 * hidden_dim, bias=bias, **factory_kwargs)
+        self.gates = torch.ao.nn.quantized.FloatFunctional()
+
+        self.input_gate = torch.nn.Sigmoid()
+        self.forget_gate = torch.nn.Sigmoid()
+        self.cell_gate = torch.nn.Tanh()
+        self.output_gate = torch.nn.Sigmoid()
+
+        self.fgate_cx = torch.ao.nn.quantized.FloatFunctional()
+        self.igate_cgate = torch.ao.nn.quantized.FloatFunctional()
+        self.fgate_cx_igate_cgate = torch.ao.nn.quantized.FloatFunctional()
+
+        self.ogate_cy = torch.ao.nn.quantized.FloatFunctional()
+
+        self.initial_hidden_state_qparams: Tuple[float, int] = (1.0, 0)
+        self.initial_cell_state_qparams: Tuple[float, int] = (1.0, 0)
+        self.hidden_state_dtype: torch.dtype = torch.quint8
+        self.cell_state_dtype: torch.dtype = torch.quint8
+
+    def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
+        if hidden is None or hidden[0] is None or hidden[1] is None:
+            hidden = self.initialize_hidden(x.shape[0], x.is_quantized)
+        hx, cx = hidden
+
+        igates = self.igates(x)
+        hgates = self.hgates(hx)
+        gates = self.gates.add(igates, hgates)
+
+        input_gate, forget_gate, cell_gate, out_gate = gates.chunk(4, 1)
+
+        input_gate = self.input_gate(input_gate)
+        forget_gate = self.forget_gate(forget_gate)
+        cell_gate = self.cell_gate(cell_gate)
+        out_gate = self.output_gate(out_gate)
+
+        fgate_cx = self.fgate_cx.mul(forget_gate, cx)
+        igate_cgate = self.igate_cgate.mul(input_gate, cell_gate)
+        fgate_cx_igate_cgate = self.fgate_cx_igate_cgate.add(fgate_cx, igate_cgate)
+        cy = fgate_cx_igate_cgate
+
+        # TODO: make this tanh a member of the module so its qparams can be configured
+        tanh_cy = torch.tanh(cy)
+        hy = self.ogate_cy.mul(out_gate, tanh_cy)
+        return hy, cy
+
+    def initialize_hidden(self, batch_size: int, is_quantized: bool = False) -> Tuple[Tensor, Tensor]:
+        h, c = torch.zeros((batch_size, self.hidden_size)), torch.zeros((batch_size, self.hidden_size))
+        if is_quantized:
+            (h_scale, h_zp) = self.initial_hidden_state_qparams
+            (c_scale, c_zp) = self.initial_cell_state_qparams
+            h = torch.quantize_per_tensor(h, scale=h_scale, zero_point=h_zp, dtype=self.hidden_state_dtype)
+            c = torch.quantize_per_tensor(c, scale=c_scale, zero_point=c_zp, dtype=self.cell_state_dtype)
+        return h, c
+
+    def _get_name(self):
+        return 'QuantizableLSTMCell'
+
+    @classmethod
+    def from_params(cls, wi, wh, bi=None, bh=None):
+        """Uses the weights and biases to create a new LSTM cell.
+
+        Args:
+            wi, wh: Weights for the input and hidden layers
+            bi, bh: Biases for the input and hidden layers
+        """
+        assert (bi is None) == (bh is None)  # Either both None or both have values
+        input_size = wi.shape[1]
+        hidden_size = wh.shape[1]
+        cell = cls(input_dim=input_size, hidden_dim=hidden_size,
+                   bias=(bi is not None))
+        cell.igates.weight = torch.nn.Parameter(wi)
+        if bi is not None:
+            cell.igates.bias = torch.nn.Parameter(bi)
+        cell.hgates.weight = torch.nn.Parameter(wh)
+        if bh is not None:
+            cell.hgates.bias = torch.nn.Parameter(bh)
+        return cell
+
+    @classmethod
+    def from_float(cls, other):
+        assert type(other) == cls._FLOAT_MODULE
+        assert hasattr(other, 'qconfig'), "The float module must have 'qconfig'"
+        observed = cls.from_params(other.weight_ih, other.weight_hh,
+                                   other.bias_ih, other.bias_hh)
+        observed.qconfig = other.qconfig
+        observed.igates.qconfig = other.qconfig
+        observed.hgates.qconfig = other.qconfig
+        return observed
+
+
+class _LSTMSingleLayer(torch.nn.Module):
+    r"""A single one-directional LSTM layer.
+
+    The difference between a layer and a cell is that the layer can process a
+    sequence, while the cell only expects an instantaneous value.
+    """
+    def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
+                 device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__()
+        self.cell = LSTMCell(input_dim, hidden_dim, bias=bias, **factory_kwargs)
+
+    def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
+        result = []
+        seq_len = x.shape[0]
+        for i in range(seq_len):
+            hidden = self.cell(x[i], hidden)
+            result.append(hidden[0])  # type: ignore[index]
+        result_tensor = torch.stack(result, 0)
+        return result_tensor, hidden
+
+    @classmethod
+    def from_params(cls, *args, **kwargs):
+        cell = LSTMCell.from_params(*args, **kwargs)
+        layer = cls(cell.input_size, cell.hidden_size, cell.bias)
+        layer.cell = cell
+        return layer
+
+
+class _LSTMLayer(torch.nn.Module):
+    r"""A single bi-directional LSTM layer."""
+    def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
+                 batch_first: bool = False, bidirectional: bool = False,
+                 device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__()
+        self.batch_first = batch_first
+        self.bidirectional = bidirectional
+        self.layer_fw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs)
+        if self.bidirectional:
+            self.layer_bw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs)
+
+    def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
+        if self.batch_first:
+            x = x.transpose(0, 1)
+        if hidden is None:
+            hx_fw, cx_fw = (None, None)
+        else:
+            hx_fw, cx_fw = hidden
+        hidden_bw: Optional[Tuple[Tensor, Tensor]] = None
+        if self.bidirectional:
+            if hx_fw is None:
+                hx_bw = None
+            else:
+                hx_bw = hx_fw[1]
+                hx_fw = hx_fw[0]
+            if cx_fw is None:
+                cx_bw = None
+            else:
+                cx_bw = cx_fw[1]
+                cx_fw = cx_fw[0]
+            if hx_bw is not None and cx_bw is not None:
+                hidden_bw = hx_bw, cx_bw
+        if hx_fw is None and cx_fw is None:
+            hidden_fw = None
+        else:
+            hidden_fw = torch.jit._unwrap_optional(hx_fw), torch.jit._unwrap_optional(cx_fw)
+        result_fw, hidden_fw = self.layer_fw(x, hidden_fw)
+
+        if hasattr(self, 'layer_bw') and self.bidirectional:
+            x_reversed = x.flip(0)
+            result_bw, hidden_bw = self.layer_bw(x_reversed, hidden_bw)
+            result_bw = result_bw.flip(0)
+
+            result = torch.cat([result_fw, result_bw], result_fw.dim() - 1)
+            if hidden_fw is None and hidden_bw is None:
+                h = None
+                c = None
+            elif hidden_fw is None:
+                (h, c) = torch.jit._unwrap_optional(hidden_bw)
+            elif hidden_bw is None:
+                (h, c) = torch.jit._unwrap_optional(hidden_fw)
+            else:
+                h = torch.stack([hidden_fw[0], hidden_bw[0]], 0)  # type: ignore[list-item]
+                c = torch.stack([hidden_fw[1], hidden_bw[1]], 0)  # type: ignore[list-item]
+        else:
+            result = result_fw
+            h, c = torch.jit._unwrap_optional(hidden_fw)  # type: ignore[assignment]
+
+        if self.batch_first:
+            result.transpose_(0, 1)
+
+        return result, (h, c)
+
+    @classmethod
+    def from_float(cls, other, layer_idx=0, qconfig=None, **kwargs):
+        r"""
+        There is no FP equivalent of this class. This function is here just to
+        mimic the behavior of the `prepare` within the `torch.ao.quantization`
+        flow.
+        """
+        assert hasattr(other, 'qconfig') or (qconfig is not None)
+
+        input_size = kwargs.get('input_size', other.input_size)
+        hidden_size = kwargs.get('hidden_size', other.hidden_size)
+        bias = kwargs.get('bias', other.bias)
+        batch_first = kwargs.get('batch_first', other.batch_first)
+        bidirectional = kwargs.get('bidirectional', other.bidirectional)
+
+        layer = cls(input_size, hidden_size, bias, batch_first, bidirectional)
+        layer.qconfig = getattr(other, 'qconfig', qconfig)
+        wi = getattr(other, f'weight_ih_l{layer_idx}')
+        wh = getattr(other, f'weight_hh_l{layer_idx}')
+        bi = getattr(other, f'bias_ih_l{layer_idx}', None)
+        bh = getattr(other, f'bias_hh_l{layer_idx}', None)
+
+        layer.layer_fw = _LSTMSingleLayer.from_params(wi, wh, bi, bh)
+
+        if other.bidirectional:
+            wi = getattr(other, f'weight_ih_l{layer_idx}_reverse')
+            wh = getattr(other, f'weight_hh_l{layer_idx}_reverse')
+            bi = getattr(other, f'bias_ih_l{layer_idx}_reverse', None)
+            bh = getattr(other, f'bias_hh_l{layer_idx}_reverse', None)
+            layer.layer_bw = _LSTMSingleLayer.from_params(wi, wh, bi, bh)
+        return layer
+
+
+class LSTM(torch.nn.Module):
+    r"""A quantizable long short-term memory (LSTM).
+
+    For the description and the argument types, please, refer to :class:`~torch.nn.LSTM`
+
+    Attributes:
+        layers : instances of the `_LSTMLayer`
+
+    .. note::
+        To access the weights and biases, you need to access them per layer.
+        See examples below.
+
+    Examples::
+
+        >>> import torch.ao.nn.quantizable as nnqa
+        >>> rnn = nnqa.LSTM(10, 20, 2)
+        >>> input = torch.randn(5, 3, 10)
+        >>> h0 = torch.randn(2, 3, 20)
+        >>> c0 = torch.randn(2, 3, 20)
+        >>> output, (hn, cn) = rnn(input, (h0, c0))
+        >>> # To get the weights:
+        >>> # xdoctest: +SKIP
+        >>> print(rnn.layers[0].weight_ih)
+        tensor([[...]])
+        >>> print(rnn.layers[0].weight_hh)
+        AssertionError: There is no reverse path in the non-bidirectional layer
+    """
+    _FLOAT_MODULE = torch.nn.LSTM
+
+    def __init__(self, input_size: int, hidden_size: int,
+                 num_layers: int = 1, bias: bool = True,
+                 batch_first: bool = False, dropout: float = 0.,
+                 bidirectional: bool = False,
+                 device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.bias = bias
+        self.batch_first = batch_first
+        self.dropout = float(dropout)
+        self.bidirectional = bidirectional
+        self.training = False  # Default to eval mode. If we want to train, we will explicitly set to training.
+        num_directions = 2 if bidirectional else 1
+
+        if not isinstance(dropout, numbers.Number) or not 0 <= dropout <= 1 or \
+                isinstance(dropout, bool):
+            raise ValueError("dropout should be a number in range [0, 1] "
+                             "representing the probability of an element being "
+                             "zeroed")
+        if dropout > 0:
+            warnings.warn("dropout option for quantizable LSTM is ignored. "
+                          "If you are training, please, use nn.LSTM version "
+                          "followed by `prepare` step.")
+            if num_layers == 1:
+                warnings.warn("dropout option adds dropout after all but last "
+                              "recurrent layer, so non-zero dropout expects "
+                              f"num_layers greater than 1, but got dropout={dropout} "
+                              f"and num_layers={num_layers}")
+
+        layers = [_LSTMLayer(self.input_size, self.hidden_size,
+                             self.bias, batch_first=False,
+                             bidirectional=self.bidirectional, **factory_kwargs)]
+        for layer in range(1, num_layers):
+            layers.append(_LSTMLayer(self.hidden_size, self.hidden_size,
+                                     self.bias, batch_first=False,
+                                     bidirectional=self.bidirectional,
+                                     **factory_kwargs))
+        self.layers = torch.nn.ModuleList(layers)
+
+    def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
+        if self.batch_first:
+            x = x.transpose(0, 1)
+
+        max_batch_size = x.size(1)
+        num_directions = 2 if self.bidirectional else 1
+        if hidden is None:
+            zeros = torch.zeros(num_directions, max_batch_size,
+                                self.hidden_size, dtype=torch.float,
+                                device=x.device)
+            zeros.squeeze_(0)
+            if x.is_quantized:
+                zeros = torch.quantize_per_tensor(zeros, scale=1.0,
+                                                  zero_point=0, dtype=x.dtype)
+            hxcx = [(zeros, zeros) for _ in range(self.num_layers)]
+        else:
+            hidden_non_opt = torch.jit._unwrap_optional(hidden)
+            if isinstance(hidden_non_opt[0], Tensor):
+                hx = hidden_non_opt[0].reshape(self.num_layers, num_directions,
+                                               max_batch_size,
+                                               self.hidden_size)
+                cx = hidden_non_opt[1].reshape(self.num_layers, num_directions,
+                                               max_batch_size,
+                                               self.hidden_size)
+                hxcx = [(hx[idx].squeeze(0), cx[idx].squeeze(0)) for idx in range(self.num_layers)]
+            else:
+                hxcx = hidden_non_opt
+
+        hx_list = []
+        cx_list = []
+        for idx, layer in enumerate(self.layers):
+            x, (h, c) = layer(x, hxcx[idx])
+            hx_list.append(torch.jit._unwrap_optional(h))
+            cx_list.append(torch.jit._unwrap_optional(c))
+        hx_tensor = torch.stack(hx_list)
+        cx_tensor = torch.stack(cx_list)
+
+        # We are creating another dimension for bidirectional case
+        # need to collapse it
+        hx_tensor = hx_tensor.reshape(-1, hx_tensor.shape[-2], hx_tensor.shape[-1])
+        cx_tensor = cx_tensor.reshape(-1, cx_tensor.shape[-2], cx_tensor.shape[-1])
+
+        if self.batch_first:
+            x = x.transpose(0, 1)
+
+        return x, (hx_tensor, cx_tensor)
+
+    def _get_name(self):
+        return 'QuantizableLSTM'
+
+    @classmethod
+    def from_float(cls, other, qconfig=None):
+        assert isinstance(other, cls._FLOAT_MODULE)
+        assert (hasattr(other, 'qconfig') or qconfig)
+        observed = cls(other.input_size, other.hidden_size, other.num_layers,
+                       other.bias, other.batch_first, other.dropout,
+                       other.bidirectional)
+        observed.qconfig = getattr(other, 'qconfig', qconfig)
+        for idx in range(other.num_layers):
+            observed.layers[idx] = _LSTMLayer.from_float(other, idx, qconfig,
+                                                         batch_first=False)
+
+        # Prepare the model
+        if other.training:
+            observed.train()
+            observed = torch.ao.quantization.prepare_qat(observed, inplace=True)
+        else:
+            observed.eval()
+            observed = torch.ao.quantization.prepare(observed, inplace=True)
+        return observed
+
+    @classmethod
+    def from_observed(cls, other):
+        # The whole flow is float -> observed -> quantized
+        # This class does float -> observed only
+        raise NotImplementedError("It looks like you are trying to convert a "
+                                  "non-quantizable LSTM module. Please, see "
+                                  "the examples on quantizable LSTMs.")
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..53cf965c2e48af87b4542b9414e4e3767ed85e92
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py
@@ -0,0 +1,38 @@
+from . import functional
+from .modules import *  # noqa: F403
+from .modules import MaxPool2d
+
+__all__ = [
+    'BatchNorm2d',
+    'BatchNorm3d',
+    'Conv1d',
+    'Conv2d',
+    'Conv3d',
+    'ConvTranspose1d',
+    'ConvTranspose2d',
+    'ConvTranspose3d',
+    'DeQuantize',
+    'ELU',
+    'Embedding',
+    'EmbeddingBag',
+    'GroupNorm',
+    'Hardswish',
+    'InstanceNorm1d',
+    'InstanceNorm2d',
+    'InstanceNorm3d',
+    'LayerNorm',
+    'LeakyReLU',
+    'Linear',
+    'LSTM',
+    'MultiheadAttention',
+    'Quantize',
+    'ReLU6',
+    'Sigmoid',
+    'Softmax',
+    'Dropout',
+    'PReLU',
+    # Wrapper modules
+    'FloatFunctional',
+    'FXFloatFunctional',
+    'QFunctional',
+]
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45ca7141c08f1c1c58447dc22ee61e1f0e0976a5
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7a97e0a8da831d63d25d6f02edf77cb85b280a7
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py
@@ -0,0 +1,19 @@
+
+from .linear import Linear
+from .rnn import LSTM, GRU, LSTMCell, RNNCell, GRUCell
+from .conv import Conv1d, Conv2d, Conv3d, ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
+
+__all__ = [
+    'Linear',
+    'LSTM',
+    'GRU',
+    'LSTMCell',
+    'RNNCell',
+    'GRUCell',
+    'Conv1d',
+    'Conv2d',
+    'Conv3d',
+    'ConvTranspose1d',
+    'ConvTranspose2d',
+    'ConvTranspose3d',
+]
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d626cdf296756b301ffa7bf4ae4715d0cae65a9
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1af77964136556d209605665f77ed51c3a0d44e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py
@@ -0,0 +1,399 @@
+r"""Dynamically quantized convolution modules."""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from torch import Tensor
+from torch._ops import ops
+from torch.nn.common_types import _size_1_t
+from torch.nn.modules.utils import _single, _pair, _triple
+from torch.ao.nn.quantized.modules.conv import _reverse_repeat_padding
+import torch.ao.nn.quantized as nnq
+import warnings
+
+__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d']
+
+
+class Conv1d(nnq.Conv1d):
+    r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.Conv1d` and :class:`~torch.ao.nn.quantized.dynamic.Conv1d` and
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+
+    See :class:`~torch.nn.Conv1d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> m = nn.quantized.dynamic.Conv1d(16, 33, 3, stride=2)
+        >>> input = torch.randn(20, 16, 100)
+        >>> output = m(input)
+
+    """
+
+    _FLOAT_MODULE = nn.Conv1d
+    _NNIQAT_CONV_BN_MODULE = None  # type: ignore[assignment]
+    _NNI_CONV_RELU_MODULE = None  # type: ignore[assignment]
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: _size_1_t,
+                 stride: _size_1_t = 1,
+                 padding: _size_1_t = 0,
+                 dilation: _size_1_t = 1,
+                 groups: int = 1,
+                 bias: bool = True,
+                 padding_mode: str = 'zeros',
+                 device=None,
+                 dtype=None,
+                 reduce_range=True):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        kernel_size = _single(kernel_size)
+        stride = _single(stride)
+        padding = padding if isinstance(padding, str) else _single(padding)
+        dilation = _single(dilation)
+
+        super().__init__(
+            in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, bias, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConv1d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 3:
+            raise ValueError("Input shape must be `(N, C, L)`!")
+        if self.padding_mode != 'zeros':
+            # Padding in Conv1d is stored as (p, p), need to get (p,)
+            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding[:1])
+            input = F.pad(input, _reversed_padding_repeated_twice,
+                          mode=self.padding_mode)
+        return ops.quantized.conv1d_dynamic(input, self._packed_params, reduce_range)
+
+
+class Conv2d(nnq.Conv2d):
+    r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.Conv2d` and :class:`~torch.ao.nn.quantized.dynamic.Conv2d` and
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+
+    See :class:`~torch.nn.Conv2d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> # With square kernels and equal stride
+        >>> m = nn.quantized.dynamic.Conv2d(16, 33, 3, stride=2)
+        >>> # non-square kernels and unequal stride and with padding
+        >>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
+        >>> # non-square kernels and unequal stride and with padding and dilation
+        >>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
+        >>> input = torch.randn(20, 16, 50, 100)
+        >>> output = m(input)
+
+    """
+    _FLOAT_MODULE = nn.Conv2d
+    _NNIQAT_CONV_BN_MODULE = None  # type: ignore[assignment]
+    _NNI_CONV_RELU_MODULE = None  # type: ignore[assignment]
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True,
+                 padding_mode='zeros', device=None, dtype=None):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        kernel_size = _pair(kernel_size)
+        stride = _pair(stride)
+        padding = _pair(padding)
+        dilation = _pair(dilation)
+
+        super().__init__(
+            in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, bias, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConv2d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 4:
+            raise ValueError("Input shape must be `(N, C, H, W)`!")
+        if self.padding_mode != 'zeros':
+            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
+            input = F.pad(input, _reversed_padding_repeated_twice,
+                          mode=self.padding_mode)
+        return ops.quantized.conv2d_dynamic(
+            input, self._packed_params, reduce_range)
+
+
+class Conv3d(nnq.Conv3d):
+    r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.Conv3d` and :class:`~torch.ao.nn.quantized.dynamic.Conv3d` and
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+
+    See :class:`~torch.nn.Conv3d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> # With square kernels and equal stride
+        >>> m = nn.quantized.dynamic.Conv3d(16, 33, 3, stride=2)
+        >>> # non-square kernels and unequal stride and with padding
+        >>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
+        >>> # non-square kernels and unequal stride and with padding and dilation
+        >>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2), dilation=(1, 2, 2))
+        >>> input = torch.randn(20, 16, 56, 56, 56)
+        >>> output = m(input)
+
+    """
+    _FLOAT_MODULE = nn.Conv3d
+    _NNIQAT_CONV_BN_MODULE = None  # type: ignore[assignment]
+    _NNI_CONV_RELU_MODULE = None  # type: ignore[assignment]
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True,
+                 padding_mode='zeros', device=None, dtype=None):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        assert padding_mode != 'reflect', "Conv3d does not support reflection padding"
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        kernel_size = _triple(kernel_size)
+        stride = _triple(stride)
+        padding = _triple(padding)
+        dilation = _triple(dilation)
+        super()._init(
+            in_channels, out_channels, kernel_size, stride, padding, dilation,
+            False, _triple(0), groups, bias, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConv3d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 5:
+            raise ValueError("Input shape must be `(N, C, D, H, W)`!")
+        if self.padding_mode != 'zeros':
+            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
+            input = F.pad(input, _reversed_padding_repeated_twice,
+                          mode=self.padding_mode)
+        return ops.quantized.conv3d_dynamic(
+            input, self._packed_params, reduce_range)
+
+
+class ConvTranspose1d(nnq.ConvTranspose1d):
+    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.ConvTranspose1d`.
+
+    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv1d`
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+    See :class:`~torch.nn.ConvTranspose1d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> # With square kernels and equal stride
+        >>> m = nndq.ConvTranspose1d(16, 33, 3, stride=2)
+        >>> # non-square kernels and unequal stride and with padding
+        >>> m = nndq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
+        >>> output = m(input)
+        >>> # exact output size can be also specified as an argument
+        >>> downsample = nndq.Conv1d(16, 16, 3, stride=2, padding=1)
+        >>> upsample = nndq.ConvTranspose1d(16, 16, 3, stride=2, padding=1)
+        >>> h = downsample(input)
+        >>> h.size()
+        torch.Size([1, 16, 6])
+        >>> output = upsample(h, output_size=input.size())
+        >>> output.size()
+        torch.Size([1, 16, 12])
+    """
+
+    _FLOAT_MODULE = nn.ConvTranspose1d
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0, groups=1, bias=True,
+                 dilation=1, padding_mode='zeros', device=None, dtype=None):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(
+            in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConvTranspose1d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 3:
+            raise ValueError("Input shape must be `(N, C, L)`!")
+        return torch.ops.quantized.conv_transpose1d_dynamic(
+            input, self._packed_params, reduce_range)
+
+
+class ConvTranspose2d(nnq.ConvTranspose2d):
+    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.ConvTranspose2d`.
+
+    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv2d`
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+    See :class:`~torch.nn.ConvTranspose2d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> # With square kernels and equal stride
+        >>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2)
+        >>> # non-square kernels and unequal stride and with padding
+        >>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
+        >>> output = m(input)
+        >>> # exact output size can be also specified as an argument
+        >>> downsample = nnq.Conv2d(16, 16, 3, stride=2, padding=1)
+        >>> upsample = nnq.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
+        >>> h = downsample(input)
+        >>> h.size()
+        torch.Size([1, 16, 6, 6])
+        >>> output = upsample(h, output_size=input.size())
+        >>> output.size()
+        torch.Size([1, 16, 12, 12])
+    """
+
+    _FLOAT_MODULE = nn.ConvTranspose2d
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0, groups=1, bias=True,
+                 dilation=1, padding_mode='zeros', device=None, dtype=None):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(
+            in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConvTranspose2d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 4:
+            raise ValueError("Input shape must be `(N, C, H, W)`!")
+        return ops.quantized.conv_transpose2d_dynamic(
+            input, self._packed_params, reduce_range)
+
+
+class ConvTranspose3d(nnq.ConvTranspose3d):
+    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.
+
+    For details on input arguments, parameters, and implementation see
+    :class:`~torch.nn.ConvTranspose3d`.
+
+    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv3d`
+
+    Attributes:
+        weight (Tensor):     packed tensor derived from the learnable weight
+                             parameter.
+        scale (Tensor):      scalar for the output scale
+        zero_point (Tensor): scalar for the output zero point
+    See :class:`~torch.nn.ConvTranspose3d` for other attributes.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> # With cubic kernels and equal stride
+        >>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2)
+        >>> # non-cubic kernels and unequal stride and with padding
+        >>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2))
+        >>> output = m(input)
+        >>> # exact output size can be also specified as an argument
+        >>> downsample = nnq.Conv3d(16, 16, 3, stride=2, padding=1)
+        >>> upsample = nnq.ConvTranspose3d(16, 16, 3, stride=2, padding=1)
+        >>> h = downsample(input)
+        >>> h.size()
+        torch.Size([1, 16, 6, 6, 6])
+        >>> output = upsample(h, output_size=input.size())
+        >>> output.size()
+        torch.Size([1, 16, 12, 12, 12])
+    """
+
+    _FLOAT_MODULE = nn.ConvTranspose3d
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0, groups=1, bias=True,
+                 dilation=1, padding_mode='zeros', device=None, dtype=None):
+        warnings.warn(
+            "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
+                self._get_name()
+            )
+        )
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(
+            in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, **factory_kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedConvTranspose3d'
+
+    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
+        # Temporarily using len(shape) instead of ndim due to JIT issue
+        # https://github.com/pytorch/pytorch/issues/23890
+        if len(input.shape) != 5:
+            raise ValueError("Input shape must be `(N, C, T, H, W)`!")
+        return ops.quantized.conv_transpose3d_dynamic(
+            input, self._packed_params, reduce_range)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8a366e57f53cd022fe6e12e20b76f76afdd4726
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py
@@ -0,0 +1,132 @@
+import torch
+import torch.ao.nn.quantized as nnq
+from torch.ao.nn.quantized.modules.utils import _quantize_weight
+import torch.ao.nn.intrinsic as nni
+
+__all__ = [
+    "Linear",
+]
+
+
+class Linear(nnq.Linear):
+    r"""
+    A dynamic quantized linear module with floating point tensor as inputs and outputs.
+    We adopt the same interface as `torch.nn.Linear`, please see
+    https://pytorch.org/docs/stable/nn.html#torch.nn.Linear for documentation.
+
+    Similar to :class:`torch.nn.Linear`, attributes will be randomly
+    initialized at module creation time and will be overwritten later
+
+    Attributes:
+        weight (Tensor): the non-learnable quantized weights of the module which are of
+                         shape :math:`(\text{out\_features}, \text{in\_features})`.
+        bias (Tensor): the non-learnable floating point bias of the module of shape
+                       :math:`(\text{out\_features})`. If :attr:`bias` is ``True``,
+                       the values are initialized to zero.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> m = nn.quantized.dynamic.Linear(20, 30)
+        >>> input = torch.randn(128, 20)
+        >>> output = m(input)
+        >>> print(output.size())
+        torch.Size([128, 30])
+    """
+    # version used in this class is different from the parent class nnq.Linear
+    _version = 4
+
+    def __init__(self, in_features, out_features, bias_=True, dtype=torch.qint8):
+        super().__init__(in_features, out_features, bias_, dtype=dtype)
+        # We don't muck around with buffers or attributes or anything here
+        # to keep the module simple. *everything* is simply a Python attribute.
+        # Serialization logic is explicitly handled in the below serialization and
+        # deserialization modules
+        self.version = 4
+
+    def forward(self, x):
+        # Note that we can handle self.bias == None case.
+        if self._packed_params.dtype == torch.qint8:
+            if self.version is None or self.version < 4:
+                Y = torch.ops.quantized.linear_dynamic(
+                    x, self._packed_params._packed_params)
+            else:
+                Y = torch.ops.quantized.linear_dynamic(
+                    x, self._packed_params._packed_params, reduce_range=True)
+        elif self._packed_params.dtype == torch.float16:
+            Y = torch.ops.quantized.linear_dynamic_fp16(
+                x, self._packed_params._packed_params)
+        else:
+            raise RuntimeError('Unsupported dtype on dynamic quantized linear!')
+        return Y.to(x.dtype)
+
+    def _get_name(self):
+        return 'DynamicQuantizedLinear'
+
+    def extra_repr(self):
+        extra_repr_str = 'in_features={}, out_features={}, dtype={}'.format(
+            self.in_features, self.out_features, self._packed_params.dtype
+        )
+        if self._packed_params.dtype == torch.qint8:
+            extra_repr_str += f', qscheme={self.weight().qscheme()}'
+        return extra_repr_str
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        version = local_metadata.get('version', None)
+        self.version = version
+        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
+                                      missing_keys, unexpected_keys, error_msgs)
+
+    @classmethod
+    def from_float(cls, mod):
+        r"""Create a dynamic quantized module from a float module or qparams_dict
+
+        Args:
+            mod (Module): a float module, either produced by torch.ao.quantization
+                          utilities or provided by the user
+        """
+        float_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear,
+                         torch.ao.nn.intrinsic.modules.fused.LinearReLU, torch.ao.nn.qat.dynamic.Linear]
+
+        assert type(mod) in float_modules, \
+            'nn.quantized.dynamic.Linear.from_float only works for one of' + \
+            str([float_mod.__name__ for float_mod in float_modules])
+        assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
+        if type(mod) == nni.LinearReLU:
+            mod = mod[0]
+        if mod.qconfig is not None and mod.qconfig.weight is not None:
+            weight_observer = mod.qconfig.weight()
+        else:
+            # We have the circular import issues if we import the qconfig in the beginning of this file:
+            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
+            # import until we need it.
+            from torch.ao.quantization.qconfig import default_dynamic_qconfig
+            weight_observer = default_dynamic_qconfig.weight()
+        dtype = weight_observer.dtype
+        assert dtype in [torch.qint8, torch.float16], "The only supported dtypes for " \
+            f"dynamic quantized linear are qint8 and float16 got: {dtype}"
+        weight_observer(mod.weight)
+        if dtype == torch.qint8:
+            qweight = _quantize_weight(mod.weight.float(), weight_observer)
+        elif dtype == torch.float16:
+            qweight = mod.weight.float()
+        else:
+            raise RuntimeError('Unsupported dtype specified for dynamic quantized Linear!')
+        qlinear = cls(mod.in_features, mod.out_features, dtype=dtype)
+        qlinear.set_weight_bias(qweight, mod.bias)
+        return qlinear
+
+    @classmethod
+    def from_reference(cls, ref_qlinear):
+        """ Create a (fbgemm/qnnpack) dynamic quantized module from a reference quantized
+        module
+        Args:
+            ref_qlinear (Module): a reference quantized  module, either produced by
+            torch.ao.quantization functions or provided by the user
+        """
+        qlinear = cls(ref_qlinear.in_features, ref_qlinear.out_features, dtype=ref_qlinear.weight_dtype)
+        qweight = ref_qlinear.get_quantized_weight()
+        bias = ref_qlinear.bias
+        qlinear.set_weight_bias(qweight, bias)
+        return qlinear
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..02badab6796401e317bb0e75741e501a5b3ca1eb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py
@@ -0,0 +1,1096 @@
+import numbers
+import warnings
+
+import torch
+import torch.nn as nn
+from torch import Tensor  # noqa: F401
+from torch._jit_internal import Tuple, Optional, List, Union, Dict  # noqa: F401
+from torch.nn.utils.rnn import PackedSequence
+from torch.ao.nn.quantized.modules.utils import _quantize_weight
+
+__all__ = ['pack_weight_bias', 'PackedParameter', 'RNNBase', 'LSTM', 'GRU', 'RNNCellBase', 'RNNCell', 'LSTMCell',
+           'GRUCell', "apply_permutation"]
+
+
+def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
+    return tensor.index_select(dim, permutation)
+
+
+def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
+    warnings.warn("apply_permutation is deprecated, please use tensor.index_select(dim, permutation) instead")
+    return _apply_permutation(tensor, permutation, dim)
+
+
+def pack_weight_bias(qweight, bias, dtype):
+
+    if dtype == torch.qint8:
+        # for each layer, for each direction we need to quantize and pack
+        # weights and pack parameters in this order:
+        #
+        #   w_ih, w_hh
+        packed_weight = \
+            torch.ops.quantized.linear_prepack(qweight, bias)
+
+        return packed_weight
+    else:
+        # for each layer, for each direction we need to quantize and pack
+        # weights and pack parameters in this order:
+        #
+        #   packed_ih, packed_hh, b_ih, b_hh
+        packed_weight = torch.ops.quantized.linear_prepack_fp16(
+            qweight, bias)
+
+        return packed_weight
+
+
+class PackedParameter(torch.nn.Module):
+    def __init__(self, param):
+        super().__init__()
+        self.param = param
+
+    def _save_to_state_dict(self, destination, prefix, keep_vars):
+        super()._save_to_state_dict(destination, prefix, keep_vars)
+        destination[prefix + 'param'] = self.param
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        self.param = state_dict[prefix + 'param']
+        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
+                                      missing_keys, unexpected_keys, error_msgs)
+
+
+class RNNBase(torch.nn.Module):
+
+    _FLOAT_MODULE = nn.RNNBase
+
+    _version = 2
+
+    def __init__(self, mode, input_size, hidden_size,
+                 num_layers=1, bias=True, batch_first=False,
+                 dropout=0., bidirectional=False, dtype=torch.qint8):
+        super().__init__()
+
+        self.mode = mode
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.bias = bias
+        self.batch_first = batch_first
+        self.dropout = float(dropout)
+        self.bidirectional = bidirectional
+        self.dtype = dtype
+        self.version = 2
+        self.training = False
+        num_directions = 2 if bidirectional else 1
+
+        # "type: ignore" is required since ints and Numbers are not fully comparable
+        # https://github.com/python/mypy/issues/8566
+        if not isinstance(dropout, numbers.Number) \
+                or not 0 <= dropout <= 1 or isinstance(dropout, bool):  # type: ignore[operator]
+            raise ValueError("dropout should be a number in range [0, 1] "
+                             "representing the probability of an element being "
+                             "zeroed")
+        if dropout > 0 and num_layers == 1:  # type: ignore[operator]
+            warnings.warn("dropout option adds dropout after all but last "
+                          "recurrent layer, so non-zero dropout expects "
+                          f"num_layers greater than 1, but got dropout={dropout} and "
+                          f"num_layers={num_layers}")
+
+        if mode == 'LSTM':
+            gate_size = 4 * hidden_size
+        elif mode == 'GRU':
+            gate_size = 3 * hidden_size
+        else:
+            raise ValueError("Unrecognized RNN mode: " + mode)
+
+        _all_weight_values = []
+        for layer in range(num_layers):
+            for direction in range(num_directions):
+                layer_input_size = input_size if layer == 0 else hidden_size * num_directions
+
+                w_ih = torch.randn(gate_size, layer_input_size).to(torch.float)
+                w_hh = torch.randn(gate_size, hidden_size).to(torch.float)
+                b_ih = torch.randn(gate_size).to(torch.float)
+                b_hh = torch.randn(gate_size).to(torch.float)
+                if dtype == torch.qint8:
+                    w_ih = torch.quantize_per_tensor(w_ih, scale=0.1, zero_point=0, dtype=torch.qint8)
+                    w_hh = torch.quantize_per_tensor(w_hh, scale=0.1, zero_point=0, dtype=torch.qint8)
+                    packed_ih = \
+                        torch.ops.quantized.linear_prepack(w_ih, b_ih)
+                    packed_hh = \
+                        torch.ops.quantized.linear_prepack(w_hh, b_hh)
+                    if self.version is None or self.version < 2:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, b_ih, b_hh)
+                    else:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, b_ih, b_hh, True)
+                else:
+                    packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih)
+                    packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh)
+                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
+                        packed_ih, packed_hh)
+
+                _all_weight_values.append(PackedParameter(cell_params))
+        self._all_weight_values = torch.nn.ModuleList(_all_weight_values)
+
+    def _get_name(self):
+        return 'DynamicQuantizedRNN'
+
+    def extra_repr(self):
+        s = '{input_size}, {hidden_size}'
+        if self.num_layers != 1:
+            s += ', num_layers={num_layers}'
+        if self.bias is not True:
+            s += ', bias={bias}'
+        if self.batch_first is not False:
+            s += ', batch_first={batch_first}'
+        if self.dropout != 0:
+            s += ', dropout={dropout}'
+        if self.bidirectional is not False:
+            s += ', bidirectional={bidirectional}'
+        return s.format(**self.__dict__)
+
+    def __repr__(self):
+        # We don't want to show `ModuleList` children, hence custom
+        # `__repr__`. This is the same as nn.Module.__repr__, except the check
+        # for the `PackedParameter` and `nn.ModuleList`.
+        # You should still override `extra_repr` to add more info.
+        extra_lines = []
+        extra_repr = self.extra_repr()
+        # empty string will be split into list ['']
+        if extra_repr:
+            extra_lines = extra_repr.split('\n')
+        child_lines = []
+        for key, module in self._modules.items():
+            if isinstance(module, (PackedParameter, nn.ModuleList)):
+                continue
+            mod_str = repr(module)
+            mod_str = nn.modules.module._addindent(mod_str, 2)
+            child_lines.append('(' + key + '): ' + mod_str)
+        lines = extra_lines + child_lines
+
+        main_str = self._get_name() + '('
+        if lines:
+            # simple one-liner info, which most builtin Modules will use
+            if len(extra_lines) == 1 and not child_lines:
+                main_str += extra_lines[0]
+            else:
+                main_str += '\n  ' + '\n  '.join(lines) + '\n'
+
+        main_str += ')'
+        return main_str
+
+    def check_input(self, input: Tensor, batch_sizes: Optional[Tensor]) -> None:
+        expected_input_dim = 2 if batch_sizes is not None else 3
+        if input.dim() != expected_input_dim:
+            raise RuntimeError(
+                f'input must have {expected_input_dim} dimensions, got {input.dim()}')
+        if self.input_size != input.size(-1):
+            raise RuntimeError(
+                f'input.size(-1) must be equal to input_size. Expected {self.input_size}, got {input.size(-1)}')
+
+    def get_expected_hidden_size(self, input: Tensor, batch_sizes: Optional[Tensor]) -> Tuple[int, int, int]:
+        if batch_sizes is not None:
+            mini_batch = int(batch_sizes[0])
+        else:
+            mini_batch = input.size(0) if self.batch_first else input.size(1)
+        num_directions = 2 if self.bidirectional else 1
+        expected_hidden_size = (self.num_layers * num_directions,
+                                mini_batch, self.hidden_size)
+        return expected_hidden_size
+
+    def check_hidden_size(
+        self, hx: Tensor, expected_hidden_size: Tuple[int, int, int],
+        msg: str = 'Expected hidden size {}, got {}'
+    ) -> None:
+        if hx.size() != expected_hidden_size:
+            raise RuntimeError(msg.format(
+                expected_hidden_size, list(hx.size())))
+
+    def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None:
+        self.check_input(input, batch_sizes)
+        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
+        self.check_hidden_size(hidden, expected_hidden_size,
+                               msg='Expected hidden size {}, got {}')
+
+    def permute_hidden(self, hx: Tensor, permutation: Optional[Tensor]) -> Tensor:
+        if permutation is None:
+            return hx
+        return _apply_permutation(hx, permutation)
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        version = local_metadata.get('version', None)
+        self.version = version
+        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
+                                      missing_keys, unexpected_keys, error_msgs)
+
+    def set_weight_bias(self, weight_bias_dict):
+
+        def weight_bias_name(ihhh, layer, suffix):
+            weight_name = f"weight_{ihhh}_l{layer}{suffix}"
+            bias_name = f"bias_{ihhh}_l{layer}{suffix}"
+            return weight_name, bias_name
+
+        num_directions = 2 if self.bidirectional else 1
+        # TODO: dedup with __init__ of RNNBase
+        _all_weight_values = []
+        for layer in range(self.num_layers):
+            for direction in range(num_directions):
+                suffix = "_reverse" if direction == 1 else ""
+                w_ih_name, b_ih_name = weight_bias_name("ih", layer, suffix)
+                w_hh_name, b_hh_name = weight_bias_name("hh", layer, suffix)
+                w_ih = weight_bias_dict[w_ih_name]
+                b_ih = weight_bias_dict[b_ih_name]
+                w_hh = weight_bias_dict[w_hh_name]
+                b_hh = weight_bias_dict[b_hh_name]
+                if w_ih.dtype == torch.qint8:
+                    packed_ih = torch.ops.quantized.linear_prepack(w_ih, b_ih)
+                    packed_hh = torch.ops.quantized.linear_prepack(w_hh, b_hh)
+                    if self.version is None or self.version < 2:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, b_ih, b_hh)
+                    else:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, b_ih, b_hh, True)
+                else:
+                    packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih)
+                    packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh)
+                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
+                        packed_ih, packed_hh)
+
+                _all_weight_values.append(PackedParameter(cell_params))
+        self._all_weight_values = torch.nn.ModuleList(_all_weight_values)
+
+    @classmethod
+    def from_float(cls, mod):
+        assert type(mod) in {torch.nn.LSTM,
+                             torch.nn.GRU}, 'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM and nn.GRU'
+        assert hasattr(
+            mod,
+            'qconfig'
+        ), 'Input float module must have qconfig defined'
+
+        if mod.qconfig is not None and mod.qconfig.weight is not None:
+            weight_observer_method = mod.qconfig.weight
+        else:
+            # We have the circular import issues if we import the qconfig in the beginning of this file:
+            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
+            # import until we need it.
+            from torch.ao.quantization.qconfig import default_dynamic_qconfig
+            weight_observer_method = default_dynamic_qconfig.weight
+
+        dtype = weight_observer_method().dtype
+        supported_scalar_types = [torch.qint8, torch.float16]
+        if dtype not in supported_scalar_types:
+            raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}')
+        # RNNBase can be either LSTM or GRU
+        qRNNBase: Union[LSTM, GRU]
+        if mod.mode == 'LSTM':
+            qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
+                            mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
+        elif mod.mode == 'GRU':
+            qRNNBase = GRU(mod.input_size, mod.hidden_size, mod.num_layers,
+                           mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
+        else:
+            raise NotImplementedError('Only LSTM/GRU is supported for QuantizedRNN for now')
+
+        num_directions = 2 if mod.bidirectional else 1
+
+        assert mod.bias
+
+        _all_weight_values = []
+        for layer in range(qRNNBase.num_layers):
+            for direction in range(num_directions):
+                suffix = '_reverse' if direction == 1 else ''
+
+                def retrieve_weight_bias(ihhh):
+                    weight_name = f'weight_{ihhh}_l{layer}{suffix}'
+                    bias_name = f'bias_{ihhh}_l{layer}{suffix}'
+                    weight = getattr(mod, weight_name)
+                    bias = getattr(mod, bias_name)
+                    return weight, bias
+
+                weight_ih, bias_ih = retrieve_weight_bias('ih')
+                weight_hh, bias_hh = retrieve_weight_bias('hh')
+
+                if dtype == torch.qint8:
+                    def quantize_and_pack(w, b):
+                        weight_observer = weight_observer_method()
+                        weight_observer(w)
+                        qweight = _quantize_weight(w.float(), weight_observer)
+                        packed_weight = \
+                            torch.ops.quantized.linear_prepack(qweight, b)
+                        return packed_weight
+                    packed_ih = quantize_and_pack(weight_ih, bias_ih)
+                    packed_hh = quantize_and_pack(weight_hh, bias_hh)
+                    if qRNNBase.version is None or qRNNBase.version < 2:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, bias_ih, bias_hh)
+                    else:
+                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
+                            packed_ih, packed_hh, bias_ih, bias_hh, True)
+
+                elif dtype == torch.float16:
+                    packed_ih = torch.ops.quantized.linear_prepack_fp16(
+                        weight_ih.float(), bias_ih)
+                    packed_hh = torch.ops.quantized.linear_prepack_fp16(
+                        weight_hh.float(), bias_hh)
+
+                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
+                        packed_ih, packed_hh)
+                else:
+                    raise RuntimeError('Unsupported dtype specified for dynamic quantized LSTM!')
+
+                _all_weight_values.append(PackedParameter(cell_params))
+        qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)
+
+        return qRNNBase
+
+    def _weight_bias(self):
+        # Returns a dict of weights and biases
+        weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}}
+        count = 0
+        num_directions = 2 if self.bidirectional else 1
+        for layer in range(self.num_layers):
+            for direction in range(num_directions):
+                suffix = '_reverse' if direction == 1 else ''
+                key_name1 = f'weight_ih_l{layer}{suffix}'
+                key_name2 = f'weight_hh_l{layer}{suffix}'
+                # packed weights are part of torchbind class, CellParamsSerializationType
+                # Within the packed weight class, the weight and bias are accessible as Tensors
+                packed_weight_bias = self._all_weight_values[count].param.__getstate__()[0][4]
+                weight_bias_dict['weight'][key_name1] = packed_weight_bias[0].__getstate__()[0][0]
+                weight_bias_dict['weight'][key_name2] = packed_weight_bias[1].__getstate__()[0][0]
+                key_name1 = f'bias_ih_l{layer}{suffix}'
+                key_name2 = f'bias_hh_l{layer}{suffix}'
+                weight_bias_dict['bias'][key_name1] = packed_weight_bias[0].__getstate__()[0][1]
+                weight_bias_dict['bias'][key_name2] = packed_weight_bias[1].__getstate__()[0][1]
+                count = count + 1
+        return weight_bias_dict
+
+    def get_weight(self):
+        return self._weight_bias()['weight']
+
+    def get_bias(self):
+        return self._weight_bias()['bias']
+
+
+class LSTM(RNNBase):
+    r"""
+    A dynamic quantized LSTM module with floating point tensor as inputs and outputs.
+    We adopt the same interface as `torch.nn.LSTM`, please see
+    https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for documentation.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> rnn = nn.LSTM(10, 20, 2)
+        >>> input = torch.randn(5, 3, 10)
+        >>> h0 = torch.randn(2, 3, 20)
+        >>> c0 = torch.randn(2, 3, 20)
+        >>> output, (hn, cn) = rnn(input, (h0, c0))
+    """
+    _FLOAT_MODULE = nn.LSTM
+
+    __overloads__ = {'forward': ['forward_packed', 'forward_tensor']}
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('LSTM', *args, **kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedLSTM'
+
+    def forward_impl(
+        self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]],
+        batch_sizes: Optional[Tensor], max_batch_size: int,
+        sorted_indices: Optional[Tensor]
+    ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
+        if hx is None:
+            num_directions = 2 if self.bidirectional else 1
+            zeros = torch.zeros(self.num_layers * num_directions,
+                                max_batch_size, self.hidden_size,
+                                dtype=input.dtype, device=input.device)
+            hx = (zeros, zeros)
+        else:
+            # Each batch of the hidden state should match the input sequence that
+            # the user believes he/she is passing in.
+            hx = self.permute_hidden(hx, sorted_indices)
+
+        self.check_forward_args(input, hx, batch_sizes)
+
+        _all_params = ([m.param for m in self._all_weight_values])
+        if batch_sizes is None:
+            result = torch.quantized_lstm(input, hx, _all_params, self.bias, self.num_layers,
+                                          float(self.dropout), self.training, self.bidirectional,
+                                          self.batch_first, dtype=self.dtype, use_dynamic=True)
+        else:
+            result = torch.quantized_lstm(input, batch_sizes, hx, _all_params, self.bias,
+                                          self.num_layers, float(self.dropout), self.training,
+                                          self.bidirectional, dtype=self.dtype, use_dynamic=True)
+        output = result[0]
+        hidden = result[1:]
+
+        return output, hidden
+
+    @torch.jit.export
+    def forward_tensor(
+        self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None
+    ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
+        batch_sizes = None
+        max_batch_size = input.size(0) if self.batch_first else input.size(1)
+        sorted_indices = None
+        unsorted_indices = None
+
+        output, hidden = self.forward_impl(
+            input, hx, batch_sizes, max_batch_size, sorted_indices)
+
+        return output, self.permute_hidden(hidden, unsorted_indices)
+
+    @torch.jit.export
+    def forward_packed(
+        self, input: PackedSequence, hx: Optional[Tuple[Tensor, Tensor]] = None
+    ) -> Tuple[PackedSequence, Tuple[Tensor, Tensor]]:
+        input_, batch_sizes, sorted_indices, unsorted_indices = input
+        max_batch_size = int(batch_sizes[0])
+
+        output_, hidden = self.forward_impl(
+            input_, hx, batch_sizes, max_batch_size, sorted_indices
+        )
+
+        output = PackedSequence(output_, batch_sizes,
+                                sorted_indices, unsorted_indices)
+        return output, self.permute_hidden(hidden, unsorted_indices)
+
+    # "type: ignore" is required due to issue #43072
+    def permute_hidden(  # type: ignore[override]
+        self, hx: Tuple[Tensor, Tensor], permutation: Optional[Tensor]
+    ) -> Tuple[Tensor, Tensor]:
+        if permutation is None:
+            return hx
+        return _apply_permutation(hx[0], permutation), _apply_permutation(hx[1], permutation)
+
+    # "type: ignore" is required due to issue #43072
+    def check_forward_args(  # type: ignore[override]
+        self, input: Tensor, hidden: Tuple[Tensor, Tensor], batch_sizes: Optional[Tensor]
+    ) -> None:
+        self.check_input(input, batch_sizes)
+        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
+
+        self.check_hidden_size(hidden[0], expected_hidden_size,
+                               'Expected hidden[0] size {}, got {}')
+        self.check_hidden_size(hidden[1], expected_hidden_size,
+                               'Expected hidden[1] size {}, got {}')
+
+    @torch.jit.ignore
+    def forward(self, input, hx=None):
+        if isinstance(input, PackedSequence):
+            return self.forward_packed(input, hx)
+        else:
+            return self.forward_tensor(input, hx)
+
+    @classmethod
+    def from_float(cls, mod):
+        return super().from_float(mod)
+
+    @classmethod
+    def from_reference(cls, ref_mod):
+        assert hasattr(ref_mod, "weight_ih_l0_dtype"), "We are assuming weight_ih_l0 "
+        "exists in LSTM, may need to relax the assumption to support the use case"
+        qmod = cls(
+            ref_mod.input_size,
+            ref_mod.hidden_size,
+            ref_mod.num_layers,
+            ref_mod.bias,
+            ref_mod.batch_first,
+            ref_mod.dropout,
+            ref_mod.bidirectional,
+            # assuming there is layer 0, which should be OK
+            ref_mod.weight_ih_l0_dtype,
+        )
+        qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict())
+        return qmod
+
+
+class GRU(RNNBase):
+    r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
+
+
+    For each element in the input sequence, each layer computes the following
+    function:
+
+    .. math::
+        \begin{array}{ll}
+            r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
+            z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
+            n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\
+            h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)}
+        \end{array}
+
+    where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the input
+    at time `t`, :math:`h_{(t-1)}` is the hidden state of the layer
+    at time `t-1` or the initial hidden state at time `0`, and :math:`r_t`,
+    :math:`z_t`, :math:`n_t` are the reset, update, and new gates, respectively.
+    :math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product.
+
+    In a multilayer GRU, the input :math:`x^{(l)}_t` of the :math:`l` -th layer
+    (:math:`l >= 2`) is the hidden state :math:`h^{(l-1)}_t` of the previous layer multiplied by
+    dropout :math:`\delta^{(l-1)}_t` where each :math:`\delta^{(l-1)}_t` is a Bernoulli random
+    variable which is :math:`0` with probability :attr:`dropout`.
+
+    Args:
+        input_size: The number of expected features in the input `x`
+        hidden_size: The number of features in the hidden state `h`
+        num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
+            would mean stacking two GRUs together to form a `stacked GRU`,
+            with the second GRU taking in outputs of the first GRU and
+            computing the final results. Default: 1
+        bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
+            Default: ``True``
+        batch_first: If ``True``, then the input and output tensors are provided
+            as (batch, seq, feature). Default: ``False``
+        dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
+            GRU layer except the last layer, with dropout probability equal to
+            :attr:`dropout`. Default: 0
+        bidirectional: If ``True``, becomes a bidirectional GRU. Default: ``False``
+
+    Inputs: input, h_0
+        - **input** of shape `(seq_len, batch, input_size)`: tensor containing the features
+          of the input sequence. The input can also be a packed variable length
+          sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
+          for details.
+        - **h_0** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
+          containing the initial hidden state for each element in the batch.
+          Defaults to zero if not provided. If the RNN is bidirectional,
+          num_directions should be 2, else it should be 1.
+
+    Outputs: output, h_n
+        - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
+          containing the output features h_t from the last layer of the GRU,
+          for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
+          given as the input, the output will also be a packed sequence.
+          For the unpacked case, the directions can be separated
+          using ``output.view(seq_len, batch, num_directions, hidden_size)``,
+          with forward and backward being direction `0` and `1` respectively.
+
+          Similarly, the directions can be separated in the packed case.
+        - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
+          containing the hidden state for `t = seq_len`
+
+          Like *output*, the layers can be separated using
+          ``h_n.view(num_layers, num_directions, batch, hidden_size)``.
+
+    Shape:
+        - Input1: :math:`(L, N, H_{in})` tensor containing input features where
+          :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length.
+        - Input2: :math:`(S, N, H_{out})` tensor
+          containing the initial hidden state for each element in the batch.
+          :math:`H_{out}=\text{hidden\_size}`
+          Defaults to zero if not provided. where :math:`S=\text{num\_layers} * \text{num\_directions}`
+          If the RNN is bidirectional, num_directions should be 2, else it should be 1.
+        - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}`
+        - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
+    Attributes:
+        weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
+            (W_ir|W_iz|W_in), of shape `(3*hidden_size, input_size)` for `k = 0`.
+            Otherwise, the shape is `(3*hidden_size, num_directions * hidden_size)`
+        weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer
+            (W_hr|W_hz|W_hn), of shape `(3*hidden_size, hidden_size)`
+        bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer
+            (b_ir|b_iz|b_in), of shape `(3*hidden_size)`
+        bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer
+            (b_hr|b_hz|b_hn), of shape `(3*hidden_size)`
+
+    .. note::
+        All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
+        where :math:`k = \frac{1}{\text{hidden\_size}}`
+
+    .. note::
+        The calculation of new gate :math:`n_t` subtly differs from the original paper and other frameworks.
+        In the original implementation, the Hadamard product :math:`(\odot)` between :math:`r_t` and the
+        previous hidden state :math:`h_{(t-1)}` is done before the multiplication with the weight matrix
+        `W` and addition of bias:
+
+        .. math::
+            \begin{aligned}
+                n_t = \tanh(W_{in} x_t + b_{in} + W_{hn} ( r_t \odot h_{(t-1)} ) + b_{hn})
+            \end{aligned}
+
+        This is in contrast to PyTorch implementation, which is done after :math:`W_{hn} h_{(t-1)}`
+
+        .. math::
+            \begin{aligned}
+                n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn}))
+            \end{aligned}
+
+        This implementation differs on purpose for efficiency.
+
+    .. include:: ../cudnn_persistent_rnn.rst
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> rnn = nn.GRU(10, 20, 2)
+        >>> input = torch.randn(5, 3, 10)
+        >>> h0 = torch.randn(2, 3, 20)
+        >>> output, hn = rnn(input, h0)
+    """
+    _FLOAT_MODULE = nn.GRU
+
+    __overloads__ = {'forward': ['forward_packed', 'forward_tensor']}
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('GRU', *args, **kwargs)
+
+    def _get_name(self):
+        return 'DynamicQuantizedGRU'
+
+    def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None:
+        self.check_input(input, batch_sizes)
+        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
+
+        self.check_hidden_size(hidden, expected_hidden_size,
+                               'Expected hidden size {}, got {}')
+
+    def forward_impl(
+        self, input: Tensor, hx: Optional[Tensor],
+        batch_sizes: Optional[Tensor], max_batch_size: int,
+        sorted_indices: Optional[Tensor]
+    ) -> Tuple[Tensor, Tensor]:
+        if hx is None:
+            num_directions = 2 if self.bidirectional else 1
+            zeros = torch.zeros(self.num_layers * num_directions,
+                                max_batch_size, self.hidden_size,
+                                dtype=input.dtype, device=input.device)
+            hx = zeros
+        else:
+            # Each batch of the hidden state should match the input sequence that
+            # the user believes he/she is passing in.
+            hx = self.permute_hidden(hx, sorted_indices)
+
+        self.check_forward_args(input, hx, batch_sizes)
+
+        _all_params = ([m.param for m in self._all_weight_values])
+        if batch_sizes is None:
+            result = torch.quantized_gru(input,
+                                         hx,
+                                         _all_params,
+                                         self.bias,
+                                         self.num_layers,
+                                         self.dropout,
+                                         self.training,
+                                         self.bidirectional,
+                                         self.batch_first)
+        else:
+            result = torch.quantized_gru(input,
+                                         batch_sizes,
+                                         hx,
+                                         _all_params,
+                                         self.bias,
+                                         self.num_layers,
+                                         self.dropout,
+                                         self.training,
+                                         self.bidirectional)
+        output = result[0]
+        hidden = result[1]
+
+        return output, hidden
+
+
+    @torch.jit.export
+    def forward_tensor(
+        self, input: Tensor, hx: Optional[Tensor] = None
+    ) -> Tuple[Tensor, Tensor]:
+        batch_sizes = None
+        max_batch_size = input.size(0) if self.batch_first else input.size(1)
+        sorted_indices = None
+        unsorted_indices = None
+
+        output, hidden = self.forward_impl(
+            input, hx, batch_sizes, max_batch_size, sorted_indices)
+
+        return output, self.permute_hidden(hidden, unsorted_indices)
+
+    @torch.jit.export
+    def forward_packed(
+        self, input: PackedSequence, hx: Optional[Tensor] = None
+    ) -> Tuple[PackedSequence, Tensor]:
+        input_, batch_sizes, sorted_indices, unsorted_indices = input
+        max_batch_size = int(batch_sizes[0])
+        output_, hidden = self.forward_impl(
+            input_, hx, batch_sizes, max_batch_size, sorted_indices
+        )
+
+        output = PackedSequence(output_, batch_sizes,
+                                sorted_indices, unsorted_indices)
+        return output, self.permute_hidden(hidden, unsorted_indices)
+
+    def permute_hidden(
+        self, hx: Tensor, permutation: Optional[Tensor]
+    ) -> Tensor:
+        if permutation is None:
+            return hx
+        return _apply_permutation(hx, permutation)
+
+    @torch.jit.ignore
+    def forward(self, input, hx=None):
+        if isinstance(input, PackedSequence):
+            return self.forward_packed(input, hx)
+        else:
+            return self.forward_tensor(input, hx)
+
+    @classmethod
+    def from_float(cls, mod):
+        return super().from_float(mod)
+
+    @classmethod
+    def from_reference(cls, ref_mod):
+        assert hasattr(ref_mod, "weight_ih_l0_dtype"), "We are assuming weight_ih_l0 "
+        "exists in LSTM, may need to relax the assumption to support the use case"
+        qmod = cls(
+            ref_mod.input_size,
+            ref_mod.hidden_size,
+            ref_mod.num_layers,
+            ref_mod.bias,
+            ref_mod.batch_first,
+            ref_mod.dropout,
+            ref_mod.bidirectional,
+            # assuming there is layer 0, which should be OK
+            ref_mod.weight_ih_l0_dtype,
+        )
+        qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict())
+        return qmod
+
+class RNNCellBase(torch.nn.Module):
+    # _FLOAT_MODULE = nn.CellRNNBase
+    __constants__ = ['input_size', 'hidden_size', 'bias']
+
+    def __init__(self, input_size, hidden_size, bias=True, num_chunks=4, dtype=torch.qint8):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.bias = bias
+        self.weight_dtype = dtype
+        if bias:
+            self.bias_ih = torch.randn(num_chunks * hidden_size).to(dtype=torch.float)
+            self.bias_hh = torch.randn(num_chunks * hidden_size).to(dtype=torch.float)
+        else:
+            self.register_parameter('bias_ih', None)
+            self.register_parameter('bias_hh', None)
+
+        weight_ih = torch.randn(num_chunks * hidden_size, input_size).to(torch.float)
+        weight_hh = torch.randn(num_chunks * hidden_size, hidden_size).to(torch.float)
+        if dtype == torch.qint8:
+            weight_ih = torch.quantize_per_tensor(weight_ih, scale=1, zero_point=0, dtype=torch.qint8)
+            weight_hh = torch.quantize_per_tensor(weight_hh, scale=1, zero_point=0, dtype=torch.qint8)
+
+        if dtype == torch.qint8:
+            # for each layer, for each direction we need to quantize and pack
+            # weights and pack parameters in this order:
+            #
+            #   w_ih, w_hh
+            packed_weight_ih = \
+                torch.ops.quantized.linear_prepack(weight_ih, self.bias_ih)
+            packed_weight_hh = \
+                torch.ops.quantized.linear_prepack(weight_hh, self.bias_hh)
+        else:
+            # for each layer, for each direction we need to quantize and pack
+            # weights and pack parameters in this order:
+            #
+            #   packed_ih, packed_hh, b_ih, b_hh
+            packed_weight_ih = torch.ops.quantized.linear_prepack_fp16(
+                weight_ih, self.bias_ih)
+            packed_weight_hh = torch.ops.quantized.linear_prepack_fp16(
+                weight_hh, self.bias_hh)
+
+        self._packed_weight_ih = packed_weight_ih
+        self._packed_weight_hh = packed_weight_hh
+
+    def _get_name(self):
+        return 'DynamicQuantizedRNNBase'
+
+    def extra_repr(self):
+        s = '{input_size}, {hidden_size}'
+        if 'bias' in self.__dict__ and self.bias is not True:
+            s += ', bias={bias}'
+        if 'nonlinearity' in self.__dict__ and self.nonlinearity != "tanh":
+            s += ', nonlinearity={nonlinearity}'
+        return s.format(**self.__dict__)
+
+    def check_forward_input(self, input):
+        if input.size(1) != self.input_size:
+            raise RuntimeError(
+                f"input has inconsistent input_size: got {input.size(1)}, expected {self.input_size}")
+
+    def check_forward_hidden(self, input: Tensor, hx: Tensor, hidden_label: str = '') -> None:
+        if input.size(0) != hx.size(0):
+            raise RuntimeError(
+                f"Input batch size {input.size(0)} doesn't match hidden{hidden_label} batch size {hx.size(0)}")
+
+        if hx.size(1) != self.hidden_size:
+            raise RuntimeError(
+                f"hidden{hidden_label} has inconsistent hidden_size: got {hx.size(1)}, expected {self.hidden_size}")
+
+    @classmethod
+    def from_float(cls, mod):
+        assert type(mod) in {torch.nn.LSTMCell,
+                             torch.nn.GRUCell,
+                             torch.nn.RNNCell}, 'nn.quantized.dynamic.RNNCellBase.from_float \
+                                 only works for nn.LSTMCell, nn.GRUCell and nn.RNNCell'
+        assert hasattr(
+            mod, 'qconfig'), 'Input float module must have qconfig defined'
+
+        if mod.qconfig is not None and mod.qconfig.weight is not None:
+            weight_observer_method = mod.qconfig.weight
+        else:
+            # We have the circular import issues if we import the qconfig in the beginning of this file:
+            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
+            # import until we need it.
+            from torch.ao.quantization.qconfig import default_dynamic_qconfig
+            weight_observer_method = default_dynamic_qconfig.weight
+
+        dtype = weight_observer_method().dtype
+        supported_scalar_types = [torch.qint8, torch.float16]
+        if dtype not in supported_scalar_types:
+            raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}')
+
+        qRNNCellBase: Union[LSTMCell, GRUCell, RNNCell]
+
+        if type(mod) == torch.nn.LSTMCell:
+            qRNNCellBase = LSTMCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype)
+        elif type(mod) == torch.nn.GRUCell:
+            qRNNCellBase = GRUCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype)
+        elif type(mod) == torch.nn.RNNCell:
+            qRNNCellBase = RNNCell(mod.input_size, mod.hidden_size, bias=mod.bias, nonlinearity=mod.nonlinearity, dtype=dtype)
+        else:
+            raise NotImplementedError('Only LSTMCell, GRUCell and RNNCell \
+            are supported for QuantizedRNN for now')
+
+        assert mod.bias
+
+        def _observe_and_quantize_weight(weight):
+            if dtype == torch.qint8:
+                weight_observer = weight_observer_method()
+                weight_observer(weight)
+                qweight = _quantize_weight(weight.float(), weight_observer)
+                return qweight
+            else:
+                return weight.float()
+
+        qRNNCellBase._packed_weight_ih = pack_weight_bias(_observe_and_quantize_weight(mod.weight_ih), mod.bias_ih, dtype)
+        qRNNCellBase._packed_weight_hh = pack_weight_bias(_observe_and_quantize_weight(mod.weight_hh), mod.bias_hh, dtype)
+        return qRNNCellBase
+
+    @classmethod
+    def from_reference(cls, ref_mod):
+        assert hasattr(ref_mod, "weight_ih_dtype"), "We are assuming weight_ih "
+        "exists in reference module, may need to relax the assumption to support the use case"
+        if hasattr(ref_mod, "nonlinearity"):
+            qmod = cls(
+                ref_mod.input_size,
+                ref_mod.hidden_size,
+                ref_mod.bias,
+                ref_mod.nonlinearity,
+                dtype=ref_mod.weight_ih_dtype
+            )
+        else:
+            qmod = cls(
+                ref_mod.input_size,
+                ref_mod.hidden_size,
+                ref_mod.bias,
+                dtype=ref_mod.weight_ih_dtype
+            )
+        weight_bias_dict = {
+            "weight": {
+                "weight_ih": ref_mod.get_quantized_weight_ih(),
+                "weight_hh": ref_mod.get_quantized_weight_hh(),
+            },
+            "bias": {
+                "bias_ih": ref_mod.bias_ih,
+                "bias_hh": ref_mod.bias_hh,
+            }
+        }
+        qmod.set_weight_bias(weight_bias_dict)
+        return qmod
+
+    def _weight_bias(self):
+        # Returns a dict of weights and biases
+        weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}}
+        w1, b1 = self._packed_weight_ih.__getstate__()[0]
+        w2, b2 = self._packed_weight_hh.__getstate__()[0]
+        # TODO: these can be simplified to one level? e.g. using weight_ih as key
+        # directly
+        weight_bias_dict['weight']['weight_ih'] = w1
+        weight_bias_dict['weight']['weight_hh'] = w2
+        weight_bias_dict['bias']['bias_ih'] = b1
+        weight_bias_dict['bias']['bias_hh'] = b2
+        return weight_bias_dict
+
+    def get_weight(self):
+        return self._weight_bias()['weight']
+
+    def get_bias(self):
+        return self._weight_bias()['bias']
+
+    def set_weight_bias(self, weight_bias_dict):
+        # TODO: these can be simplified to one level? e.g. using weight_ih as key
+        # directly
+        self._packed_weight_ih = pack_weight_bias(
+            weight_bias_dict["weight"]["weight_ih"],
+            weight_bias_dict["bias"]["bias_ih"],
+            self.weight_dtype)
+        self._packed_weight_hh = pack_weight_bias(
+            weight_bias_dict["weight"]["weight_hh"],
+            weight_bias_dict["bias"]["bias_hh"],
+            self.weight_dtype)
+
+    def _save_to_state_dict(self, destination, prefix, keep_vars):
+        super()._save_to_state_dict(destination, prefix, keep_vars)
+        destination[prefix + '_packed_weight_ih'] = self._packed_weight_ih
+        destination[prefix + '_packed_weight_hh'] = self._packed_weight_hh
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        self._packed_weight_ih = state_dict.pop(prefix + '_packed_weight_ih')
+        self._packed_weight_hh = state_dict.pop(prefix + '_packed_weight_hh')
+        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
+                                      missing_keys, unexpected_keys, error_msgs)
+
+
+class RNNCell(RNNCellBase):
+    r"""An Elman RNN cell with tanh or ReLU non-linearity.
+    A dynamic quantized RNNCell module with floating point tensor as inputs and outputs.
+    Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.RNNCell`,
+    please see https://pytorch.org/docs/stable/nn.html#torch.nn.RNNCell for documentation.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> rnn = nn.RNNCell(10, 20)
+        >>> input = torch.randn(6, 3, 10)
+        >>> hx = torch.randn(3, 20)
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx = rnn(input[i], hx)
+        ...     output.append(hx)
+    """
+    __constants__ = ['input_size', 'hidden_size', 'bias', 'nonlinearity']
+
+    def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh", dtype=torch.qint8):
+        super().__init__(input_size, hidden_size, bias, num_chunks=1, dtype=dtype)
+        self.nonlinearity = nonlinearity
+
+    def _get_name(self):
+        return 'DynamicQuantizedRNNCell'
+
+    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
+        self.check_forward_input(input)
+        if hx is None:
+            hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+        self.check_forward_hidden(input, hx, '')
+        if self.nonlinearity == "tanh":
+            ret = torch.ops.quantized.quantized_rnn_tanh_cell_dynamic(
+                input, hx,
+                self._packed_weight_ih, self._packed_weight_hh,
+                self.bias_ih, self.bias_hh)
+        elif self.nonlinearity == "relu":
+            ret = torch.ops.quantized.quantized_rnn_relu_cell_dynamic(
+                input, hx,
+                self._packed_weight_ih, self._packed_weight_hh,
+                self.bias_ih, self.bias_hh)
+        else:
+            ret = input  # TODO: remove when jit supports exception flow
+            raise RuntimeError(
+                f"Unknown nonlinearity: {self.nonlinearity}")
+        return ret
+
+    @classmethod
+    def from_float(cls, mod):
+        return super().from_float(mod)
+
+
+class LSTMCell(RNNCellBase):
+    r"""A long short-term memory (LSTM) cell.
+
+    A dynamic quantized LSTMCell module with floating point tensor as inputs and outputs.
+    Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.LSTMCell`,
+    please see https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell for documentation.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> rnn = nn.LSTMCell(10, 20)
+        >>> input = torch.randn(6, 3, 10)
+        >>> hx = torch.randn(3, 20)
+        >>> cx = torch.randn(3, 20)
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx, cx = rnn(input[i], (hx, cx))
+        ...     output.append(hx)
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, num_chunks=4, **kwargs)  # type: ignore[misc]
+
+    def _get_name(self):
+        return 'DynamicQuantizedLSTMCell'
+
+    def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
+        self.check_forward_input(input)
+        if hx is None:
+            zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+            hx = (zeros, zeros)
+        self.check_forward_hidden(input, hx[0], '[0]')
+        self.check_forward_hidden(input, hx[1], '[1]')
+        return torch.ops.quantized.quantized_lstm_cell_dynamic(
+            input, hx,
+            self._packed_weight_ih, self._packed_weight_hh,
+            self.bias_ih, self.bias_hh)
+
+    @classmethod
+    def from_float(cls, mod):
+        return super().from_float(mod)
+
+
+class GRUCell(RNNCellBase):
+    r"""A gated recurrent unit (GRU) cell
+
+    A dynamic quantized GRUCell module with floating point tensor as inputs and outputs.
+    Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.GRUCell`,
+    please see https://pytorch.org/docs/stable/nn.html#torch.nn.GRUCell for documentation.
+
+    Examples::
+
+        >>> # xdoctest: +SKIP
+        >>> rnn = nn.GRUCell(10, 20)
+        >>> input = torch.randn(6, 3, 10)
+        >>> hx = torch.randn(3, 20)
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx = rnn(input[i], hx)
+        ...     output.append(hx)
+    """
+
+    def __init__(self, input_size, hidden_size, bias=True, dtype=torch.qint8):
+        super().__init__(input_size, hidden_size, bias, num_chunks=3, dtype=dtype)
+
+    def _get_name(self):
+        return 'DynamicQuantizedGRUCell'
+
+    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
+        self.check_forward_input(input)
+        if hx is None:
+            hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+        self.check_forward_hidden(input, hx, '')
+        return torch.ops.quantized.quantized_gru_cell_dynamic(
+            input, hx,
+            self._packed_weight_ih, self._packed_weight_hh,
+            self.bias_ih, self.bias_hh,
+        )
+
+    @classmethod
+    def from_float(cls, mod):
+        return super().from_float(mod)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py
new file mode 100644
index 0000000000000000000000000000000000000000..72218184fcfacc20aba8c5359d38999d13493664
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py
@@ -0,0 +1,644 @@
+r""" Functional interface (quantized)."""
+from typing import List, Optional
+import warnings
+
+import torch
+from torch import Tensor
+from torch.nn.modules.utils import _pair, _triple
+from torch.jit.annotations import BroadcastingList2
+
+from .modules.utils import _pair_from_first
+
+# Although some of the functions and docstrings are mirrored from the torch.nn,
+# we want to have them here for future changes.
+
+__all__ = [
+    "avg_pool2d",
+    "avg_pool3d",
+    "adaptive_avg_pool2d",
+    "adaptive_avg_pool3d",
+    "conv1d",
+    "conv2d",
+    "conv3d",
+    "interpolate",
+    "linear",
+    "max_pool1d",
+    "max_pool2d",
+    "celu",
+    "leaky_relu",
+    "hardtanh",
+    "hardswish",
+    "threshold",
+    "elu",
+    "hardsigmoid",
+    "clamp",
+    "upsample",
+    "upsample_bilinear",
+    "upsample_nearest",
+]
+
+def avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False,
+               count_include_pad=True, divisor_override=None):
+    r"""
+    Applies 2D average-pooling operation in :math:`kH \times kW` regions by step size
+    :math:`sH \times sW` steps. The number of output features is equal to the number of
+    input planes.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    See :class:`~torch.ao.nn.quantized.AvgPool2d` for details and output shape.
+
+    Args:
+        input: quantized input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
+        kernel_size: size of the pooling region. Can be a single number or a
+          tuple `(kH, kW)`
+        stride: stride of the pooling operation. Can be a single number or a
+          tuple `(sH, sW)`. Default: :attr:`kernel_size`
+        padding: implicit zero paddings on both sides of the input. Can be a
+          single number or a tuple `(padH, padW)`. Default: 0
+        ceil_mode: when True, will use `ceil` instead of `floor` in the formula
+            to compute the output shape. Default: ``False``
+        count_include_pad: when True, will include the zero-padding in the
+            averaging calculation. Default: ``True``
+        divisor_override: if specified, it will be used as divisor, otherwise
+             size of the pooling region will be used. Default: None
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.avg_pool2d' must be quantized!")
+    return torch.nn.functional.avg_pool2d(input, kernel_size, stride, padding,
+                                          ceil_mode, count_include_pad,
+                                          divisor_override)
+
+def avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False,
+               count_include_pad=True, divisor_override=None):
+    r"""
+    Applies 3D average-pooling operation in :math:`kD \ times kH \times kW` regions by step size
+    :math:`sD \times sH \times sW` steps. The number of output features is equal to the number of
+    input planes.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    Args:
+        input: quantized input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
+        kernel_size: size of the pooling region. Can be a single number or a
+          tuple `(kD, kH, kW)`
+        stride: stride of the pooling operation. Can be a single number or a
+          tuple `(sD, sH, sW)`. Default: :attr:`kernel_size`
+        padding: implicit zero paddings on both sides of the input. Can be a
+          single number or a tuple `(padD, padH, padW)`. Default: 0
+        ceil_mode: when True, will use `ceil` instead of `floor` in the formula
+            to compute the output shape. Default: ``False``
+        count_include_pad: when True, will include the zero-padding in the
+            averaging calculation. Default: ``True``
+        divisor_override: if specified, it will be used as divisor, otherwise
+             size of the pooling region will be used. Default: None
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.avg_pool3d' must be quantized!")
+    return torch.nn.functional.avg_pool3d(input, kernel_size, stride, padding,
+                                          ceil_mode, count_include_pad,
+                                          divisor_override)
+
+def adaptive_avg_pool2d(input: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
+    r"""
+    Applies a 2D adaptive average pooling over a quantized input signal composed
+    of several quantized input planes.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    See :class:`~torch.ao.nn.quantized.AdaptiveAvgPool2d` for details and output shape.
+
+    Args:
+        output_size: the target output size (single integer or
+                     double-integer tuple)
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.functional.adaptive_avg_pool2d' must be quantized!")
+    return torch.nn.functional.adaptive_avg_pool2d(input, output_size)
+
+def adaptive_avg_pool3d(input: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
+    r"""
+    Applies a 3D adaptive average pooling over a quantized input signal composed
+    of several quantized input planes.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    See :class:`~torch.ao.nn.quantized.AdaptiveAvgPool3d` for details and output shape.
+
+    Args:
+        output_size: the target output size (single integer or
+                     double-integer tuple)
+    """
+    if not input.is_quantized:
+        raise ValueError(
+            "Input to 'quantized.functional.adaptive_avg_pool3d' must be quantized!")
+    return torch.nn.functional.adaptive_avg_pool3d(input, output_size)
+
+def conv1d(input, weight, bias,
+           stride=1, padding=0, dilation=1, groups=1,
+           padding_mode='zeros',
+           scale=1.0, zero_point=0,
+           dtype=torch.quint8):
+    r"""
+    Applies a 1D convolution over a quantized 1D input composed of several input
+    planes.
+
+    See :class:`~torch.ao.nn.quantized.Conv1d` for details and output shape.
+
+    Args:
+        input: quantized input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
+        weight: quantized filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , iW)`
+        bias: **non-quantized** bias tensor of shape :math:`(\text{out\_channels})`. The tensor type must be `torch.float`.
+        stride: the stride of the convolving kernel. Can be a single number or a
+          tuple `(sW,)`. Default: 1
+        padding: implicit paddings on both sides of the input. Can be a
+          single number or a tuple `(padW,)`. Default: 0
+        dilation: the spacing between kernel elements. Can be a single number or
+          a tuple `(dW,)`. Default: 1
+        groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
+          number of groups. Default: 1
+        padding_mode: the padding mode to use. Only "zeros" is supported for quantized convolution at the moment. Default: "zeros"
+        scale: quantization scale for the output. Default: 1.0
+        zero_point: quantization zero_point for the output. Default: 0
+        dtype: quantization data type to use. Default: ``torch.quint8``
+
+    Examples::
+
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
+        >>> from torch.ao.nn.quantized import functional as qF
+        >>> filters = torch.randn(33, 16, 3, dtype=torch.float)
+        >>> inputs = torch.randn(20, 16, 50, dtype=torch.float)
+        >>> bias = torch.randn(33, dtype=torch.float)
+        >>>
+        >>> scale, zero_point = 1.0, 0
+        >>> dtype_inputs = torch.quint8
+        >>> dtype_filters = torch.qint8
+        >>>
+        >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters)
+        >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs)
+        >>> qF.conv1d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point)
+    """  # noqa: E501
+    if padding_mode != 'zeros':
+        raise NotImplementedError("Only zero-padding is supported!")
+    if input.dtype != torch.quint8:
+        raise NotImplementedError("Only torch.quint8 is supported for activation tensor!")
+    if weight.dtype != torch.qint8:
+        raise NotImplementedError("Only torch.qint8 is supported for weight tensor!")
+    if input.ndim != 3:
+        raise ValueError("Input shape must be `(N, C, L)`!")
+    stride = _pair_from_first(stride)
+    padding = _pair_from_first(padding)
+    dilation = _pair_from_first(dilation)
+
+    packed_params = torch.ops.quantized.conv1d_prepack(
+        weight, bias, stride, padding, dilation, groups)
+    return torch.ops.quantized.conv1d(input, packed_params, scale, zero_point)
+
+def conv2d(input, weight, bias,
+           stride=1, padding=0, dilation=1, groups=1,
+           padding_mode='zeros',
+           scale=1.0, zero_point=0,
+           dtype=torch.quint8):
+    r"""
+    Applies a 2D convolution over a quantized 2D input composed of several input
+    planes.
+
+    See :class:`~torch.ao.nn.quantized.Conv2d` for details and output shape.
+
+    Args:
+        input: quantized input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
+        weight: quantized filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)`
+        bias: **non-quantized** bias tensor of shape :math:`(\text{out\_channels})`. The tensor type must be `torch.float`.
+        stride: the stride of the convolving kernel. Can be a single number or a
+          tuple `(sH, sW)`. Default: 1
+        padding: implicit paddings on both sides of the input. Can be a
+          single number or a tuple `(padH, padW)`. Default: 0
+        dilation: the spacing between kernel elements. Can be a single number or
+          a tuple `(dH, dW)`. Default: 1
+        groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
+          number of groups. Default: 1
+        padding_mode: the padding mode to use. Only "zeros" is supported for quantized convolution at the moment. Default: "zeros"
+        scale: quantization scale for the output. Default: 1.0
+        zero_point: quantization zero_point for the output. Default: 0
+        dtype: quantization data type to use. Default: ``torch.quint8``
+
+    Examples::
+
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
+        >>> from torch.ao.nn.quantized import functional as qF
+        >>> filters = torch.randn(8, 4, 3, 3, dtype=torch.float)
+        >>> inputs = torch.randn(1, 4, 5, 5, dtype=torch.float)
+        >>> bias = torch.randn(8, dtype=torch.float)
+        >>>
+        >>> scale, zero_point = 1.0, 0
+        >>> dtype_inputs = torch.quint8
+        >>> dtype_filters = torch.qint8
+        >>>
+        >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters)
+        >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs)
+        >>> qF.conv2d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point)
+    """  # noqa: E501
+    if padding_mode != 'zeros':
+        raise NotImplementedError("Only zero-padding is supported!")
+    if input.dtype != torch.quint8:
+        raise NotImplementedError("Only torch.quint8 is supported for activation tensor!")
+    if weight.dtype != torch.qint8:
+        raise NotImplementedError("Only torch.qint8 is supported for weight tensor!")
+    if input.ndim != 4:
+        raise ValueError("Input shape must be `(N, C, H, W)`!")
+    stride = _pair(stride)
+    padding = _pair(padding)
+    dilation = _pair(dilation)
+
+    packed_params = torch.ops.quantized.conv2d_prepack(
+        weight, bias, stride, padding, dilation, groups)
+    return torch.ops.quantized.conv2d(input, packed_params, scale, zero_point)
+
+def conv3d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1,
+           padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8):
+    r"""
+    Applies a 3D convolution over a quantized 3D input composed of several input
+    planes.
+
+    See :class:`~torch.ao.nn.quantized.Conv3d` for details and output shape.
+
+    Args:
+        input: quantized input tensor of shape
+          :math:`(\text{minibatch} , \text{in\_channels} , iD , iH , iW)`
+        weight: quantized filters of shape
+          :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kD , kH , kW)`
+        bias: **non-quantized** bias tensor of shape
+          :math:`(\text{out\_channels})`. The tensor type must be `torch.float`.
+        stride: the stride of the convolving kernel. Can be a single number or a
+          tuple `(sD, sH, sW)`. Default: 1
+        padding: implicit paddings on both sides of the input. Can be a
+          single number or a tuple `(padD, padH, padW)`. Default: 0
+        dilation: the spacing between kernel elements. Can be a single number or
+          a tuple `(dD, dH, dW)`. Default: 1
+        groups: split input into groups, :math:`\text{in\_channels}` should be
+          divisible by the number of groups. Default: 1
+        padding_mode: the padding mode to use. Only "zeros" is supported for
+          quantized convolution at the moment. Default: "zeros"
+        scale: quantization scale for the output. Default: 1.0
+        zero_point: quantization zero_point for the output. Default: 0
+        dtype: quantization data type to use. Default: ``torch.quint8``
+
+    Examples::
+
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
+        >>> from torch.ao.nn.quantized import functional as qF
+        >>> filters = torch.randn(8, 4, 3, 3, 3, dtype=torch.float)
+        >>> inputs = torch.randn(1, 4, 5, 5, 5, dtype=torch.float)
+        >>> bias = torch.randn(8, dtype=torch.float)
+        >>>
+        >>> scale, zero_point = 1.0, 0
+        >>> dtype_inputs = torch.quint8
+        >>> dtype_filters = torch.qint8
+        >>>
+        >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters)
+        >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs)
+        >>> qF.conv3d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point)
+    """  # noqa: E501
+    if padding_mode != 'zeros':
+        raise NotImplementedError("Only zero-padding is supported!")
+    if input.dtype != torch.quint8:
+        raise NotImplementedError("Only torch.quint8 is supported for activation tensor!")
+    if weight.dtype != torch.qint8:
+        raise NotImplementedError("Only torch.qint8 is supported for weight tensor!")
+    if input.ndim != 5:
+        raise ValueError("Input shape must be `(N, C, D, H, W)`!")
+    stride = _triple(stride)
+    padding = _triple(padding)
+    dilation = _triple(dilation)
+
+    packed_params = torch.ops.quantized.conv3d_prepack(
+        weight, bias, stride, padding, dilation, groups)
+    return torch.ops.quantized.conv3d(input, packed_params, scale, zero_point)
+
+def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
+    r"""Down/up samples the input to either the given :attr:`size` or the given
+    :attr:`scale_factor`
+
+    See :func:`torch.nn.functional.interpolate` for implementation details.
+
+    The input dimensions are interpreted in the form:
+    `mini-batch x channels x [optional depth] x [optional height] x width`.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    .. note:: Only 2D/3D input is supported for quantized inputs
+
+    .. note:: Only the following modes are supported for the quantized inputs:
+
+        - `bilinear`
+        - `nearest`
+
+    Args:
+        input (Tensor): the input tensor
+        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
+            output spatial size.
+        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to match input size if it is a tuple.
+        mode (str): algorithm used for upsampling:
+            ``'nearest'`` | ``'bilinear'``
+        align_corners (bool, optional): Geometrically, we consider the pixels of the
+            input and output as squares rather than points.
+            If set to ``True``, the input and output tensors are aligned by the
+            center points of their corner pixels, preserving the values at the corner pixels.
+            If set to ``False``, the input and output tensors are aligned by the corner
+            points of their corner pixels, and the interpolation uses edge value padding
+            for out-of-boundary values, making this operation *independent* of input size
+            when :attr:`scale_factor` is kept the same. This only has an effect when :attr:`mode`
+            is ``'bilinear'``.
+            Default: ``False``
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.interpolate' must be quantized!")
+    return torch.nn.functional.interpolate(input, size, scale_factor, mode,
+                                           align_corners)
+
+def linear(
+    input: Tensor, weight: Tensor, bias: Optional[Tensor] = None,
+    scale: Optional[float] = None, zero_point: Optional[int] = None
+) -> Tensor:
+    r"""
+    Applies a linear transformation to the incoming quantized data:
+    :math:`y = xA^T + b`.
+    See :class:`~torch.ao.nn.quantized.Linear`
+
+    .. note::
+
+      Current implementation packs weights on every call, which has penalty on performance.
+      If you want to avoid the overhead, use :class:`~torch.ao.nn.quantized.Linear`.
+
+    Args:
+      input (Tensor): Quantized input of type `torch.quint8`
+      weight (Tensor): Quantized weight of type `torch.qint8`
+      bias (Tensor): None or fp32 bias of type `torch.float`
+      scale (double): output scale. If None, derived from the input scale
+      zero_point (long): output zero point. If None, derived from the input zero_point
+
+    Shape:
+        - Input: :math:`(N, *, in\_features)` where `*` means any number of
+          additional dimensions
+        - Weight: :math:`(out\_features, in\_features)`
+        - Bias: :math:`(out\_features)`
+        - Output: :math:`(N, *, out\_features)`
+    """
+    if scale is None:
+        scale = input.q_scale()
+    if zero_point is None:
+        zero_point = input.q_zero_point()
+    _packed_params = torch.ops.quantized.linear_prepack(weight, bias)
+    return torch.ops.quantized.linear(input, _packed_params, scale, zero_point)
+
+def max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1,
+               ceil_mode=False, return_indices=False):
+    r"""Applies a 1D max pooling over a quantized input signal composed of
+    several quantized input planes.
+
+    .. note:: The input quantization parameters are propagated to the output.
+
+    See :class:`~torch.ao.nn.quantized.MaxPool1d` for details.
+    """
+    if return_indices:
+        raise NotImplementedError("return_indices is not yet implemented!")
+    if stride is None:
+        stride = torch.jit.annotate(List[int], [])
+    return torch.nn.functional.max_pool1d(input, kernel_size, stride, padding,
+                                          dilation, ceil_mode=ceil_mode, return_indices=return_indices)
+
+def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
+               ceil_mode=False, return_indices=False):
+    r"""Applies a 2D max pooling over a quantized input signal composed of
+    several quantized input planes.
+
+    .. note:: The input quantization parameters are propagated to the output.
+
+    See :class:`~torch.ao.nn.quantized.MaxPool2d` for details.
+    """
+    if return_indices:
+        raise NotImplementedError("return_indices is not yet implemented!")
+    if stride is None:
+        stride = torch.jit.annotate(List[int], [])
+    return torch.nn.functional.max_pool2d(input, kernel_size, stride, padding,
+                                          dilation, ceil_mode=ceil_mode, return_indices=return_indices)
+
+def celu(input: Tensor, scale: float, zero_point: int, alpha: float = 1.) -> Tensor:
+    r"""celu(input, scale, zero_point, alpha=1.) -> Tensor
+
+    Applies the quantized CELU function element-wise.
+
+    .. math::
+        \text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x / \alpha) - 1))
+
+    Args:
+        input: quantized input
+        alpha: the :math:`\alpha` value for the CELU formulation. Default: 1.0
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.celu' must be quantized!")
+    return torch.ops.quantized.celu(input, scale, zero_point, alpha)
+
+
+def leaky_relu(input: Tensor, negative_slope: float = 0.01, inplace: bool = False,
+               scale: Optional[float] = None, zero_point: Optional[int] = None):
+    r"""
+    Quantized version of the.
+    leaky_relu(input, negative_slope=0.01, inplace=False, scale, zero_point) -> Tensor
+
+    Applies element-wise,
+    :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`
+
+    Args:
+        input: Quantized input
+        negative_slope: The slope of the negative input
+        inplace: Inplace modification of the input tensor
+        scale, zero_point: Scale and zero point of the output tensor.
+
+    See :class:`~torch.nn.LeakyReLU` for more details.
+    """
+    if scale is not None and zero_point is not None:
+        assert not inplace, "Cannot rescale with `inplace`"
+        output = torch._empty_affine_quantized(
+            input.shape, scale=scale, zero_point=int(zero_point), dtype=input.dtype)
+        torch._C._nn.leaky_relu(input, negative_slope, out=output)
+        return output
+    if inplace:
+        result = torch._C._nn.leaky_relu_(input, negative_slope)
+    else:
+        result = torch._C._nn.leaky_relu(input, negative_slope)
+    return result
+
+def hardtanh(input: Tensor, min_val: float = -1., max_val: float = 1., inplace: bool = False) -> Tensor:
+    r"""This is the quantized version of :func:`~torch.nn.functional.hardtanh`.
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.hardtanh' must be quantized!")
+    if inplace:
+        return torch._C._nn.hardtanh_(input, min_val, max_val)
+    return torch._C._nn.hardtanh(input, min_val, max_val)
+
+def hardswish(input: Tensor, scale: float, zero_point: int) -> Tensor:
+    r"""This is the quantized version of :func:`~torch.nn.functional.hardswish`.
+
+    Args:
+        input: quantized input
+        scale: quantization scale of the output tensor
+        zero_point: quantization zero point of the output tensor
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.hardswish' must be quantized!")
+    return torch._ops.ops.quantized.hardswish(input, scale, zero_point)
+
+def threshold(input: Tensor, threshold: float, value: float) -> Tensor:
+    r"""Applies the quantized version of the threshold function element-wise:
+
+    .. math::
+        x = \begin{cases}
+                x & \text{if~} x > \text{threshold} \\
+                \text{value} & \text{otherwise}
+            \end{cases}
+
+    See :class:`~torch.nn.Threshold` for more details.
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.threshold' must be quantized!")
+    if threshold is None:
+        raise ValueError("Input to 'threshold' must be specified!")
+    if value is None:
+        raise ValueError("Input to 'value' must be specified!")
+    return torch._ops.ops.quantized.threshold(input, threshold, value)
+
+def elu(input: Tensor, scale: float, zero_point: int, alpha: float = 1.) -> Tensor:
+    r"""This is the quantized version of :func:`~torch.nn.functional.elu`.
+
+    Args:
+        input: quantized input
+        scale: quantization scale of the output tensor
+        zero_point: quantization zero point of the output tensor
+        alpha: the alpha constant
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.elu' must be quantized!")
+    return torch.ops.quantized.elu(input, scale, zero_point, alpha)
+
+def hardsigmoid(input: Tensor, inplace: bool = False) -> Tensor:
+    r"""This is the quantized version of :func:`~torch.nn.functional.hardsigmoid`.
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.hardsigmoid' must be quantized!")
+    if inplace:
+        return torch._C._nn.hardsigmoid_(input)  # type: ignore[attr-defined]
+    return torch._C._nn.hardsigmoid(input)
+
+def clamp(input: Tensor, min_: float, max_: float) -> Tensor:
+    r"""float(input, min\_, max\_) -> Tensor
+
+    Applies the clamp function element-wise.
+    See :class:`~torch.ao.nn.quantized.clamp` for more details.
+
+    Args:
+        input: quantized input
+        min_: minimum value for clamping
+        max_: maximum value for clamping
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.clamp' must be quantized!")
+    return torch.clamp(input, min_, max_)
+
+def upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
+    r"""Upsamples the input to either the given :attr:`size` or the given
+    :attr:`scale_factor`
+
+    .. warning::
+        This function is deprecated in favor of
+        :func:`torch.ao.nn.quantized.functional.interpolate`.
+        This is equivalent with ``nn.quantized.functional.interpolate(...)``.
+
+    See :func:`torch.nn.functional.interpolate` for implementation details.
+
+    The input dimensions are interpreted in the form:
+    `mini-batch x channels x [optional depth] x [optional height] x width`.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    .. note:: Only 2D input is supported for quantized inputs
+
+    .. note:: Only the following modes are supported for the quantized inputs:
+
+        - `bilinear`
+        - `nearest`
+
+    Args:
+        input (Tensor): quantized input tensor
+        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
+            output spatial size.
+        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to be an integer.
+        mode (str): algorithm used for upsampling:
+            ``'nearest'`` | ``'bilinear'``
+        align_corners (bool, optional): Geometrically, we consider the pixels of the
+            input and output as squares rather than points.
+            If set to ``True``, the input and output tensors are aligned by the
+            center points of their corner pixels, preserving the values at the corner pixels.
+            If set to ``False``, the input and output tensors are aligned by the corner
+            points of their corner pixels, and the interpolation uses edge value padding
+            for out-of-boundary values, making this operation *independent* of input size
+            when :attr:`scale_factor` is kept the same. This only has an effect when :attr:`mode`
+            is ``'bilinear'``.
+            Default: ``False``
+
+    .. warning::
+        With ``align_corners = True``, the linearly interpolating modes
+        (`bilinear`) don't proportionally align the
+        output and input pixels, and thus the output values can depend on the
+        input size. This was the default behavior for these modes up to version
+        0.3.1. Since then, the default behavior is ``align_corners = False``.
+        See :class:`~torch.nn.Upsample` for concrete examples on how this
+        affects the outputs.
+    """
+    warnings.warn("nn.quantized.functional.upsample is deprecated. Use nn.quantized.functional.interpolate instead.")
+    return interpolate(input, size, scale_factor, mode, align_corners)
+
+def upsample_bilinear(input, size=None, scale_factor=None):
+    r"""Upsamples the input, using bilinear upsampling.
+
+    .. warning::
+        This function is deprecated in favor of
+        :func:`torch.ao.nn.quantized.functional.interpolate`.
+        This is equivalent with
+        ``nn.quantized.functional.interpolate(..., mode='bilinear', align_corners=True)``.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    .. note:: Only 2D inputs are supported
+
+    Args:
+        input (Tensor): quantized input
+        size (int or Tuple[int, int]): output spatial size.
+        scale_factor (int or Tuple[int, int]): multiplier for spatial size
+    """
+    # DeprecationWarning is ignored by default
+    warnings.warn("nn.quantized.functional.upsample_bilinear is deprecated. Use nn.quantized.functional.interpolate instead.")
+    return interpolate(input, size, scale_factor, mode='bilinear', align_corners=True)
+
+def upsample_nearest(input, size=None, scale_factor=None):
+    r"""Upsamples the input, using nearest neighbours' pixel values.
+
+    .. warning::
+        This function is deprecated in favor of
+        :func:`torch.ao.nn.quantized.functional.interpolate`.
+        This is equivalent with ``nn.quantized.functional.interpolate(..., mode='nearest')``.
+
+    .. note:: The input quantization parameters propagate to the output.
+
+    .. note:: Only 2D inputs are supported
+
+    Args:
+        input (Tensor): quantized input
+        size (int or Tuple[int, int] or Tuple[int, int, int]): output spatial
+            size.
+        scale_factor (int): multiplier for spatial size. Has to be an integer.
+    """
+    # DeprecationWarning is ignored by default
+    warnings.warn("nn.quantized.functional.upsample_nearest is deprecated. Use nn.quantized.functional.interpolate instead.")
+    return interpolate(input, size, scale_factor, mode='nearest')
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..668f765fe3ef0ae082e94a30639fe343034ca306
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py
@@ -0,0 +1,131 @@
+import torch
+
+# The quantized modules use `torch.nn` and `torch.ao.nn.quantizable`
+# packages. However, the `quantizable` package uses "lazy imports"
+# to avoid circular dependency.
+# Hence we need to include it here to make sure it is resolved before
+# they are used in the modules.
+import torch.ao.nn.quantizable
+
+from torch.nn.modules.pooling import MaxPool2d
+
+from .activation import ReLU6, Hardswish, ELU, LeakyReLU, Sigmoid, Softmax, MultiheadAttention, PReLU
+from .dropout import Dropout
+from .batchnorm import BatchNorm2d, BatchNorm3d
+from .normalization import LayerNorm, GroupNorm, InstanceNorm1d, \
+    InstanceNorm2d, InstanceNorm3d
+from .conv import Conv1d, Conv2d, Conv3d
+from .conv import ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
+from .linear import Linear
+from .embedding_ops import Embedding, EmbeddingBag
+from .rnn import LSTM
+
+from .functional_modules import FloatFunctional, FXFloatFunctional, QFunctional
+
+__all__ = [
+    'BatchNorm2d',
+    'BatchNorm3d',
+    'Conv1d',
+    'Conv2d',
+    'Conv3d',
+    'ConvTranspose1d',
+    'ConvTranspose2d',
+    'ConvTranspose3d',
+    'DeQuantize',
+    'ELU',
+    'Embedding',
+    'EmbeddingBag',
+    'GroupNorm',
+    'Hardswish',
+    'InstanceNorm1d',
+    'InstanceNorm2d',
+    'InstanceNorm3d',
+    'LayerNorm',
+    'LeakyReLU',
+    'Linear',
+    'LSTM',
+    'MultiheadAttention',
+    'Quantize',
+    'ReLU6',
+    'Sigmoid',
+    'Softmax',
+    'Dropout',
+    'PReLU',
+    # Wrapper modules
+    'FloatFunctional',
+    'FXFloatFunctional',
+    'QFunctional',
+]
+
+class Quantize(torch.nn.Module):
+    r"""Quantizes an incoming tensor
+
+    Args:
+     `scale`: scale of the output Quantized Tensor
+     `zero_point`: zero_point of output Quantized Tensor
+     `dtype`: data type of output Quantized Tensor
+     `factory_kwargs`: Dictionary of kwargs used for configuring initialization
+         of internal buffers. Currently, `device` and `dtype` are supported.
+         Example: `factory_kwargs={'device': 'cuda', 'dtype': torch.float64}`
+         will initialize internal buffers as type `torch.float64` on the current CUDA device.
+         Note that `dtype` only applies to floating-point buffers.
+
+    Examples::
+        >>> t = torch.tensor([[1., -1.], [1., -1.]])
+        >>> scale, zero_point, dtype = 1.0, 2, torch.qint8
+        >>> qm = Quantize(scale, zero_point, dtype)
+        >>> # xdoctest: +SKIP
+        >>> qt = qm(t)
+        >>> print(qt)
+        tensor([[ 1., -1.],
+                [ 1., -1.]], size=(2, 2), dtype=torch.qint8, scale=1.0, zero_point=2)
+    """
+
+    scale: torch.Tensor
+    zero_point: torch.Tensor
+
+    def __init__(self, scale, zero_point, dtype, factory_kwargs=None):
+        factory_kwargs = torch.nn.factory_kwargs(factory_kwargs)
+        super().__init__()
+        self.register_buffer('scale', torch.tensor([scale], **factory_kwargs))
+        self.register_buffer('zero_point',
+                             torch.tensor([zero_point], dtype=torch.long,
+                                          **{k: v for k, v in factory_kwargs.items() if k != 'dtype'}))
+        self.dtype = dtype
+
+    def forward(self, X):
+        return torch.quantize_per_tensor(X, float(self.scale),
+                                         int(self.zero_point), self.dtype)
+
+    @staticmethod
+    def from_float(mod):
+        assert hasattr(mod, 'activation_post_process')
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        return Quantize(scale.float().item(), zero_point.long().item(), mod.activation_post_process.dtype)
+
+    def extra_repr(self):
+        return f'scale={self.scale}, zero_point={self.zero_point}, dtype={self.dtype}'
+
+
+class DeQuantize(torch.nn.Module):
+    r"""Dequantizes an incoming tensor
+
+    Examples::
+        >>> input = torch.tensor([[1., -1.], [1., -1.]])
+        >>> scale, zero_point, dtype = 1.0, 2, torch.qint8
+        >>> qm = Quantize(scale, zero_point, dtype)
+        >>> # xdoctest: +SKIP
+        >>> quantized_input = qm(input)
+        >>> dqm = DeQuantize()
+        >>> dequantized = dqm(quantized_input)
+        >>> print(dequantized)
+        tensor([[ 1., -1.],
+                [ 1., -1.]], dtype=torch.float32)
+    """
+
+    def forward(self, Xq):
+        return Xq.dequantize()
+
+    @staticmethod
+    def from_float(mod):
+        return DeQuantize()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6d24d45c2978de8132db9e805ad19ad0b721fe23
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4389a60d9b08fa264765ead9ad3f2932e141be9
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py
@@ -0,0 +1,295 @@
+import torch
+import torch.nn as nn
+from torch import Tensor  # noqa: F401
+from torch._jit_internal import Optional, List  # noqa: F401
+
+from .utils import _hide_packed_params_repr
+from .utils import _quantize_weight
+
+__all__ = ['EmbeddingPackedParams', 'Embedding', 'EmbeddingBag']
+
+class EmbeddingPackedParams(torch.nn.Module):
+    _version = 1
+
+    def __init__(self, num_embeddings, embedding_dim, dtype=torch.quint8):
+        super().__init__()
+        self.dtype = dtype
+        if self.dtype in [torch.quint8, torch.quint4x2]:
+            scales = torch.ones(num_embeddings, dtype=torch.float)
+            zero_points = torch.zeros(num_embeddings, dtype=torch.float)
+            wq = torch._empty_per_channel_affine_quantized([num_embeddings, embedding_dim], scales=scales,
+                                                           zero_points=zero_points,
+                                                           axis=0, dtype=self.dtype)
+            self.set_weight(wq)
+        else:
+            raise NotImplementedError(f'Unsupported dtype on quantized embedding! Supports quint8 and quint4x2. Got dtype: {dtype}')
+
+    @torch.jit.export
+    def set_weight(self, weight: torch.Tensor) -> None:
+        if self.dtype in [torch.quint8, torch.quint4x2]:
+            self._packed_weight = torch.ops.quantized.embedding_bag_prepack(weight)
+        else:
+            raise NotImplementedError('Unsupported dtype for quantized embedding prepack! Supports quint8 and quint4x2.')
+
+
+    @torch.jit.export
+    def _weight(self):
+        if self.dtype in [torch.quint8, torch.quint4x2]:
+            return torch.ops.quantized.embedding_bag_unpack(self._packed_weight)
+        else:
+            raise NotImplementedError('Unsupported dtype for quantized embedding unpack! Supports quint8 and quint4x2.')
+
+    def forward(self, x):
+        return x
+
+    # Version 1
+    #   self
+    #   |--- _packed_weight : Tensor representing weight of EmbeddingPackedParamsBase
+    #   |--- dtype : torch.dtype
+
+    def _save_to_state_dict(self, destination, prefix, keep_vars):
+        super()._save_to_state_dict(destination, prefix, keep_vars)
+        destination[prefix + 'dtype'] = self.dtype
+        destination[prefix + '_packed_weight'] = self._weight()
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        self.dtype = state_dict[prefix + 'dtype']
+        state_dict.pop(prefix + 'dtype')
+
+        weight = state_dict[prefix + '_packed_weight']
+        state_dict.pop(prefix + '_packed_weight')
+        self.set_weight(weight)
+
+        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
+                                      missing_keys, unexpected_keys, error_msgs)
+
+    def __repr__(self):
+        return self._weight().__repr__()
+
+class Embedding(torch.nn.Module):
+    r"""
+    A quantized Embedding module with quantized packed weights as inputs.
+    We adopt the same interface as `torch.nn.Embedding`, please see
+    https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding for documentation.
+
+    Similar to :class:`~torch.nn.Embedding`, attributes will be randomly
+    initialized at module creation time and will be overwritten later
+
+    Attributes:
+        weight (Tensor): the non-learnable quantized weights of the module of
+                         shape :math:`(\text{num\_embeddings}, \text{embedding\_dim})`.
+
+    Examples::
+        >>> m = nn.quantized.Embedding(num_embeddings=10, embedding_dim=12)
+        >>> indices = torch.tensor([9, 6, 5, 7, 8, 8, 9, 2, 8])
+        >>> output = m(indices)
+        >>> print(output.size())
+        torch.Size([9, 12])
+
+    """
+    _version = 1
+
+    def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None,
+                 max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
+                 sparse: bool = False, _weight: Optional[Tensor] = None, dtype=torch.quint8) -> None:
+        super().__init__()
+        self.num_embeddings = num_embeddings
+        self.embedding_dim = embedding_dim
+        self.dtype = dtype
+
+        if _weight is None:
+            scales = torch.ones(num_embeddings, dtype=torch.float)
+            zero_points = torch.zeros(num_embeddings, dtype=torch.float)
+            qweight = torch._empty_per_channel_affine_quantized([num_embeddings, embedding_dim],
+                                                                scales=scales, zero_points=zero_points,
+                                                                axis=0, dtype=torch.quint8)
+        else:
+            assert list(_weight.shape) == [num_embeddings, embedding_dim], \
+                'Shape of weight does not match num_embeddings and embedding_dim'
+            qweight = _weight
+
+        self._packed_params = EmbeddingPackedParams(num_embeddings, embedding_dim, dtype)
+        self._packed_params.set_weight(qweight)
+
+    def forward(self, indices: Tensor) -> Tensor:
+        if self.dtype == torch.quint4x2:
+            return torch.ops.quantized.embedding_4bit(self._packed_params._packed_weight, indices)
+        else:
+            return torch.ops.quantized.embedding_byte(self._packed_params._packed_weight, indices)
+
+    def _get_name(self):
+        return 'QuantizedEmbedding'
+
+    def __repr__(self):
+        return _hide_packed_params_repr(self, EmbeddingPackedParams)
+
+    def extra_repr(self):
+        extra_repr_str = 'num_embeddings={}, embedding_dim={}, dtype={}, qscheme={}'.format(
+            self.num_embeddings, self.embedding_dim, self._packed_params.dtype, self.weight().qscheme()
+        )
+
+        return extra_repr_str
+
+    def set_weight(self, w: torch.Tensor) -> None:
+        self._packed_params.set_weight(w)
+
+    def weight(self):
+        return self._packed_params._weight()
+
+    @classmethod
+    def from_float(cls, mod):
+        r"""Create a quantized embedding module from a float module
+
+        Args:
+            mod (Module): a float module, either produced by torch.ao.quantization
+                          utilities or provided by user
+        """
+        if hasattr(mod, 'weight_fake_quant'):
+            assert type(mod) == torch.ao.nn.qat.Embedding, 'nnq.' + cls.__name__ + '.from_float ' + \
+                'with fake quant only works for ' + torch.ao.nn.qat.Embedding.__name__
+            weight_observer = mod.weight_fake_quant
+            activation_post_process = mod.activation_post_process
+        else:
+            assert type(mod) == nn.Embedding, 'nnq.' + cls.__name__ + '.from_float only works for ' + \
+                nn.Embedding.__name__
+            assert hasattr(mod, 'qconfig'), 'Embedding input float module must have qconfig defined'
+            from torch.ao.quantization import float_qparams_weight_only_qconfig
+            if mod.qconfig is not None and mod.qconfig.weight is not None:  # type: ignore[union-attr]
+                weight_observer = mod.qconfig.weight()  # type: ignore[union-attr, operator]
+            else:
+                weight_observer = float_qparams_weight_only_qconfig.weight()
+
+        dtype = weight_observer.dtype
+        is_float_qparams_qconfig = weight_observer.qscheme == torch.per_channel_affine_float_qparams
+        assert is_float_qparams_qconfig, \
+            'Embedding quantization is only supported with float_qparams_weight_only_qconfig.'
+
+        assert dtype == torch.quint8 or dtype == torch.quint4x2, \
+            f'The only supported dtype for nnq.Embedding is torch.quint8 and torch.quint4x2, got {dtype}'
+
+        # Run the observer to calculate qparams.
+        weight_observer(mod.weight)
+        qweight = _quantize_weight(mod.weight.float(), weight_observer)
+
+        # Create quantized Embedding module and pass in the quantized weight
+        qembedding = Embedding(mod.num_embeddings, mod.embedding_dim)
+        qembedding.set_weight(qweight)
+        return qembedding
+
+    @classmethod
+    def from_reference(cls, ref_embedding):
+        qembedding = cls(
+            ref_embedding.num_embeddings,
+            ref_embedding.embedding_dim,
+            ref_embedding.padding_idx,
+            ref_embedding.max_norm,
+            ref_embedding.norm_type,
+            ref_embedding.scale_grad_by_freq,
+            ref_embedding.sparse,
+            ref_embedding.get_quantized_weight(),
+            ref_embedding.weight_dtype,
+        )
+        return qembedding
+
+class EmbeddingBag(Embedding):
+    r"""
+    A quantized EmbeddingBag module with quantized packed weights as inputs.
+    We adopt the same interface as `torch.nn.EmbeddingBag`, please see
+    https://pytorch.org/docs/stable/nn.html#torch.nn.EmbeddingBag for documentation.
+
+    Similar to :class:`~torch.nn.EmbeddingBag`, attributes will be randomly
+    initialized at module creation time and will be overwritten later
+
+    Attributes:
+        weight (Tensor): the non-learnable quantized weights of the module of
+                         shape :math:`(\text{num\_embeddings}, \text{embedding\_dim})`.
+
+    Examples::
+        >>> m = nn.quantized.EmbeddingBag(num_embeddings=10, embedding_dim=12, include_last_offset=True, mode='sum')
+        >>> indices = torch.tensor([9, 6, 5, 7, 8, 8, 9, 2, 8, 6, 6, 9, 1, 6, 8, 8, 3, 2, 3, 6, 3, 6, 5, 7, 0, 8, 4, 6, 5, 8, 2, 3])
+        >>> offsets = torch.tensor([0, 19, 20, 28, 28, 32])
+        >>> output = m(indices, offsets)
+        >>> print(output.size())
+        torch.Size([5, 12])
+
+    """
+    _version = 1
+
+    def __init__(self, num_embeddings: int, embedding_dim: int,
+                 max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
+                 mode: str = 'sum', sparse: bool = False, _weight: Optional[Tensor] = None,
+                 include_last_offset: bool = False, dtype=torch.quint8) -> None:
+        super().__init__(num_embeddings, embedding_dim, _weight=_weight, dtype=dtype)
+
+        self.mode = mode
+        self.pruned_weights = False
+        self.include_last_offset = include_last_offset
+        self.dtype = dtype
+
+    def forward(self, indices: Tensor, offsets: Optional[Tensor] = None, per_sample_weights: Optional[Tensor] = None,
+                compressed_indices_mapping: Optional[Tensor] = None) -> Tensor:
+        if self.dtype == torch.quint4x2:
+            return torch.ops.quantized.embedding_bag_4bit(self._packed_params._packed_weight, indices, offsets, False, 0,
+                                                          self.pruned_weights, per_sample_weights, compressed_indices_mapping,
+                                                          self.include_last_offset)
+        else:
+            return torch.ops.quantized.embedding_bag_byte(self._packed_params._packed_weight, indices, offsets, False, 0,
+                                                          self.pruned_weights, per_sample_weights, compressed_indices_mapping,
+                                                          self.include_last_offset)
+
+    def _get_name(self):
+        return 'QuantizedEmbeddingBag'
+
+    @classmethod
+    def from_float(cls, mod):
+        r"""Create a quantized embedding_bag module from a float module
+
+        Args:
+            mod (Module): a float module, either produced by torch.ao.quantization
+                          utilities or provided by user
+        """
+        if hasattr(mod, 'weight_fake_quant'):
+            weight_observer = mod.weight_fake_quant
+        else:
+            assert type(mod) == nn.EmbeddingBag, 'nnq.' + cls.__name__ + '.from_float only works for ' + \
+                nn.EmbeddingBag.__name__
+            assert hasattr(mod, 'qconfig'), 'EmbeddingBag input float module must have qconfig defined'
+            from torch.ao.quantization.qconfig import float_qparams_weight_only_qconfig
+            if mod.qconfig is not None and mod.qconfig.weight is not None:  # type: ignore[union-attr]
+                weight_observer = mod.qconfig.weight()  # type: ignore[union-attr, operator]
+            else:
+                weight_observer = float_qparams_weight_only_qconfig.weight()
+
+        dtype = weight_observer.dtype
+        is_float_qparams_qconfig = weight_observer.qscheme == torch.per_channel_affine_float_qparams
+        assert is_float_qparams_qconfig, \
+            'EmbeddingBag quantization is only supported with float_qparams_weight_only_qconfig.'
+
+        assert dtype == torch.quint8 or dtype == torch.quint4x2, \
+            f'The only supported dtype for nnq.EmbeddingBag is torch.quint8 and torch.quint4x2, got {dtype}'
+
+        # Run the observer to calculate qparams.
+        weight_observer(mod.weight)
+        qweight = _quantize_weight(mod.weight.float(), weight_observer)
+
+        # Create quantized EmbeddingBag module and pass in the quantized weight
+        qembedding_bag = EmbeddingBag(mod.num_embeddings, mod.embedding_dim, dtype=dtype)
+        qembedding_bag.set_weight(qweight)
+        return qembedding_bag
+
+    @classmethod
+    def from_reference(cls, ref_embedding_bag):
+        qembedding_bag = cls(
+            ref_embedding_bag.num_embeddings,
+            ref_embedding_bag.embedding_dim,
+            ref_embedding_bag.max_norm,
+            ref_embedding_bag.norm_type,
+            ref_embedding_bag.scale_grad_by_freq,
+            ref_embedding_bag.mode,
+            ref_embedding_bag.sparse,
+            ref_embedding_bag.get_quantized_weight(),
+            ref_embedding_bag.include_last_offset,
+            ref_embedding_bag.weight_dtype,
+        )
+        return qembedding_bag
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..f798a241e3242a74d9d022e6f06b140f65bb5b99
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py
@@ -0,0 +1,199 @@
+import torch
+
+__all__ = ['LayerNorm', 'GroupNorm', 'InstanceNorm1d', 'InstanceNorm2d', 'InstanceNorm3d']
+
+class LayerNorm(torch.nn.LayerNorm):
+    r"""This is the quantized version of :class:`~torch.nn.LayerNorm`.
+
+    Additional args:
+        * **scale** - quantization scale of the output, type: double.
+        * **zero_point** - quantization zero point of the output, type: long.
+
+    """
+
+    def __init__(self, normalized_shape, weight, bias, scale, zero_point, eps=1e-5,
+                 elementwise_affine=True, device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(normalized_shape, eps=eps, elementwise_affine=elementwise_affine,
+                         **factory_kwargs)
+        self.weight = weight
+        self.bias = bias
+        self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
+        self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
+
+    def forward(self, input):
+        return torch.ops.quantized.layer_norm(
+            input, self.normalized_shape, weight=self.weight, bias=self.bias,
+            eps=self.eps, output_scale=self.scale, output_zero_point=self.zero_point)
+
+    def _get_name(self):
+        return 'QuantizedLayerNorm'
+
+    @classmethod
+    def from_float(cls, mod):
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        new_mod = cls(
+            mod.normalized_shape, mod.weight, mod.bias, float(scale),
+            int(zero_point), mod.eps, mod.elementwise_affine)
+        return new_mod
+
+    @classmethod
+    def from_reference(cls, mod, scale, zero_point):
+        return cls(
+            mod.normalized_shape, mod.weight, mod.bias, float(scale),
+            int(zero_point), mod.eps, mod.elementwise_affine)
+
+class GroupNorm(torch.nn.GroupNorm):
+    r"""This is the quantized version of :class:`~torch.nn.GroupNorm`.
+
+    Additional args:
+        * **scale** - quantization scale of the output, type: double.
+        * **zero_point** - quantization zero point of the output, type: long.
+
+    """
+    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine']
+
+    def __init__(self, num_groups, num_channels, weight, bias, scale, zero_point, eps=1e-5,
+                 affine=True, device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(num_groups, num_channels, eps, affine, **factory_kwargs)
+        self.weight = weight
+        self.bias = bias
+        self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
+        self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
+
+    def forward(self, input):
+        return torch.ops.quantized.group_norm(
+            input, self.num_groups, self.weight, self.bias, self.eps, self.scale,
+            self.zero_point)
+
+    def _get_name(self):
+        return 'QuantizedGroupNorm'
+
+    @classmethod
+    def from_float(cls, mod):
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        new_mod = cls(
+            mod.num_groups, mod.num_channels, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+        return new_mod
+
+class InstanceNorm1d(torch.nn.InstanceNorm1d):
+    r"""This is the quantized version of :class:`~torch.nn.InstanceNorm1d`.
+
+    Additional args:
+        * **scale** - quantization scale of the output, type: double.
+        * **zero_point** - quantization zero point of the output, type: long.
+
+    """
+    def __init__(self, num_features, weight, bias, scale, zero_point,
+                 eps=1e-5, momentum=0.1, affine=False,
+                 track_running_stats=False, device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs)
+        self.weight = weight
+        self.bias = bias
+        self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
+        self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
+
+    def forward(self, input):
+        return torch.ops.quantized.instance_norm(
+            input, self.weight, self.bias, self.eps, self.scale,
+            self.zero_point)
+
+    def _get_name(self):
+        return 'QuantizedInstanceNorm1d'
+
+    @classmethod
+    def from_float(cls, mod):
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        new_mod = cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+        return new_mod
+
+    @classmethod
+    def from_reference(cls, mod, scale, zero_point):
+        return cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+
+class InstanceNorm2d(torch.nn.InstanceNorm2d):
+    r"""This is the quantized version of :class:`~torch.nn.InstanceNorm2d`.
+
+    Additional args:
+        * **scale** - quantization scale of the output, type: double.
+        * **zero_point** - quantization zero point of the output, type: long.
+
+    """
+    def __init__(self, num_features, weight, bias, scale, zero_point,
+                 eps=1e-5, momentum=0.1, affine=False,
+                 track_running_stats=False, device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs)
+        self.weight = weight
+        self.bias = bias
+        self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
+        self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
+
+    def forward(self, input):
+        return torch.ops.quantized.instance_norm(
+            input, self.weight, self.bias, self.eps, self.scale,
+            self.zero_point)
+
+    def _get_name(self):
+        return 'QuantizedInstanceNorm2d'
+
+    @classmethod
+    def from_float(cls, mod):
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        new_mod = cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+        return new_mod
+
+    @classmethod
+    def from_reference(cls, mod, scale, zero_point):
+        return cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+
+class InstanceNorm3d(torch.nn.InstanceNorm3d):
+    r"""This is the quantized version of :class:`~torch.nn.InstanceNorm3d`.
+
+    Additional args:
+        * **scale** - quantization scale of the output, type: double.
+        * **zero_point** - quantization zero point of the output, type: long.
+
+    """
+    def __init__(self, num_features, weight, bias, scale, zero_point,
+                 eps=1e-5, momentum=0.1, affine=False,
+                 track_running_stats=False, device=None, dtype=None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype}
+        super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs)
+        self.weight = weight
+        self.bias = bias
+        self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
+        self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
+
+    def forward(self, input):
+        return torch.ops.quantized.instance_norm(
+            input, self.weight, self.bias, self.eps, self.scale,
+            self.zero_point)
+
+    def _get_name(self):
+        return 'QuantizedInstanceNorm3d'
+
+    @classmethod
+    def from_float(cls, mod):
+        scale, zero_point = mod.activation_post_process.calculate_qparams()
+        new_mod = cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
+        return new_mod
+
+    @classmethod
+    def from_reference(cls, mod, scale, zero_point):
+        return cls(
+            mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point),
+            mod.eps, mod.affine)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c24c0ca31dc7c1e0ed348667ad4f1cb69cbda31
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py
@@ -0,0 +1,117 @@
+import abc
+import torch
+import itertools
+import collections
+from torch.nn.modules.module import _addindent
+
+__all__ = [
+    "WeightedQuantizedModule",
+]
+
+class WeightedQuantizedModule(torch.nn.Module, metaclass=abc.ABCMeta):
+    """Wrapper for quantized modules than can be lowered from reference modules."""
+    @classmethod
+    @abc.abstractmethod
+    def from_reference(cls, ref_module, output_scale, output_zero_point):
+        raise NotImplementedError
+
+def _get_weight_observer(observer):
+    # FakeQuantize observer
+    if hasattr(observer, "activation_post_process"):
+        observer = observer.activation_post_process
+    # UniformQuantizationObserverBase observer
+    return observer
+
+def _needs_weight_clamping(observer, dtype):
+    observer = _get_weight_observer(observer)
+    if dtype in [torch.qint8, torch.quint8, torch.qint32]:
+        info = torch.iinfo(dtype)
+        return observer.quant_min > info.min or observer.quant_max < info.max
+    return False
+
+def _clamp_weights(qweight, observer, scale, zp):
+    if not _needs_weight_clamping(observer, qweight.dtype):
+        return qweight
+
+    observer = _get_weight_observer(observer)
+    min_, max_ = observer.quant_min, observer.quant_max
+
+    # Doing this because can't use torch.ops.quantized.clamp() with per_channel qscheme yet.
+    qw_int_max = torch.clone(qweight.int_repr()).fill_(max_)
+    qw_int_min = torch.clone(qweight.int_repr()).fill_(min_)
+    qw_int = torch.minimum(torch.maximum(qweight.int_repr(), qw_int_min), qw_int_max)
+
+    if observer.qscheme in [torch.per_tensor_symmetric,
+                            torch.per_tensor_affine]:
+        qweight = torch._make_per_tensor_quantized_tensor(qw_int, scale.item(), zp.item())
+    elif observer.qscheme in [torch.per_channel_symmetric,
+                              torch.per_channel_affine,
+                              torch.per_channel_affine_float_qparams]:
+        qweight = torch._make_per_channel_quantized_tensor(qw_int, scale, zp, axis=observer.ch_axis)
+    else:
+        raise ValueError("Unexpected qscheme " + observer.qscheme)
+    return qweight
+
+def _quantize_weight(float_wt, observer):
+    wt_scale, wt_zp = observer.calculate_qparams()
+    if observer.qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]:
+        qweight = torch.quantize_per_tensor(
+            float_wt,
+            float(wt_scale), int(wt_zp), torch.qint8)
+        qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp)
+    elif observer.qscheme in [torch.per_channel_symmetric, torch.per_channel_affine]:
+        wt_axis = observer.ch_axis
+        qweight = torch.quantize_per_channel(
+            float_wt,
+            wt_scale.to(torch.double), wt_zp.to(torch.int64), wt_axis, torch.qint8)
+        qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp)
+    elif observer.qscheme in [torch.per_channel_affine_float_qparams]:
+        qweight = torch.quantize_per_channel(
+            float_wt,
+            wt_scale.to(torch.float), wt_zp.to(torch.float), observer.ch_axis, observer.dtype)
+        qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp)
+    else:
+        raise ValueError("Unexpected qscheme " + observer.qscheme)
+    return qweight
+
+def _ntuple_from_first(n):
+    """Converts the argument to a tuple of size n
+    with the first element repeated."""
+    def parse(x):
+        while isinstance(x, collections.abc.Sequence):
+            if len(x) == n:
+                break
+            x = x[0]
+        return tuple(itertools.repeat(x, n))
+    return parse
+
+def _hide_packed_params_repr(self, params):
+    # We don't want to show `PackedParams` children, hence custom
+    # `__repr__`. This is the same as nn.Module.__repr__, except the check
+    # for the `params module`.
+    extra_lines = []
+    extra_repr = self.extra_repr()
+    # empty string will be split into list ['']
+    if extra_repr:
+        extra_lines = extra_repr.split('\n')
+    child_lines = []
+    for key, module in self._modules.items():
+        if isinstance(module, params):
+            continue
+        mod_str = repr(module)
+        mod_str = _addindent(mod_str, 2)
+        child_lines.append('(' + key + '): ' + mod_str)
+    lines = extra_lines + child_lines
+
+    main_str = self._get_name() + '('
+    if lines:
+        # simple one-liner info, which most builtin Modules will use
+        if len(extra_lines) == 1 and not child_lines:
+            main_str += extra_lines[0]
+        else:
+            main_str += '\n  ' + '\n  '.join(lines) + '\n'
+
+    main_str += ')'
+    return main_str
+
+_pair_from_first = _ntuple_from_first(2)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c9fea7a8b70a7de658bb2930fbb7beca21f00132
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fa38486ac7df2291e47cc1f02b31a014553941f7
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08f4ade3a2de72bc34a03d68e3eb4446d03950fb
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..910223056fba9ca12333136c538765cf8643bc54
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py
@@ -0,0 +1,318 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from typing import Optional, Dict, Any, List
+from torch.nn.common_types import _size_1_t
+from .utils import ReferenceQuantizedModule
+
+__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d']
+
+class _ConvNd(torch.nn.modules.conv._ConvNd, ReferenceQuantizedModule):
+    """ A reference version of nn.quantized.Conv2d
+        we will not pack the parameters in this module, since weight packing is an
+        optimization for quantized backends supported in PyTorch (fbgemm/qnnpack),
+        this is useful when user want to use this module in other backends like Glow.
+    """
+    __annotations__ = {"bias": Optional[torch.Tensor]}
+    _IS_REFERENCE = True
+
+    @staticmethod
+    def from_float(cls, float_conv, weight_qparams):
+        qref_conv = cls(
+            float_conv.in_channels,
+            float_conv.out_channels,
+            float_conv.kernel_size,  # type: ignore[arg-type]
+            float_conv.stride,  # type: ignore[arg-type]
+            float_conv.padding,  # type: ignore[arg-type]
+            float_conv.dilation,  # type: ignore[arg-type]
+            float_conv.groups,
+            float_conv.bias is not None,  # type: ignore[arg-type]
+            float_conv.padding_mode,
+            device=float_conv.weight.device,
+            dtype=float_conv.weight.dtype,
+            weight_qparams=weight_qparams)
+        qref_conv.weight = torch.nn.Parameter(float_conv.weight.detach())
+        if float_conv.bias is not None:
+            qref_conv.bias = torch.nn.Parameter(float_conv.bias.detach())
+        return qref_conv
+
+class Conv1d(_ConvNd, nn.Conv1d):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: _size_1_t,
+                 stride: _size_1_t = 1,
+                 padding: _size_1_t = 0,
+                 dilation: _size_1_t = 1,
+                 groups: int = 1,
+                 bias: bool = True,
+                 padding_mode: str = "zeros",
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+        nn.Conv1d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, bias, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.conv1d ---
+
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.conv1d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv1d
+        """
+        weight_quant_dequant = self.get_weight()
+        result = F.conv1d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, self.dilation, self.groups)
+        return result
+
+    def _get_name(self):
+        return "QuantizedConv1d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvNd.from_float(cls, float_conv, weight_qparams)
+
+class Conv2d(_ConvNd, nn.Conv2d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True,
+                 padding_mode='zeros',
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+        nn.Conv2d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, bias, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.conv2d ---
+
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.conv2d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv2d
+        """
+        weight_quant_dequant = self.get_weight()
+        result = F.conv2d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, self.dilation, self.groups)
+        return result
+
+    def _get_name(self):
+        return "QuantizedConv2d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvNd.from_float(cls, float_conv, weight_qparams)
+
+class Conv3d(_ConvNd, nn.Conv3d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True,
+                 padding_mode="zeros",
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+        nn.Conv3d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, bias, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.conv3d ---
+
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.conv3d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv3d
+        """
+        weight_quant_dequant = self.get_weight()
+        result = F.conv3d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, self.dilation, self.groups)
+        return result
+
+    def _get_name(self):
+        return "QuantizedConv3d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvNd.from_float(cls, float_conv, weight_qparams)
+
+class _ConvTransposeNd(_ConvNd, torch.nn.modules.conv._ConvTransposeNd):
+    """ A reference version of nn.quantized.ConvTranspose2d
+        we will not pack the parameters in this module, since weight packing is an
+        optimization for quantized backends supported in PyTorch (fbgemm/qnnpack),
+        this is useful when user want to use this module in other backends like Glow.
+    """
+    @staticmethod
+    def from_float(cls, float_conv, weight_qparams):
+        qref_conv = cls(
+            float_conv.in_channels,
+            float_conv.out_channels,
+            float_conv.kernel_size,  # type: ignore[arg-type]
+            float_conv.stride,  # type: ignore[arg-type]
+            float_conv.padding,  # type: ignore[arg-type]
+            float_conv.output_padding,  # type: ignore[arg-type]
+            float_conv.groups,
+            float_conv.bias is not None,  # type: ignore[arg-type]
+            float_conv.dilation,  # type: ignore[arg-type]
+            float_conv.padding_mode,
+            device=float_conv.weight.device,
+            dtype=float_conv.weight.dtype,
+            weight_qparams=weight_qparams)
+        qref_conv.weight = torch.nn.Parameter(float_conv.weight.detach())
+        if float_conv.bias is not None:
+            qref_conv.bias = torch.nn.Parameter(float_conv.bias.detach())
+        return qref_conv
+
+
+class ConvTranspose1d(_ConvTransposeNd, nn.ConvTranspose1d):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: _size_1_t,
+                 stride: _size_1_t = 1,
+                 padding: _size_1_t = 0,
+                 output_padding: _size_1_t = 0,
+                 groups: int = 1,
+                 bias: bool = True,
+                 dilation: _size_1_t = 1,
+                 padding_mode: str = "zeros",
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+        nn.ConvTranspose1d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.convTranspose1d ---
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.convTranspose1d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv1d
+        """
+
+        assert isinstance(self.padding, tuple)
+        # One cannot replace List by Tuple or Sequence in "_output_padding" because
+        # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+        output_padding = self._output_padding(
+            input, output_size, self.stride, self.padding, self.kernel_size, self.dilation)  # type: ignore[arg-type]
+
+        weight_quant_dequant = self.get_weight()
+        result = F.conv_transpose1d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, output_padding, self.groups, self.dilation)
+        return result
+
+    def _get_name(self):
+        return "QuantizedConvTranspose1d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams)
+
+class ConvTranspose2d(_ConvTransposeNd, nn.ConvTranspose2d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0,
+                 groups=1, bias=True, dilation=1,
+                 padding_mode='zeros',
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+
+        nn.ConvTranspose2d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.convTranspose2d ---
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.convTranspose2d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv2d
+        """
+        assert isinstance(self.padding, tuple)
+        # One cannot replace List by Tuple or Sequence in "_output_padding" because
+        # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+
+        output_padding = self._output_padding(
+            input, output_size, self.stride, self.padding, self.kernel_size, self.dilation)  # type: ignore[arg-type]
+
+        weight_quant_dequant = self.get_weight()
+        result = F.conv_transpose2d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, output_padding, self.groups, self.dilation)
+
+        return result
+
+    def _get_name(self):
+        return "QuantizedConvTranspose2d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams)
+
+class ConvTranspose3d(_ConvTransposeNd, nn.ConvTranspose3d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0,
+                 groups=1, bias=True, dilation=1,
+                 padding_mode="zeros",
+                 device=None,
+                 dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None):
+        nn.ConvTranspose3d.__init__(
+            self, in_channels, out_channels, kernel_size, stride, padding, output_padding,
+            groups, bias, dilation, padding_mode, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor:
+        """
+        we have:
+        w(float) -- quant - dequant \
+        x(float) ------------- F.convTranspose3d ---
+        In the full model, we will see
+        w(float) -- quant - *dequant \
+        x -- quant --- *dequant --  *F.convTranspose3d --- *quant - dequant
+        and the backend should be able to fuse the ops with `*` into a quantized conv3d
+        """
+
+        assert isinstance(self.padding, tuple)
+        # One cannot replace List by Tuple or Sequence in "_output_padding" because
+        # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+        output_padding = self._output_padding(
+            input, output_size, self.stride, self.padding, self.kernel_size, self.dilation)  # type: ignore[arg-type]
+
+        weight_quant_dequant = self.get_weight()
+        result = F.conv_transpose3d(
+            x, weight_quant_dequant, self.bias, self.stride,
+            self.padding, output_padding, self.groups, self.dilation)
+        return result
+
+    def _get_name(self):
+        return "QuantizedConvTranspose3d(Reference)"
+
+    @classmethod
+    def from_float(cls, float_conv, weight_qparams):
+        return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py
new file mode 100644
index 0000000000000000000000000000000000000000..4890402b875a5ffea8d636aae0b4e3d00992706b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py
@@ -0,0 +1,94 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+from .utils import ReferenceQuantizedModule
+from typing import Optional, Dict, Any
+
+__all__ = ['Embedding', 'EmbeddingBag']
+
+class Embedding(nn.Embedding, ReferenceQuantizedModule):
+    """ A reference quantized Embedding module that fits into the
+    FX Graph Mode Quantization workflow, activation will be floating point Tensor,
+    we will store floating point weight as well in the module, but in forward we'll
+    quantize and dequantize the weight before running the floating point functional
+    embedding operator.
+    """
+    def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None,
+                 max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
+                 sparse: bool = False, _weight: Optional[Tensor] = None,
+                 device=None, dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None) -> None:
+        super().__init__(num_embeddings, embedding_dim, padding_idx, max_norm,
+                         norm_type, scale_grad_by_freq, sparse, _weight, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def _get_name(self):
+        return "QuantizedEmbedding(Reference)"
+
+    def forward(self, input: Tensor) -> Tensor:
+        weight_quant_dequant = self.get_weight()
+        return F.embedding(
+            input, weight_quant_dequant, self.padding_idx, self.max_norm,
+            self.norm_type, self.scale_grad_by_freq, self.sparse)
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams):
+        return cls(
+            mod.num_embeddings,
+            mod.embedding_dim,
+            mod.padding_idx,
+            mod.max_norm,
+            mod.norm_type,
+            mod.scale_grad_by_freq,
+            mod.sparse,
+            mod.weight,
+            mod.weight.device,
+            mod.weight.dtype,
+            weight_qparams)
+
+class EmbeddingBag(nn.EmbeddingBag, ReferenceQuantizedModule):
+    """ A reference quantized EmbeddingBag module that fits into the
+    FX Graph Mode Quantization workflow, activation will be floating point Tensor,
+    we will store floating point weight as well in the module, but in forward we'll
+    quantize and dequantize the weight before running the floating point functional
+    embedding operator.
+    """
+    def __init__(self, num_embeddings: int, embedding_dim: int,
+                 max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
+                 mode: str = 'mean', sparse: bool = False, _weight: Optional[Tensor] = None,
+                 include_last_offset: bool = False, padding_idx: Optional[int] = None,
+                 device=None, dtype=None,
+                 weight_qparams: Optional[Dict[str, Any]] = None) -> None:
+        super().__init__(num_embeddings, embedding_dim, max_norm, norm_type,
+                         scale_grad_by_freq, mode, sparse, _weight, include_last_offset,
+                         padding_idx, device, dtype)
+        self._init_weight_qparams(weight_qparams, device)
+
+    def _get_name(self):
+        return "QuantizedEmbedding(Reference)"
+
+    def forward(self, input: Tensor, offsets: Optional[Tensor] = None, per_sample_weights: Optional[Tensor] = None) -> Tensor:
+        weight_quant_dequant = self.get_weight()
+        return F.embedding_bag(input, weight_quant_dequant, offsets,
+                               self.max_norm, self.norm_type,
+                               self.scale_grad_by_freq, self.mode, self.sparse,
+                               per_sample_weights, self.include_last_offset,
+                               self.padding_idx)
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams):
+        return cls(
+            mod.num_embeddings,
+            mod.embedding_dim,
+            mod.max_norm,
+            mod.norm_type,
+            mod.scale_grad_by_freq,
+            mod.mode,
+            mod.sparse,
+            mod.weight,
+            mod.include_last_offset,
+            mod.padding_idx,
+            mod.weight.device,
+            mod.weight.dtype,
+            weight_qparams
+        )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..52ee46afd4be311668e5043caeee941aed205fc3
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..86596ba18cf1f08e979a9e4c0ae0485627c44845
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py
@@ -0,0 +1,10 @@
+from torch.ao.nn.sparse.quantized import dynamic
+
+from .linear import Linear
+from .linear import LinearPackedParams
+
+__all__ = [
+    "dynamic",
+    "Linear",
+    "LinearPackedParams",
+]
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..edebd898a21d7197ae11b5f965ed4171340175c2
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fa4b0d32f4633aab11f198b1769eb5f174262428
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f017aa9e2e2c673909197d9424d95b196ff30c3
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py
@@ -0,0 +1,8 @@
+from .base_structured_sparsifier import BaseStructuredSparsifier
+from .parametrization import (
+    FakeStructuredSparsity,
+    BiasHook,
+)
+from .saliency_pruner import SaliencyPruner
+from .lstm_saliency_pruner import LSTMSaliencyPruner
+from .FPGM_pruner import FPGMPruner
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..98f489904cc45340167cfae5f2362ee70ccb8fca
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py
@@ -0,0 +1,136 @@
+from typing import Any, Dict, Optional, Type
+from torch.nn.utils.parametrize import type_before_parametrizations, is_parametrized
+from itertools import chain
+
+from torch import nn
+
+__all__ = [
+    "module_contains_param",
+    "swap_module",
+    "module_to_fqn",
+    "fqn_to_module",
+    "get_arg_info_from_tensor_fqn",
+    "FakeSparsity",
+]
+
+
+def module_contains_param(module: nn.Module, parametrization: Type[nn.Module]) -> bool:
+    if is_parametrized(module):
+        # see if any of the module tensors have a parametriztion attached that matches the one passed in
+        return any(
+            any(isinstance(param, parametrization) for param in param_list)
+            for key, param_list in module.parametrizations.items()  # type: ignore[union-attr,operator]
+        )
+    return False
+
+
+def swap_module(
+    mod: nn.Module, mapping: Dict[Type[nn.Module], Type[nn.Module]]
+) -> nn.Module:
+    r"""Swaps the module using from_dense according to the mapping passed in.
+    Args:
+        mod: input module
+        mapping: a dictionary that maps from nn module to sparse nn module
+    Return:
+        The corresponding sparse module of `mod` according to mapping, created using from_dense
+    """
+    if type_before_parametrizations(mod) in mapping:
+        sparse_mod = mapping[type_before_parametrizations(mod)]
+
+        # TODO Fix this typing, as Type[Module] has no attribute "from_dense"
+        new_mod = sparse_mod.from_dense(mod)  # type: ignore[attr-defined]
+
+        # Preserve module's pre forward hooks. They'll be called on quantized input
+        for pre_hook_fn in mod._forward_pre_hooks.values():
+            new_mod.register_forward_pre_hook(pre_hook_fn)
+        # Preserve module's post forward hooks except _observer_forward_hook
+        # After convert they'll work with quantized output
+        for hook_fn in mod._forward_hooks.values():
+            new_mod.register_forward_hook(hook_fn)
+
+        # respect device affinity when swapping modules
+        devices = {p.device for p in chain(mod.parameters(), mod.buffers())}
+        assert len(devices) <= 1, (
+            f"swap_module only works with cpu or single-device CUDA modules, but got devices {devices}"
+        )
+        device = next(iter(devices)) if len(devices) > 0 else None
+        if device:
+            new_mod.to(device)
+
+        return new_mod
+
+    else:
+        return mod
+
+
+def module_to_fqn(
+    model: nn.Module, module: nn.Module, prefix: str = ""
+) -> Optional[str]:
+    """
+    Returns the fqn for a module or None if module not a descendent of model.
+    """
+    if module is model:
+        return ""
+    for name, child in model.named_children():
+        fqn = module_to_fqn(child, module, ".")
+        if isinstance(fqn, str):
+            return prefix + name + fqn
+    return None
+
+
+def fqn_to_module(model: Optional[nn.Module], path: str) -> Optional[nn.Module]:
+    """
+    Given an fqn, returns the corresponding module or tensor or None if the fqn given by `path`
+    doesn't correspond to anything. Similar to model.get_submodule(path) but works for tensors.
+    """
+    if path != "":
+        for name in path.split("."):
+            model = getattr(model, name, None)
+    return model
+
+
+def get_arg_info_from_tensor_fqn(model: nn.Module, tensor_fqn: str) -> Dict[str, Any]:
+    """
+    Uses tensor_fqn to obtain a dict containing module_fqn, module and tensor_name
+    """
+    # string manip to split tensor_fqn into module_fqn and tensor_name
+    # if tensor_fqn is 'weight' then module_fqn and tensor_name are '' and 'weight'
+    # if tensor_fqn is 'linear.weight' then module_fqn and tensor_name are 'linear' and 'weight'
+    tensor_name = tensor_fqn.split(".")[-1]
+    module_fqn = tensor_fqn[: -len(tensor_name) - ("." in tensor_fqn)]
+
+    module = fqn_to_module(model, module_fqn)
+
+    return {
+        "module_fqn": module_fqn,
+        "module": module,
+        "tensor_name": tensor_name,
+        "tensor_fqn": tensor_fqn,
+    }
+
+
+# Parametrizations
+class FakeSparsity(nn.Module):
+    r"""Parametrization for the weights. Should be attached to the 'weight' or
+    any other parameter that requires a mask applied to it.
+
+    Note::
+
+        Once the mask is passed, the variable should not change the id. The
+        contents of the mask can change, but the mask reference itself should
+        not.
+    """
+
+    def __init__(self, mask):
+        super().__init__()
+        self.register_buffer("mask", mask)
+
+    def forward(self, x):
+        assert self.mask.shape == x.shape
+        return self.mask * x
+
+    def state_dict(self, *args, **kwargs):
+        # We don't want to let the parametrizations to save the mask.
+        # That way we make sure that the linear module doesn't store the masks
+        # alongside their parametrizations.
+        return {}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py
new file mode 100644
index 0000000000000000000000000000000000000000..1448a6270e2ba9ba49b13a9d4878180f56a5bba7
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py
@@ -0,0 +1,30 @@
+import enum
+
+__all__ = [
+    "QuantType",
+]
+
+# Quantization type (dynamic quantization, static quantization).
+# Should match the c++ enum in quantization_type.h
+class QuantType(enum.IntEnum):
+    DYNAMIC = 0
+    STATIC = 1
+    QAT = 2
+    WEIGHT_ONLY = 3
+
+_quant_type_to_str = {
+    QuantType.STATIC: "static",
+    QuantType.DYNAMIC: "dynamic",
+    QuantType.QAT: "qat",
+    QuantType.WEIGHT_ONLY: "weight_only",
+}
+
+# TODO: make this private
+def _get_quant_type_to_str(quant_type: QuantType) -> str:
+    return _quant_type_to_str[quant_type]
+
+def _quant_type_from_str(name: str) -> QuantType:
+    for quant_type, s in _quant_type_to_str.items():
+        if name == s:
+            return quant_type
+    raise ValueError(f"Unknown QuantType name '{name}'")
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a424f63a260abf3dc76503072cdd1c598da2e26
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d400b5ea84b434df76d7998d165ede06994b106f
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30e0ef60bf5e2da8a5d4743d543a8a926e474a32
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2edc4facd7ad8210bb6590be73a02d12c1ceaa68
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py
new file mode 100644
index 0000000000000000000000000000000000000000..88973ebfb88f9521e5f0613b5cedf1204d10a8f1
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py
@@ -0,0 +1,613 @@
+"""
+Create a wheel (.whl) distribution.
+
+A wheel is a built archive format.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import stat
+import struct
+import sys
+import sysconfig
+import warnings
+from email.generator import BytesGenerator, Generator
+from email.policy import EmailPolicy
+from glob import iglob
+from shutil import rmtree
+from typing import TYPE_CHECKING, Callable, Iterable, Literal, Sequence, cast
+from zipfile import ZIP_DEFLATED, ZIP_STORED
+
+import setuptools
+from setuptools import Command
+
+from . import __version__ as wheel_version
+from .metadata import pkginfo_to_metadata
+from .util import log
+from .vendored.packaging import tags
+from .vendored.packaging import version as _packaging_version
+from .wheelfile import WheelFile
+
+if TYPE_CHECKING:
+    import types
+
+# ensure Python logging is configured
+try:
+    __import__("setuptools.logging")
+except ImportError:
+    # setuptools < ??
+    from . import _setuptools_logging
+
+    _setuptools_logging.configure()
+
+
+def safe_name(name: str) -> str:
+    """Convert an arbitrary string to a standard distribution name
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub("[^A-Za-z0-9.]+", "-", name)
+
+
+def safe_version(version: str) -> str:
+    """
+    Convert an arbitrary string to a standard version string
+    """
+    try:
+        # normalize the version
+        return str(_packaging_version.Version(version))
+    except _packaging_version.InvalidVersion:
+        version = version.replace(" ", ".")
+        return re.sub("[^A-Za-z0-9.]+", "-", version)
+
+
+setuptools_major_version = int(setuptools.__version__.split(".")[0])
+
+PY_LIMITED_API_PATTERN = r"cp3\d"
+
+
+def _is_32bit_interpreter() -> bool:
+    return struct.calcsize("P") == 4
+
+
+def python_tag() -> str:
+    return f"py{sys.version_info[0]}"
+
+
+def get_platform(archive_root: str | None) -> str:
+    """Return our platform name 'win32', 'linux_x86_64'"""
+    result = sysconfig.get_platform()
+    if result.startswith("macosx") and archive_root is not None:
+        from .macosx_libfile import calculate_macosx_platform_tag
+
+        result = calculate_macosx_platform_tag(archive_root, result)
+    elif _is_32bit_interpreter():
+        if result == "linux-x86_64":
+            # pip pull request #3497
+            result = "linux-i686"
+        elif result == "linux-aarch64":
+            # packaging pull request #234
+            # TODO armv8l, packaging pull request #690 => this did not land
+            # in pip/packaging yet
+            result = "linux-armv7l"
+
+    return result.replace("-", "_")
+
+
+def get_flag(
+    var: str, fallback: bool, expected: bool = True, warn: bool = True
+) -> bool:
+    """Use a fallback value for determining SOABI flags if the needed config
+    var is unset or unavailable."""
+    val = sysconfig.get_config_var(var)
+    if val is None:
+        if warn:
+            warnings.warn(
+                f"Config variable '{var}' is unset, Python ABI tag may be incorrect",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        return fallback
+    return val == expected
+
+
+def get_abi_tag() -> str | None:
+    """Return the ABI tag based on SOABI (if available) or emulate SOABI (PyPy2)."""
+    soabi: str = sysconfig.get_config_var("SOABI")
+    impl = tags.interpreter_name()
+    if not soabi and impl in ("cp", "pp") and hasattr(sys, "maxunicode"):
+        d = ""
+        m = ""
+        u = ""
+        if get_flag("Py_DEBUG", hasattr(sys, "gettotalrefcount"), warn=(impl == "cp")):
+            d = "d"
+
+        if get_flag(
+            "WITH_PYMALLOC",
+            impl == "cp",
+            warn=(impl == "cp" and sys.version_info < (3, 8)),
+        ) and sys.version_info < (3, 8):
+            m = "m"
+
+        abi = f"{impl}{tags.interpreter_version()}{d}{m}{u}"
+    elif soabi and impl == "cp" and soabi.startswith("cpython"):
+        # non-Windows
+        abi = "cp" + soabi.split("-")[1]
+    elif soabi and impl == "cp" and soabi.startswith("cp"):
+        # Windows
+        abi = soabi.split("-")[0]
+    elif soabi and impl == "pp":
+        # we want something like pypy36-pp73
+        abi = "-".join(soabi.split("-")[:2])
+        abi = abi.replace(".", "_").replace("-", "_")
+    elif soabi and impl == "graalpy":
+        abi = "-".join(soabi.split("-")[:3])
+        abi = abi.replace(".", "_").replace("-", "_")
+    elif soabi:
+        abi = soabi.replace(".", "_").replace("-", "_")
+    else:
+        abi = None
+
+    return abi
+
+
+def safer_name(name: str) -> str:
+    return safe_name(name).replace("-", "_")
+
+
+def safer_version(version: str) -> str:
+    return safe_version(version).replace("-", "_")
+
+
+def remove_readonly(
+    func: Callable[..., object],
+    path: str,
+    excinfo: tuple[type[Exception], Exception, types.TracebackType],
+) -> None:
+    remove_readonly_exc(func, path, excinfo[1])
+
+
+def remove_readonly_exc(func: Callable[..., object], path: str, exc: Exception) -> None:
+    os.chmod(path, stat.S_IWRITE)
+    func(path)
+
+
+class bdist_wheel(Command):
+    description = "create a wheel distribution"
+
+    supported_compressions = {
+        "stored": ZIP_STORED,
+        "deflated": ZIP_DEFLATED,
+    }
+
+    user_options = [
+        ("bdist-dir=", "b", "temporary directory for creating the distribution"),
+        (
+            "plat-name=",
+            "p",
+            "platform name to embed in generated filenames "
+            f"(default: {get_platform(None)})",
+        ),
+        (
+            "keep-temp",
+            "k",
+            "keep the pseudo-installation tree around after "
+            "creating the distribution archive",
+        ),
+        ("dist-dir=", "d", "directory to put final built distributions in"),
+        ("skip-build", None, "skip rebuilding everything (for testing/debugging)"),
+        (
+            "relative",
+            None,
+            "build the archive using relative paths (default: false)",
+        ),
+        (
+            "owner=",
+            "u",
+            "Owner name used when creating a tar file [default: current user]",
+        ),
+        (
+            "group=",
+            "g",
+            "Group name used when creating a tar file [default: current group]",
+        ),
+        ("universal", None, "make a universal wheel (default: false)"),
+        (
+            "compression=",
+            None,
+            "zipfile compression (one of: {}) (default: 'deflated')".format(
+                ", ".join(supported_compressions)
+            ),
+        ),
+        (
+            "python-tag=",
+            None,
+            f"Python implementation compatibility tag (default: '{python_tag()}')",
+        ),
+        (
+            "build-number=",
+            None,
+            "Build number for this particular version. "
+            "As specified in PEP-0427, this must start with a digit. "
+            "[default: None]",
+        ),
+        (
+            "py-limited-api=",
+            None,
+            "Python tag (cp32|cp33|cpNN) for abi3 wheel tag (default: false)",
+        ),
+    ]
+
+    boolean_options = ["keep-temp", "skip-build", "relative", "universal"]
+
+    def initialize_options(self):
+        self.bdist_dir: str = None
+        self.data_dir = None
+        self.plat_name: str | None = None
+        self.plat_tag = None
+        self.format = "zip"
+        self.keep_temp = False
+        self.dist_dir: str | None = None
+        self.egginfo_dir = None
+        self.root_is_pure: bool | None = None
+        self.skip_build = None
+        self.relative = False
+        self.owner = None
+        self.group = None
+        self.universal: bool = False
+        self.compression: str | int = "deflated"
+        self.python_tag: str = python_tag()
+        self.build_number: str | None = None
+        self.py_limited_api: str | Literal[False] = False
+        self.plat_name_supplied = False
+
+    def finalize_options(self):
+        if self.bdist_dir is None:
+            bdist_base = self.get_finalized_command("bdist").bdist_base
+            self.bdist_dir = os.path.join(bdist_base, "wheel")
+
+        egg_info = self.distribution.get_command_obj("egg_info")
+        egg_info.ensure_finalized()  # needed for correct `wheel_dist_name`
+
+        self.data_dir = self.wheel_dist_name + ".data"
+        self.plat_name_supplied = self.plat_name is not None
+
+        try:
+            self.compression = self.supported_compressions[self.compression]
+        except KeyError:
+            raise ValueError(f"Unsupported compression: {self.compression}") from None
+
+        need_options = ("dist_dir", "plat_name", "skip_build")
+
+        self.set_undefined_options("bdist", *zip(need_options, need_options))
+
+        self.root_is_pure = not (
+            self.distribution.has_ext_modules() or self.distribution.has_c_libraries()
+        )
+
+        if self.py_limited_api and not re.match(
+            PY_LIMITED_API_PATTERN, self.py_limited_api
+        ):
+            raise ValueError(f"py-limited-api must match '{PY_LIMITED_API_PATTERN}'")
+
+        # Support legacy [wheel] section for setting universal
+        wheel = self.distribution.get_option_dict("wheel")
+        if "universal" in wheel:
+            # please don't define this in your global configs
+            log.warning(
+                "The [wheel] section is deprecated. Use [bdist_wheel] instead.",
+            )
+            val = wheel["universal"][1].strip()
+            if val.lower() in ("1", "true", "yes"):
+                self.universal = True
+
+        if self.build_number is not None and not self.build_number[:1].isdigit():
+            raise ValueError("Build tag (build-number) must start with a digit.")
+
+    @property
+    def wheel_dist_name(self):
+        """Return distribution full name with - replaced with _"""
+        components = (
+            safer_name(self.distribution.get_name()),
+            safer_version(self.distribution.get_version()),
+        )
+        if self.build_number:
+            components += (self.build_number,)
+        return "-".join(components)
+
+    def get_tag(self) -> tuple[str, str, str]:
+        # bdist sets self.plat_name if unset, we should only use it for purepy
+        # wheels if the user supplied it.
+        if self.plat_name_supplied:
+            plat_name = cast(str, self.plat_name)
+        elif self.root_is_pure:
+            plat_name = "any"
+        else:
+            # macosx contains system version in platform name so need special handle
+            if self.plat_name and not self.plat_name.startswith("macosx"):
+                plat_name = self.plat_name
+            else:
+                # on macosx always limit the platform name to comply with any
+                # c-extension modules in bdist_dir, since the user can specify
+                # a higher MACOSX_DEPLOYMENT_TARGET via tools like CMake
+
+                # on other platforms, and on macosx if there are no c-extension
+                # modules, use the default platform name.
+                plat_name = get_platform(self.bdist_dir)
+
+            if _is_32bit_interpreter():
+                if plat_name in ("linux-x86_64", "linux_x86_64"):
+                    plat_name = "linux_i686"
+                if plat_name in ("linux-aarch64", "linux_aarch64"):
+                    # TODO armv8l, packaging pull request #690 => this did not land
+                    # in pip/packaging yet
+                    plat_name = "linux_armv7l"
+
+        plat_name = (
+            plat_name.lower().replace("-", "_").replace(".", "_").replace(" ", "_")
+        )
+
+        if self.root_is_pure:
+            if self.universal:
+                impl = "py2.py3"
+            else:
+                impl = self.python_tag
+            tag = (impl, "none", plat_name)
+        else:
+            impl_name = tags.interpreter_name()
+            impl_ver = tags.interpreter_version()
+            impl = impl_name + impl_ver
+            # We don't work on CPython 3.1, 3.0.
+            if self.py_limited_api and (impl_name + impl_ver).startswith("cp3"):
+                impl = self.py_limited_api
+                abi_tag = "abi3"
+            else:
+                abi_tag = str(get_abi_tag()).lower()
+            tag = (impl, abi_tag, plat_name)
+            # issue gh-374: allow overriding plat_name
+            supported_tags = [
+                (t.interpreter, t.abi, plat_name) for t in tags.sys_tags()
+            ]
+            assert (
+                tag in supported_tags
+            ), f"would build wheel with unsupported tag {tag}"
+        return tag
+
+    def run(self):
+        build_scripts = self.reinitialize_command("build_scripts")
+        build_scripts.executable = "python"
+        build_scripts.force = True
+
+        build_ext = self.reinitialize_command("build_ext")
+        build_ext.inplace = False
+
+        if not self.skip_build:
+            self.run_command("build")
+
+        install = self.reinitialize_command("install", reinit_subcommands=True)
+        install.root = self.bdist_dir
+        install.compile = False
+        install.skip_build = self.skip_build
+        install.warn_dir = False
+
+        # A wheel without setuptools scripts is more cross-platform.
+        # Use the (undocumented) `no_ep` option to setuptools'
+        # install_scripts command to avoid creating entry point scripts.
+        install_scripts = self.reinitialize_command("install_scripts")
+        install_scripts.no_ep = True
+
+        # Use a custom scheme for the archive, because we have to decide
+        # at installation time which scheme to use.
+        for key in ("headers", "scripts", "data", "purelib", "platlib"):
+            setattr(install, "install_" + key, os.path.join(self.data_dir, key))
+
+        basedir_observed = ""
+
+        if os.name == "nt":
+            # win32 barfs if any of these are ''; could be '.'?
+            # (distutils.command.install:change_roots bug)
+            basedir_observed = os.path.normpath(os.path.join(self.data_dir, ".."))
+            self.install_libbase = self.install_lib = basedir_observed
+
+        setattr(
+            install,
+            "install_purelib" if self.root_is_pure else "install_platlib",
+            basedir_observed,
+        )
+
+        log.info(f"installing to {self.bdist_dir}")
+
+        self.run_command("install")
+
+        impl_tag, abi_tag, plat_tag = self.get_tag()
+        archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
+        if not self.relative:
+            archive_root = self.bdist_dir
+        else:
+            archive_root = os.path.join(
+                self.bdist_dir, self._ensure_relative(install.install_base)
+            )
+
+        self.set_undefined_options("install_egg_info", ("target", "egginfo_dir"))
+        distinfo_dirname = (
+            f"{safer_name(self.distribution.get_name())}-"
+            f"{safer_version(self.distribution.get_version())}.dist-info"
+        )
+        distinfo_dir = os.path.join(self.bdist_dir, distinfo_dirname)
+        self.egg2dist(self.egginfo_dir, distinfo_dir)
+
+        self.write_wheelfile(distinfo_dir)
+
+        # Make the archive
+        if not os.path.exists(self.dist_dir):
+            os.makedirs(self.dist_dir)
+
+        wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
+        with WheelFile(wheel_path, "w", self.compression) as wf:
+            wf.write_files(archive_root)
+
+        # Add to 'Distribution.dist_files' so that the "upload" command works
+        getattr(self.distribution, "dist_files", []).append(
+            (
+                "bdist_wheel",
+                "{}.{}".format(*sys.version_info[:2]),  # like 3.7
+                wheel_path,
+            )
+        )
+
+        if not self.keep_temp:
+            log.info(f"removing {self.bdist_dir}")
+            if not self.dry_run:
+                if sys.version_info < (3, 12):
+                    rmtree(self.bdist_dir, onerror=remove_readonly)
+                else:
+                    rmtree(self.bdist_dir, onexc=remove_readonly_exc)
+
+    def write_wheelfile(
+        self, wheelfile_base: str, generator: str = f"bdist_wheel ({wheel_version})"
+    ):
+        from email.message import Message
+
+        msg = Message()
+        msg["Wheel-Version"] = "1.0"  # of the spec
+        msg["Generator"] = generator
+        msg["Root-Is-Purelib"] = str(self.root_is_pure).lower()
+        if self.build_number is not None:
+            msg["Build"] = self.build_number
+
+        # Doesn't work for bdist_wininst
+        impl_tag, abi_tag, plat_tag = self.get_tag()
+        for impl in impl_tag.split("."):
+            for abi in abi_tag.split("."):
+                for plat in plat_tag.split("."):
+                    msg["Tag"] = "-".join((impl, abi, plat))
+
+        wheelfile_path = os.path.join(wheelfile_base, "WHEEL")
+        log.info(f"creating {wheelfile_path}")
+        with open(wheelfile_path, "wb") as f:
+            BytesGenerator(f, maxheaderlen=0).flatten(msg)
+
+    def _ensure_relative(self, path: str) -> str:
+        # copied from dir_util, deleted
+        drive, path = os.path.splitdrive(path)
+        if path[0:1] == os.sep:
+            path = drive + path[1:]
+        return path
+
+    @property
+    def license_paths(self) -> Iterable[str]:
+        if setuptools_major_version >= 57:
+            # Setuptools has resolved any patterns to actual file names
+            return self.distribution.metadata.license_files or ()
+
+        files: set[str] = set()
+        metadata = self.distribution.get_option_dict("metadata")
+        if setuptools_major_version >= 42:
+            # Setuptools recognizes the license_files option but does not do globbing
+            patterns = cast(Sequence[str], self.distribution.metadata.license_files)
+        else:
+            # Prior to those, wheel is entirely responsible for handling license files
+            if "license_files" in metadata:
+                patterns = metadata["license_files"][1].split()
+            else:
+                patterns = ()
+
+        if "license_file" in metadata:
+            warnings.warn(
+                'The "license_file" option is deprecated. Use "license_files" instead.',
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            files.add(metadata["license_file"][1])
+
+        if not files and not patterns and not isinstance(patterns, list):
+            patterns = ("LICEN[CS]E*", "COPYING*", "NOTICE*", "AUTHORS*")
+
+        for pattern in patterns:
+            for path in iglob(pattern):
+                if path.endswith("~"):
+                    log.debug(
+                        f'ignoring license file "{path}" as it looks like a backup'
+                    )
+                    continue
+
+                if path not in files and os.path.isfile(path):
+                    log.info(
+                        f'adding license file "{path}" (matched pattern "{pattern}")'
+                    )
+                    files.add(path)
+
+        return files
+
+    def egg2dist(self, egginfo_path: str, distinfo_path: str):
+        """Convert an .egg-info directory into a .dist-info directory"""
+
+        def adios(p: str) -> None:
+            """Appropriately delete directory, file or link."""
+            if os.path.exists(p) and not os.path.islink(p) and os.path.isdir(p):
+                shutil.rmtree(p)
+            elif os.path.exists(p):
+                os.unlink(p)
+
+        adios(distinfo_path)
+
+        if not os.path.exists(egginfo_path):
+            # There is no egg-info. This is probably because the egg-info
+            # file/directory is not named matching the distribution name used
+            # to name the archive file. Check for this case and report
+            # accordingly.
+            import glob
+
+            pat = os.path.join(os.path.dirname(egginfo_path), "*.egg-info")
+            possible = glob.glob(pat)
+            err = f"Egg metadata expected at {egginfo_path} but not found"
+            if possible:
+                alt = os.path.basename(possible[0])
+                err += f" ({alt} found - possible misnamed archive file?)"
+
+            raise ValueError(err)
+
+        if os.path.isfile(egginfo_path):
+            # .egg-info is a single file
+            pkg_info = pkginfo_to_metadata(egginfo_path, egginfo_path)
+            os.mkdir(distinfo_path)
+        else:
+            # .egg-info is a directory
+            pkginfo_path = os.path.join(egginfo_path, "PKG-INFO")
+            pkg_info = pkginfo_to_metadata(egginfo_path, pkginfo_path)
+
+            # ignore common egg metadata that is useless to wheel
+            shutil.copytree(
+                egginfo_path,
+                distinfo_path,
+                ignore=lambda x, y: {
+                    "PKG-INFO",
+                    "requires.txt",
+                    "SOURCES.txt",
+                    "not-zip-safe",
+                },
+            )
+
+            # delete dependency_links if it is only whitespace
+            dependency_links_path = os.path.join(distinfo_path, "dependency_links.txt")
+            with open(dependency_links_path, encoding="utf-8") as dependency_links_file:
+                dependency_links = dependency_links_file.read().strip()
+            if not dependency_links:
+                adios(dependency_links_path)
+
+        pkg_info_path = os.path.join(distinfo_path, "METADATA")
+        serialization_policy = EmailPolicy(
+            utf8=True,
+            mangle_from_=False,
+            max_line_length=0,
+        )
+        with open(pkg_info_path, "w", encoding="utf-8") as out:
+            Generator(out, policy=serialization_policy).flatten(pkg_info)
+
+        for license_path in self.license_paths:
+            filename = os.path.basename(license_path)
+            shutil.copy(license_path, os.path.join(distinfo_path, filename))
+
+        adios(egginfo_path)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd7b8629e5087fe35a2ea7ab6d3cbbb4cacf0031
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py
@@ -0,0 +1,26 @@
+from typing import TYPE_CHECKING
+from warnings import warn
+
+warn(
+    "The 'wheel' package is no longer the canonical location of the 'bdist_wheel' "
+    "command, and will be removed in a future release. Please update to setuptools "
+    "v70.1 or later which contains an integrated version of this command.",
+    DeprecationWarning,
+    stacklevel=1,
+)
+
+if TYPE_CHECKING:
+    from ._bdist_wheel import bdist_wheel as bdist_wheel
+else:
+    try:
+        # Better integration/compatibility with setuptools:
+        # in the case new fixes or PEPs are implemented in setuptools
+        # there is no need to backport them to the deprecated code base.
+        # This is useful in the case of old packages in the ecosystem
+        # that are still used but have low maintenance.
+        from setuptools.command.bdist_wheel import bdist_wheel
+    except ImportError:
+        # Only used in the case of old setuptools versions.
+        # If the user wants to get the latest fixes/PEPs,
+        # they are encouraged to address the deprecation warning.
+        from ._bdist_wheel import bdist_wheel as bdist_wheel
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8098fa8599a1f142ee33ee551b7a874f5f98840
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py
@@ -0,0 +1,183 @@
+"""
+Tools for converting old- to new-style metadata.
+"""
+
+from __future__ import annotations
+
+import functools
+import itertools
+import os.path
+import re
+import textwrap
+from email.message import Message
+from email.parser import Parser
+from typing import Generator, Iterable, Iterator, Literal
+
+from .vendored.packaging.requirements import Requirement
+
+
+def _nonblank(str: str) -> bool | Literal[""]:
+    return str and not str.startswith("#")
+
+
+@functools.singledispatch
+def yield_lines(iterable: Iterable[str]) -> Iterator[str]:
+    r"""
+    Yield valid lines of a string or iterable.
+    >>> list(yield_lines(''))
+    []
+    >>> list(yield_lines(['foo', 'bar']))
+    ['foo', 'bar']
+    >>> list(yield_lines('foo\nbar'))
+    ['foo', 'bar']
+    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
+    ['foo', 'baz #comment']
+    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
+    ['foo', 'bar', 'baz', 'bing']
+    """
+    return itertools.chain.from_iterable(map(yield_lines, iterable))
+
+
+@yield_lines.register(str)
+def _(text: str) -> Iterator[str]:
+    return filter(_nonblank, map(str.strip, text.splitlines()))
+
+
+def split_sections(
+    s: str | Iterator[str],
+) -> Generator[tuple[str | None, list[str]], None, None]:
+    """Split a string or iterable thereof into (section, content) pairs
+    Each ``section`` is a stripped version of the section header ("[section]")
+    and each ``content`` is a list of stripped lines excluding blank lines and
+    comment-only lines.  If there are any such lines before the first section
+    header, they're returned in a first ``section`` of ``None``.
+    """
+    section = None
+    content: list[str] = []
+    for line in yield_lines(s):
+        if line.startswith("["):
+            if line.endswith("]"):
+                if section or content:
+                    yield section, content
+                section = line[1:-1].strip()
+                content = []
+            else:
+                raise ValueError("Invalid section heading", line)
+        else:
+            content.append(line)
+
+    # wrap up last segment
+    yield section, content
+
+
+def safe_extra(extra: str) -> str:
+    """Convert an arbitrary string to a standard 'extra' name
+    Any runs of non-alphanumeric characters are replaced with a single '_',
+    and the result is always lowercased.
+    """
+    return re.sub("[^A-Za-z0-9.-]+", "_", extra).lower()
+
+
+def safe_name(name: str) -> str:
+    """Convert an arbitrary string to a standard distribution name
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub("[^A-Za-z0-9.]+", "-", name)
+
+
+def requires_to_requires_dist(requirement: Requirement) -> str:
+    """Return the version specifier for a requirement in PEP 345/566 fashion."""
+    if requirement.url:
+        return " @ " + requirement.url
+
+    requires_dist: list[str] = []
+    for spec in requirement.specifier:
+        requires_dist.append(spec.operator + spec.version)
+
+    if requires_dist:
+        return " " + ",".join(sorted(requires_dist))
+    else:
+        return ""
+
+
+def convert_requirements(requirements: list[str]) -> Iterator[str]:
+    """Yield Requires-Dist: strings for parsed requirements strings."""
+    for req in requirements:
+        parsed_requirement = Requirement(req)
+        spec = requires_to_requires_dist(parsed_requirement)
+        extras = ",".join(sorted(safe_extra(e) for e in parsed_requirement.extras))
+        if extras:
+            extras = f"[{extras}]"
+
+        yield safe_name(parsed_requirement.name) + extras + spec
+
+
+def generate_requirements(
+    extras_require: dict[str | None, list[str]],
+) -> Iterator[tuple[str, str]]:
+    """
+    Convert requirements from a setup()-style dictionary to
+    ('Requires-Dist', 'requirement') and ('Provides-Extra', 'extra') tuples.
+
+    extras_require is a dictionary of {extra: [requirements]} as passed to setup(),
+    using the empty extra {'': [requirements]} to hold install_requires.
+    """
+    for extra, depends in extras_require.items():
+        condition = ""
+        extra = extra or ""
+        if ":" in extra:  # setuptools extra:condition syntax
+            extra, condition = extra.split(":", 1)
+
+        extra = safe_extra(extra)
+        if extra:
+            yield "Provides-Extra", extra
+            if condition:
+                condition = "(" + condition + ") and "
+            condition += f"extra == '{extra}'"
+
+        if condition:
+            condition = " ; " + condition
+
+        for new_req in convert_requirements(depends):
+            canonical_req = str(Requirement(new_req + condition))
+            yield "Requires-Dist", canonical_req
+
+
+def pkginfo_to_metadata(egg_info_path: str, pkginfo_path: str) -> Message:
+    """
+    Convert .egg-info directory with PKG-INFO to the Metadata 2.1 format
+    """
+    with open(pkginfo_path, encoding="utf-8") as headers:
+        pkg_info = Parser().parse(headers)
+
+    pkg_info.replace_header("Metadata-Version", "2.1")
+    # Those will be regenerated from `requires.txt`.
+    del pkg_info["Provides-Extra"]
+    del pkg_info["Requires-Dist"]
+    requires_path = os.path.join(egg_info_path, "requires.txt")
+    if os.path.exists(requires_path):
+        with open(requires_path, encoding="utf-8") as requires_file:
+            requires = requires_file.read()
+
+        parsed_requirements = sorted(split_sections(requires), key=lambda x: x[0] or "")
+        for extra, reqs in parsed_requirements:
+            for key, value in generate_requirements({extra: reqs}):
+                if (key, value) not in pkg_info.items():
+                    pkg_info[key] = value
+
+    description = pkg_info["Description"]
+    if description:
+        description_lines = pkg_info["Description"].splitlines()
+        dedented_description = "\n".join(
+            # if the first line of long_description is blank,
+            # the first line here will be indented.
+            (
+                description_lines[0].lstrip(),
+                textwrap.dedent("\n".join(description_lines[1:])),
+                "\n",
+            )
+        )
+        pkg_info.set_payload(dedented_description)
+        del pkg_info["Description"]
+
+    return pkg_info
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..c928aa403b75e1ae392b4af5fe13147a095bbd30
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import base64
+import logging
+
+log = logging.getLogger("wheel")
+
+
+def urlsafe_b64encode(data: bytes) -> bytes:
+    """urlsafe_b64encode without padding"""
+    return base64.urlsafe_b64encode(data).rstrip(b"=")
+
+
+def urlsafe_b64decode(data: bytes) -> bytes:
+    """urlsafe_b64decode without padding"""
+    pad = b"=" * (4 - (len(data) & 3))
+    return base64.urlsafe_b64decode(data + pad)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a0f4596c566420135991713ee75ca29c4cff3ed
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+import csv
+import hashlib
+import os.path
+import re
+import stat
+import time
+from io import StringIO, TextIOWrapper
+from typing import IO, TYPE_CHECKING, Literal
+from zipfile import ZIP_DEFLATED, ZipFile, ZipInfo
+
+from wheel.cli import WheelError
+from wheel.util import log, urlsafe_b64decode, urlsafe_b64encode
+
+if TYPE_CHECKING:
+    from typing import Protocol, Sized, Union
+
+    from typing_extensions import Buffer
+
+    StrPath = Union[str, os.PathLike[str]]
+
+    class SizedBuffer(Sized, Buffer, Protocol): ...
+
+
+# Non-greedy matching of an optional build number may be too clever (more
+# invalid wheel filenames will match). Separate regex for .dist-info?
+WHEEL_INFO_RE = re.compile(
+    r"""^(?P<namever>(?P<name>[^\s-]+?)-(?P<ver>[^\s-]+?))(-(?P<build>\d[^\s-]*))?
+     -(?P<pyver>[^\s-]+?)-(?P<abi>[^\s-]+?)-(?P<plat>\S+)\.whl$""",
+    re.VERBOSE,
+)
+MINIMUM_TIMESTAMP = 315532800  # 1980-01-01 00:00:00 UTC
+
+
+def get_zipinfo_datetime(timestamp: float | None = None):
+    # Some applications need reproducible .whl files, but they can't do this without
+    # forcing the timestamp of the individual ZipInfo objects. See issue #143.
+    timestamp = int(os.environ.get("SOURCE_DATE_EPOCH", timestamp or time.time()))
+    timestamp = max(timestamp, MINIMUM_TIMESTAMP)
+    return time.gmtime(timestamp)[0:6]
+
+
+class WheelFile(ZipFile):
+    """A ZipFile derivative class that also reads SHA-256 hashes from
+    .dist-info/RECORD and checks any read files against those.
+    """
+
+    _default_algorithm = hashlib.sha256
+
+    def __init__(
+        self,
+        file: StrPath,
+        mode: Literal["r", "w", "x", "a"] = "r",
+        compression: int = ZIP_DEFLATED,
+    ):
+        basename = os.path.basename(file)
+        self.parsed_filename = WHEEL_INFO_RE.match(basename)
+        if not basename.endswith(".whl") or self.parsed_filename is None:
+            raise WheelError(f"Bad wheel filename {basename!r}")
+
+        ZipFile.__init__(self, file, mode, compression=compression, allowZip64=True)
+
+        self.dist_info_path = "{}.dist-info".format(
+            self.parsed_filename.group("namever")
+        )
+        self.record_path = self.dist_info_path + "/RECORD"
+        self._file_hashes: dict[str, tuple[None, None] | tuple[int, bytes]] = {}
+        self._file_sizes = {}
+        if mode == "r":
+            # Ignore RECORD and any embedded wheel signatures
+            self._file_hashes[self.record_path] = None, None
+            self._file_hashes[self.record_path + ".jws"] = None, None
+            self._file_hashes[self.record_path + ".p7s"] = None, None
+
+            # Fill in the expected hashes by reading them from RECORD
+            try:
+                record = self.open(self.record_path)
+            except KeyError:
+                raise WheelError(f"Missing {self.record_path} file") from None
+
+            with record:
+                for line in csv.reader(
+                    TextIOWrapper(record, newline="", encoding="utf-8")
+                ):
+                    path, hash_sum, size = line
+                    if not hash_sum:
+                        continue
+
+                    algorithm, hash_sum = hash_sum.split("=")
+                    try:
+                        hashlib.new(algorithm)
+                    except ValueError:
+                        raise WheelError(
+                            f"Unsupported hash algorithm: {algorithm}"
+                        ) from None
+
+                    if algorithm.lower() in {"md5", "sha1"}:
+                        raise WheelError(
+                            f"Weak hash algorithm ({algorithm}) is not permitted by "
+                            f"PEP 427"
+                        )
+
+                    self._file_hashes[path] = (
+                        algorithm,
+                        urlsafe_b64decode(hash_sum.encode("ascii")),
+                    )
+
+    def open(
+        self,
+        name_or_info: str | ZipInfo,
+        mode: Literal["r", "w"] = "r",
+        pwd: bytes | None = None,
+    ) -> IO[bytes]:
+        def _update_crc(newdata: bytes) -> None:
+            eof = ef._eof
+            update_crc_orig(newdata)
+            running_hash.update(newdata)
+            if eof and running_hash.digest() != expected_hash:
+                raise WheelError(f"Hash mismatch for file '{ef_name}'")
+
+        ef_name = (
+            name_or_info.filename if isinstance(name_or_info, ZipInfo) else name_or_info
+        )
+        if (
+            mode == "r"
+            and not ef_name.endswith("/")
+            and ef_name not in self._file_hashes
+        ):
+            raise WheelError(f"No hash found for file '{ef_name}'")
+
+        ef = ZipFile.open(self, name_or_info, mode, pwd)
+        if mode == "r" and not ef_name.endswith("/"):
+            algorithm, expected_hash = self._file_hashes[ef_name]
+            if expected_hash is not None:
+                # Monkey patch the _update_crc method to also check for the hash from
+                # RECORD
+                running_hash = hashlib.new(algorithm)
+                update_crc_orig, ef._update_crc = ef._update_crc, _update_crc
+
+        return ef
+
+    def write_files(self, base_dir: str):
+        log.info(f"creating '{self.filename}' and adding '{base_dir}' to it")
+        deferred: list[tuple[str, str]] = []
+        for root, dirnames, filenames in os.walk(base_dir):
+            # Sort the directory names so that `os.walk` will walk them in a
+            # defined order on the next iteration.
+            dirnames.sort()
+            for name in sorted(filenames):
+                path = os.path.normpath(os.path.join(root, name))
+                if os.path.isfile(path):
+                    arcname = os.path.relpath(path, base_dir).replace(os.path.sep, "/")
+                    if arcname == self.record_path:
+                        pass
+                    elif root.endswith(".dist-info"):
+                        deferred.append((path, arcname))
+                    else:
+                        self.write(path, arcname)
+
+        deferred.sort()
+        for path, arcname in deferred:
+            self.write(path, arcname)
+
+    def write(
+        self,
+        filename: str,
+        arcname: str | None = None,
+        compress_type: int | None = None,
+    ) -> None:
+        with open(filename, "rb") as f:
+            st = os.fstat(f.fileno())
+            data = f.read()
+
+        zinfo = ZipInfo(
+            arcname or filename, date_time=get_zipinfo_datetime(st.st_mtime)
+        )
+        zinfo.external_attr = (stat.S_IMODE(st.st_mode) | stat.S_IFMT(st.st_mode)) << 16
+        zinfo.compress_type = compress_type or self.compression
+        self.writestr(zinfo, data, compress_type)
+
+    def writestr(
+        self,
+        zinfo_or_arcname: str | ZipInfo,
+        data: SizedBuffer | str,
+        compress_type: int | None = None,
+    ):
+        if isinstance(zinfo_or_arcname, str):
+            zinfo_or_arcname = ZipInfo(
+                zinfo_or_arcname, date_time=get_zipinfo_datetime()
+            )
+            zinfo_or_arcname.compress_type = self.compression
+            zinfo_or_arcname.external_attr = (0o664 | stat.S_IFREG) << 16
+
+        if isinstance(data, str):
+            data = data.encode("utf-8")
+
+        ZipFile.writestr(self, zinfo_or_arcname, data, compress_type)
+        fname = (
+            zinfo_or_arcname.filename
+            if isinstance(zinfo_or_arcname, ZipInfo)
+            else zinfo_or_arcname
+        )
+        log.info(f"adding '{fname}'")
+        if fname != self.record_path:
+            hash_ = self._default_algorithm(data)
+            self._file_hashes[fname] = (
+                hash_.name,
+                urlsafe_b64encode(hash_.digest()).decode("ascii"),
+            )
+            self._file_sizes[fname] = len(data)
+
+    def close(self):
+        # Write RECORD
+        if self.fp is not None and self.mode == "w" and self._file_hashes:
+            data = StringIO()
+            writer = csv.writer(data, delimiter=",", quotechar='"', lineterminator="\n")
+            writer.writerows(
+                (
+                    (fname, algorithm + "=" + hash_, self._file_sizes[fname])
+                    for fname, (algorithm, hash_) in self._file_hashes.items()
+                )
+            )
+            writer.writerow((format(self.record_path), "", ""))
+            self.writestr(self.record_path, data.getvalue())
+
+        ZipFile.close(self)