diff --git a/.gitattributes b/.gitattributes index a37724e09bb850f312f841f5d1e266c16c808bcf..f5441aff30b541916513f3d17b0033aa85c4b75c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -65,3 +65,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/_ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a443b39d4be005f8c3ad9094c13a29466c6a71e Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a030680b7909bc80325a6270a4b211203b3c619 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..609c5d27448bb56474e0a72b0159841d38767667 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b55afbcb629a106f796cc22a60b620437bb906d Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df66e2da2e7869aff4432a9449a0b0b2dfdeefa1 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b60bc4ab48d083293bed4e77e188c08c045bb8a0 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d7ff900f1090db39e1a63c8885cdc1f1fed025f Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f41bf071a7d39372c126cbf888df13a0c52c0d05 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a0e11cb648de2879023d09b10b11e67f7b4647f Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4654e2b846a39714020429aed0cee99f75fa844d Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h new file mode 100644 index 0000000000000000000000000000000000000000..9b769f5d56d3533279b61b398a3e81cab2035e68 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h @@ -0,0 +1,1077 @@ + + /* Copyright 2010-2014 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * The source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * The Licensed Deliverables contained herein are PROPRIETARY and + * CONFIDENTIAL to NVIDIA and are being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +#if !defined(CURAND_H_) +#define CURAND_H_ + +/** + * \defgroup HOST Host API + * + * @{ + */ +#ifndef __CUDACC_RTC__ +#include +#endif + +#ifndef CURANDAPI +#ifdef _WIN32 +#define CURANDAPI __stdcall +#else +#define CURANDAPI +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +#define CURAND_VER_MAJOR 10 +#define CURAND_VER_MINOR 3 +#define CURAND_VER_PATCH 0 +#define CURAND_VER_BUILD 86 +#define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \ + CURAND_VER_MINOR * 100 + \ + CURAND_VER_PATCH) +/* CURAND Host API datatypes */ + +/** + * @{ + */ + +/** + * CURAND function call status types + */ +enum curandStatus { + CURAND_STATUS_SUCCESS = 0, ///< No errors + CURAND_STATUS_VERSION_MISMATCH = 100, ///< Header file and linked library version do not match + CURAND_STATUS_NOT_INITIALIZED = 101, ///< Generator not initialized + CURAND_STATUS_ALLOCATION_FAILED = 102, ///< Memory allocation failed + CURAND_STATUS_TYPE_ERROR = 103, ///< Generator is wrong type + CURAND_STATUS_OUT_OF_RANGE = 104, ///< Argument out of range + CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105, ///< Length requested is not a multple of dimension + CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106, ///< GPU does not have double precision required by MRG32k3a + CURAND_STATUS_LAUNCH_FAILURE = 201, ///< Kernel launch failure + CURAND_STATUS_PREEXISTING_FAILURE = 202, ///< Preexisting failure on library entry + CURAND_STATUS_INITIALIZATION_FAILED = 203, ///< Initialization of CUDA failed + CURAND_STATUS_ARCH_MISMATCH = 204, ///< Architecture mismatch, GPU does not support requested feature + CURAND_STATUS_INTERNAL_ERROR = 999 ///< Internal library error +}; + +/* + * CURAND function call status types +*/ +/** \cond UNHIDE_TYPEDEFS */ +typedef enum curandStatus curandStatus_t; +/** \endcond */ + +/** + * CURAND generator types + */ +enum curandRngType { + CURAND_RNG_TEST = 0, + CURAND_RNG_PSEUDO_DEFAULT = 100, ///< Default pseudorandom generator + CURAND_RNG_PSEUDO_XORWOW = 101, ///< XORWOW pseudorandom generator + CURAND_RNG_PSEUDO_MRG32K3A = 121, ///< MRG32k3a pseudorandom generator + CURAND_RNG_PSEUDO_MTGP32 = 141, ///< Mersenne Twister MTGP32 pseudorandom generator + CURAND_RNG_PSEUDO_MT19937 = 142, ///< Mersenne Twister MT19937 pseudorandom generator + CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161, ///< PHILOX-4x32-10 pseudorandom generator + CURAND_RNG_QUASI_DEFAULT = 200, ///< Default quasirandom generator + CURAND_RNG_QUASI_SOBOL32 = 201, ///< Sobol32 quasirandom generator + CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202, ///< Scrambled Sobol32 quasirandom generator + CURAND_RNG_QUASI_SOBOL64 = 203, ///< Sobol64 quasirandom generator + CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204 ///< Scrambled Sobol64 quasirandom generator +}; + +/* + * CURAND generator types + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef enum curandRngType curandRngType_t; +/** \endcond */ + +/** + * CURAND ordering of results in memory + */ +enum curandOrdering { + CURAND_ORDERING_PSEUDO_BEST = 100, ///< Best ordering for pseudorandom results + CURAND_ORDERING_PSEUDO_DEFAULT = 101, ///< Specific default thread sequence for pseudorandom results, same as CURAND_ORDERING_PSEUDO_BEST + CURAND_ORDERING_PSEUDO_SEEDED = 102, ///< Specific seeding pattern for fast lower quality pseudorandom results + CURAND_ORDERING_PSEUDO_LEGACY = 103, ///< Specific legacy sequence for pseudorandom results, guaranteed to remain the same for all cuRAND release + CURAND_ORDERING_PSEUDO_DYNAMIC = 104, ///< Specific ordering adjusted to the device it is being executed on, provides the best performance + CURAND_ORDERING_QUASI_DEFAULT = 201 ///< Specific n-dimensional ordering for quasirandom results +}; + +/* + * CURAND ordering of results in memory + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef enum curandOrdering curandOrdering_t; +/** \endcond */ + +/** + * CURAND choice of direction vector set + */ +enum curandDirectionVectorSet { + CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions + CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled + CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103, ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions + CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled +}; + +/* + * CURAND choice of direction vector set + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef enum curandDirectionVectorSet curandDirectionVectorSet_t; +/** \endcond */ + +/** + * CURAND array of 32-bit direction vectors + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef unsigned int curandDirectionVectors32_t[32]; +/** \endcond */ + + /** + * CURAND array of 64-bit direction vectors + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef unsigned long long curandDirectionVectors64_t[64]; +/** \endcond **/ + +/** + * CURAND generator (opaque) + */ +struct curandGenerator_st; + +/** + * CURAND generator + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef struct curandGenerator_st *curandGenerator_t; +/** \endcond */ + +/** + * CURAND distribution + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef double curandDistribution_st; +typedef curandDistribution_st *curandDistribution_t; +typedef struct curandDistributionShift_st *curandDistributionShift_t; +/** \endcond */ +/** + * CURAND distribution M2 + */ +/** \cond UNHIDE_TYPEDEFS */ +typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t; +typedef struct curandHistogramM2_st *curandHistogramM2_t; +typedef unsigned int curandHistogramM2K_st; +typedef curandHistogramM2K_st *curandHistogramM2K_t; +typedef curandDistribution_st curandHistogramM2V_st; +typedef curandHistogramM2V_st *curandHistogramM2V_t; + +typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t; +/** \endcond */ + +/* + * CURAND METHOD + */ +/** \cond UNHIDE_ENUMS */ +enum curandMethod { + CURAND_CHOOSE_BEST = 0, // choose best depends on args + CURAND_ITR = 1, + CURAND_KNUTH = 2, + CURAND_HITR = 3, + CURAND_M1 = 4, + CURAND_M2 = 5, + CURAND_BINARY_SEARCH = 6, + CURAND_DISCRETE_GAUSS = 7, + CURAND_REJECTION = 8, + CURAND_DEVICE_API = 9, + CURAND_FAST_REJECTION = 10, + CURAND_3RD = 11, + CURAND_DEFINITION = 12, + CURAND_POISSON = 13 +}; + +typedef enum curandMethod curandMethod_t; +/** \endcond */ + + +#ifndef __CUDACC_RTC__ + +/** + * @} + */ + +/** + * \brief Create new random number generator. + * + * Creates a new random number generator of type \p rng_type + * and returns it in \p *generator. + * + * Legal values for \p rng_type are: + * - CURAND_RNG_PSEUDO_DEFAULT + * - CURAND_RNG_PSEUDO_XORWOW + * - CURAND_RNG_PSEUDO_MRG32K3A + * - CURAND_RNG_PSEUDO_MTGP32 + * - CURAND_RNG_PSEUDO_MT19937 + * - CURAND_RNG_PSEUDO_PHILOX4_32_10 + * - CURAND_RNG_QUASI_DEFAULT + * - CURAND_RNG_QUASI_SOBOL32 + * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 + * - CURAND_RNG_QUASI_SOBOL64 + * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 + * + * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen + * is CURAND_RNG_PSEUDO_XORWOW. \n + * When \p rng_type is CURAND_RNG_QUASI_DEFAULT, + * the type chosen is CURAND_RNG_QUASI_SOBOL32. + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBBLED_SOBOL32 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * \param generator - Pointer to generator + * \param rng_type - Type of generator to create + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED, if memory could not be allocated \n + * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n + * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the + * dynamically linked library version \n + * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n + * - CURAND_STATUS_SUCCESS if generator was created successfully \n + * + */ +curandStatus_t CURANDAPI +curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type); + +/** + * \brief Create new host CPU random number generator. + * + * Creates a new host CPU random number generator of type \p rng_type + * and returns it in \p *generator. + * + * Legal values for \p rng_type are: + * - CURAND_RNG_PSEUDO_DEFAULT + * - CURAND_RNG_PSEUDO_XORWOW + * - CURAND_RNG_PSEUDO_MRG32K3A + * - CURAND_RNG_PSEUDO_MTGP32 + * - CURAND_RNG_PSEUDO_MT19937 + * - CURAND_RNG_PSEUDO_PHILOX4_32_10 + * - CURAND_RNG_QUASI_DEFAULT + * - CURAND_RNG_QUASI_SOBOL32 + * + * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen + * is CURAND_RNG_PSEUDO_XORWOW. \n + * When \p rng_type is CURAND_RNG_QUASI_DEFAULT, + * the type chosen is CURAND_RNG_QUASI_SOBOL32. + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are: + * - \p seed = 0 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are: + * - \p dimensions = 1 + * - \p offset = 0 + * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT + * + * \param generator - Pointer to generator + * \param rng_type - Type of generator to create + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n + * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the + * dynamically linked library version \n + * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n + * - CURAND_STATUS_SUCCESS if generator was created successfully \n + */ +curandStatus_t CURANDAPI +curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type); + +/** + * \brief Destroy an existing generator. + * + * Destroy an existing generator and free all memory associated with its state. + * + * \param generator - Generator to destroy + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_SUCCESS if generator was destroyed successfully \n + */ +curandStatus_t CURANDAPI +curandDestroyGenerator(curandGenerator_t generator); + +/** + * \brief Return the version number of the library. + * + * Return in \p *version the version number of the dynamically linked CURAND + * library. The format is the same as CUDART_VERSION from the CUDA Runtime. + * The only supported configuration is CURAND version equal to CUDA Runtime + * version. + * + * \param version - CURAND library version + * + * \return + * - CURAND_STATUS_SUCCESS if the version number was successfully returned \n + */ +curandStatus_t CURANDAPI +curandGetVersion(int *version); + +/** +* \brief Return the value of the curand property. +* +* Return in \p *value the number for the property described by \p type of the +* dynamically linked CURAND library. +* +* \param type - CUDA library property +* \param value - integer value for the requested property +* +* \return +* - CURAND_STATUS_SUCCESS if the property value was successfully returned \n +* - CURAND_STATUS_OUT_OF_RANGE if the property type is not recognized \n +*/ +curandStatus_t CURANDAPI +curandGetProperty(libraryPropertyType type, int *value); + + +/** + * \brief Set the current stream for CURAND kernel launches. + * + * Set the current stream for CURAND kernel launches. All library functions + * will use this stream until set again. + * + * \param generator - Generator to modify + * \param stream - Stream to use or ::NULL for null stream + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_SUCCESS if stream was set successfully \n + */ +curandStatus_t CURANDAPI +curandSetStream(curandGenerator_t generator, cudaStream_t stream); + +/** + * \brief Set the seed value of the pseudo-random number generator. + * + * Set the seed value of the pseudorandom number generator. + * All values of seed are valid. Different seeds will produce different sequences. + * Different seeds will often not be statistically correlated with each other, + * but some pairs of seed values may generate sequences which are statistically correlated. + * + * \param generator - Generator to modify + * \param seed - Seed value + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_TYPE_ERROR if the generator is not a pseudorandom number generator \n + * - CURAND_STATUS_SUCCESS if generator seed was set successfully \n + */ +curandStatus_t CURANDAPI +curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed); + +/** + * \brief Set the absolute offset of the pseudo or quasirandom number generator. + * + * Set the absolute offset of the pseudo or quasirandom number generator. + * + * All values of offset are valid. The offset position is absolute, not + * relative to the current position in the sequence. + * + * \param generator - Generator to modify + * \param offset - Absolute offset position + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_SUCCESS if generator offset was set successfully \n + */ +curandStatus_t CURANDAPI +curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset); + +/** + * \brief Set the ordering of results of the pseudo or quasirandom number generator. + * + * Set the ordering of results of the pseudo or quasirandom number generator. + * + * Legal values of \p order for pseudorandom generators are: + * - CURAND_ORDERING_PSEUDO_DEFAULT + * - CURAND_ORDERING_PSEUDO_BEST + * - CURAND_ORDERING_PSEUDO_SEEDED + * - CURAND_ORDERING_PSEUDO_LEGACY + * + * Legal values of \p order for quasirandom generators are: + * - CURAND_ORDERING_QUASI_DEFAULT + * + * \param generator - Generator to modify + * \param order - Ordering of results + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_OUT_OF_RANGE if the ordering is not valid \n + * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n + */ +curandStatus_t CURANDAPI +curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order); + +/** + * \brief Set the number of dimensions. + * + * Set the number of dimensions to be generated by the quasirandom number + * generator. + * + * Legal values for \p num_dimensions are 1 to 20000. + * + * \param generator - Generator to modify + * \param num_dimensions - Number of dimensions + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_OUT_OF_RANGE if num_dimensions is not valid \n + * - CURAND_STATUS_TYPE_ERROR if the generator is not a quasirandom number generator \n + * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n + */ +curandStatus_t CURANDAPI +curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions); + +/** + * \brief Generate 32-bit pseudo or quasirandom numbers. + * + * Use \p generator to generate \p num 32-bit results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 32-bit values with every bit random. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param num - Number of random 32-bit values to generate + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_TYPE_ERROR if the generator is a 64 bit quasirandom generator. + * (use ::curandGenerateLongLong() with 64 bit quasirandom generators) + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num); + +/** + * \brief Generate 64-bit quasirandom numbers. + * + * Use \p generator to generate \p num 64-bit results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 64-bit values with every bit random. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param num - Number of random 64-bit values to generate + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_TYPE_ERROR if the generator is not a 64 bit quasirandom generator\n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num); + +/** + * \brief Generate uniformly distributed floats. + * + * Use \p generator to generate \p num float results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 32-bit floating point values between \p 0.0f and \p 1.0f, + * excluding \p 0.0f and including \p 1.0f. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param num - Number of floats to generate + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num); + +/** + * \brief Generate uniformly distributed doubles. + * + * Use \p generator to generate \p num double results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 64-bit double precision floating point values between + * \p 0.0 and \p 1.0, excluding \p 0.0 and including \p 1.0. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param num - Number of doubles to generate + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension \n + * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num); + +/** + * \brief Generate normally distributed doubles. + * + * Use \p generator to generate \p n float results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 32-bit floating point values with mean \p mean and standard + * deviation \p stddev. + * + * Normally distributed results are generated from pseudorandom generators + * with a Box-Muller transform, and so require \p n to be even. + * Quasirandom generators use an inverse cumulative distribution + * function to preserve dimensionality. + * + * There may be slight numerical differences between results generated + * on the GPU with generators created with ::curandCreateGenerator() + * and results calculated on the CPU with generators created with + * ::curandCreateGeneratorHost(). These differences arise because of + * differences in results for transcendental functions. In addition, + * future versions of CURAND may use newer versions of the CUDA math + * library, so different versions of CURAND may give slightly different + * numerical values. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param n - Number of floats to generate + * \param mean - Mean of normal distribution + * \param stddev - Standard deviation of normal distribution + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension, or is not a multiple + * of two for pseudorandom generators \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateNormal(curandGenerator_t generator, float *outputPtr, + size_t n, float mean, float stddev); + +/** + * \brief Generate normally distributed doubles. + * + * Use \p generator to generate \p n double results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 64-bit floating point values with mean \p mean and standard + * deviation \p stddev. + * + * Normally distributed results are generated from pseudorandom generators + * with a Box-Muller transform, and so require \p n to be even. + * Quasirandom generators use an inverse cumulative distribution + * function to preserve dimensionality. + * + * There may be slight numerical differences between results generated + * on the GPU with generators created with ::curandCreateGenerator() + * and results calculated on the CPU with generators created with + * ::curandCreateGeneratorHost(). These differences arise because of + * differences in results for transcendental functions. In addition, + * future versions of CURAND may use newer versions of the CUDA math + * library, so different versions of CURAND may give slightly different + * numerical values. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param n - Number of doubles to generate + * \param mean - Mean of normal distribution + * \param stddev - Standard deviation of normal distribution + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension, or is not a multiple + * of two for pseudorandom generators \n + * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr, + size_t n, double mean, double stddev); + +/** + * \brief Generate log-normally distributed floats. + * + * Use \p generator to generate \p n float results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 32-bit floating point values with log-normal distribution based on + * an associated normal distribution with mean \p mean and standard deviation \p stddev. + * + * Normally distributed results are generated from pseudorandom generators + * with a Box-Muller transform, and so require \p n to be even. + * Quasirandom generators use an inverse cumulative distribution + * function to preserve dimensionality. + * The normally distributed results are transformed into log-normal distribution. + * + * There may be slight numerical differences between results generated + * on the GPU with generators created with ::curandCreateGenerator() + * and results calculated on the CPU with generators created with + * ::curandCreateGeneratorHost(). These differences arise because of + * differences in results for transcendental functions. In addition, + * future versions of CURAND may use newer versions of the CUDA math + * library, so different versions of CURAND may give slightly different + * numerical values. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param n - Number of floats to generate + * \param mean - Mean of associated normal distribution + * \param stddev - Standard deviation of associated normal distribution + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension, or is not a multiple + * of two for pseudorandom generators \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr, + size_t n, float mean, float stddev); + +/** + * \brief Generate log-normally distributed doubles. + * + * Use \p generator to generate \p n double results into the device memory at + * \p outputPtr. The device memory must have been previously allocated and be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 64-bit floating point values with log-normal distribution based on + * an associated normal distribution with mean \p mean and standard deviation \p stddev. + * + * Normally distributed results are generated from pseudorandom generators + * with a Box-Muller transform, and so require \p n to be even. + * Quasirandom generators use an inverse cumulative distribution + * function to preserve dimensionality. + * The normally distributed results are transformed into log-normal distribution. + * + * There may be slight numerical differences between results generated + * on the GPU with generators created with ::curandCreateGenerator() + * and results calculated on the CPU with generators created with + * ::curandCreateGeneratorHost(). These differences arise because of + * differences in results for transcendental functions. In addition, + * future versions of CURAND may use newer versions of the CUDA math + * library, so different versions of CURAND may give slightly different + * numerical values. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param n - Number of doubles to generate + * \param mean - Mean of normal distribution + * \param stddev - Standard deviation of normal distribution + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension, or is not a multiple + * of two for pseudorandom generators \n + * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, + size_t n, double mean, double stddev); + +/** + * \brief Construct the histogram array for a Poisson distribution. + * + * Construct the histogram array for the Poisson distribution with lambda \p lambda. + * For lambda greater than 2000, an approximation with a normal distribution is used. + * + * \param lambda - lambda for the Poisson distribution + * + * + * \param discrete_distribution - pointer to the histogram in device memory + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n + * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n + * - CURAND_STATUS_NOT_INITIALIZED if the distribution pointer was null \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n + * - CURAND_STATUS_SUCCESS if the histogram was generated successfully \n + */ + +curandStatus_t CURANDAPI +curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution); + + + +/** + * \brief Destroy the histogram array for a discrete distribution (e.g. Poisson). + * + * Destroy the histogram array for a discrete distribution created by curandCreatePoissonDistribution. + * + * \param discrete_distribution - pointer to device memory where the histogram is stored + * + * \return + * - CURAND_STATUS_NOT_INITIALIZED if the histogram was never created \n + * - CURAND_STATUS_SUCCESS if the histogram was destroyed successfully \n + */ +curandStatus_t CURANDAPI +curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution); + + +/** + * \brief Generate Poisson-distributed unsigned ints. + * + * Use \p generator to generate \p n unsigned int results into device memory at + * \p outputPtr. The device memory must have been previously allocated and must be + * large enough to hold all the results. Launches are done with the stream + * set using ::curandSetStream(), or the null stream if no stream has been set. + * + * Results are 32-bit unsigned int point values with Poisson distribution, with lambda \p lambda. + * + * \param generator - Generator to use + * \param outputPtr - Pointer to device memory to store CUDA-generated results, or + * Pointer to host memory to store CPU-generated results + * \param n - Number of unsigned ints to generate + * \param lambda - lambda for the Poisson distribution + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is + * not a multiple of the quasirandom dimension\n + * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU or sm does not support double precision \n + * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n + * - CURAND_STATUS_SUCCESS if the results were generated successfully \n + */ + +curandStatus_t CURANDAPI +curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr, + size_t n, double lambda); +// just for internal usage +curandStatus_t CURANDAPI +curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr, + size_t n, double lambda, curandMethod_t method); + + +curandStatus_t CURANDAPI +curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr, + size_t num, unsigned int n, double p); +// just for internal usage +curandStatus_t CURANDAPI +curandGenerateBinomialMethod(curandGenerator_t generator, + unsigned int *outputPtr, + size_t num, unsigned int n, double p, + curandMethod_t method); + + +/** + * \brief Setup starting states. + * + * Generate the starting state of the generator. This function is + * automatically called by generation functions such as + * ::curandGenerate() and ::curandGenerateUniform(). + * It can be called manually for performance testing reasons to separate + * timings for starting state generation and random number generation. + * + * \param generator - Generator to update + * + * \return + * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n + * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n + * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from + * a previous kernel launch \n + * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n + * - CURAND_STATUS_SUCCESS if the seeds were generated successfully \n + */ +curandStatus_t CURANDAPI +curandGenerateSeeds(curandGenerator_t generator); + +/** + * \brief Get direction vectors for 32-bit quasirandom number generation. + * + * Get a pointer to an array of direction vectors that can be used + * for quasirandom number generation. The resulting pointer will + * reference an array of direction vectors in host memory. + * + * The array contains vectors for many dimensions. Each dimension + * has 32 vectors. Each individual vector is an unsigned int. + * + * Legal values for \p set are: + * - CURAND_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions) + * - CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions) + * + * \param vectors - Address of pointer in which to return direction vectors + * \param set - Which set of direction vectors to use + * + * \return + * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n + * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n + */ +curandStatus_t CURANDAPI +curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set); + +/** + * \brief Get scramble constants for 32-bit scrambled Sobol' . + * + * Get a pointer to an array of scramble constants that can be used + * for quasirandom number generation. The resulting pointer will + * reference an array of unsinged ints in host memory. + * + * The array contains constants for many dimensions. Each dimension + * has a single unsigned int constant. + * + * \param constants - Address of pointer in which to return scramble constants + * + * \return + * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n + */ +curandStatus_t CURANDAPI +curandGetScrambleConstants32(unsigned int * * constants); + +/** + * \brief Get direction vectors for 64-bit quasirandom number generation. + * + * Get a pointer to an array of direction vectors that can be used + * for quasirandom number generation. The resulting pointer will + * reference an array of direction vectors in host memory. + * + * The array contains vectors for many dimensions. Each dimension + * has 64 vectors. Each individual vector is an unsigned long long. + * + * Legal values for \p set are: + * - CURAND_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions) + * - CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions) + * + * \param vectors - Address of pointer in which to return direction vectors + * \param set - Which set of direction vectors to use + * + * \return + * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n + * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n + */ +curandStatus_t CURANDAPI +curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set); + +/** + * \brief Get scramble constants for 64-bit scrambled Sobol' . + * + * Get a pointer to an array of scramble constants that can be used + * for quasirandom number generation. The resulting pointer will + * reference an array of unsinged long longs in host memory. + * + * The array contains constants for many dimensions. Each dimension + * has a single unsigned long long constant. + * + * \param constants - Address of pointer in which to return scramble constants + * + * \return + * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n + */ +curandStatus_t CURANDAPI +curandGetScrambleConstants64(unsigned long long * * constants); + +/** @} */ + +#endif // __CUDACC_RTC__ + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + + +#endif /* !defined(CURAND_H_) */ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt new file mode 100644 index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt @@ -0,0 +1,1568 @@ +End User License Agreement +-------------------------- + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA software. By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. + + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +This package includes over 100+ CUDA examples that demonstrate +various CUDA programming principles, and efficient CUDA +implementation of algorithms in specific application domains. + + +Default Install Location of CUDA Samples + +Windows platform: + +%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.# + +Linux platform: + +/usr/local/cuda-#.#/samples + +and + +$HOME/NVIDIA_CUDA-#.#_Samples + +Mac platform: + +/Developer/NVIDIA/CUDA-#.#/samples + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. + + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Release Date: July 26, 2018 +--------------------------- + + +Important NoticeRead before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +("Agreement”) is a legal agreement between you and NVIDIA +Corporation ("NVIDIA") and governs your use of a NVIDIA +software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + +1.1.2. Distribution Requirements + +These are the distribution requirements for you to exercise +the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Third Party Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, third party software licensed by a NVIDIA +supplier and/or open source software provided under an open +source license. Use of third party software is subject to the +third-party license terms, or in the absence of third party +terms, the terms of this Agreement. Copyright to third party +software is held by the copyright holders indicated in the +third-party software or license. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. + Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. + +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + + +Release date: August 16, 2018 +----------------------------- + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. Attachment A + +The following portions of the SDK are distributable under the +Agreement: + +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +CUDA Accelerated Graph Library + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Linux + +libnvjpeg.so, libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + +Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-fatbinaryloader.so, +libnvidia-ptxjitcompiler.so + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet this criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. + http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. + + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use (e.g. nvprof) of the PCRE third party + component is subject to the following terms and + conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license . + +----------------- diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt new file mode 100644 index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt @@ -0,0 +1,1568 @@ +End User License Agreement +-------------------------- + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA software. By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. + + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +This package includes over 100+ CUDA examples that demonstrate +various CUDA programming principles, and efficient CUDA +implementation of algorithms in specific application domains. + + +Default Install Location of CUDA Samples + +Windows platform: + +%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.# + +Linux platform: + +/usr/local/cuda-#.#/samples + +and + +$HOME/NVIDIA_CUDA-#.#_Samples + +Mac platform: + +/Developer/NVIDIA/CUDA-#.#/samples + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. + + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Release Date: July 26, 2018 +--------------------------- + + +Important NoticeRead before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +("Agreement”) is a legal agreement between you and NVIDIA +Corporation ("NVIDIA") and governs your use of a NVIDIA +software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + +1.1.2. Distribution Requirements + +These are the distribution requirements for you to exercise +the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Third Party Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, third party software licensed by a NVIDIA +supplier and/or open source software provided under an open +source license. Use of third party software is subject to the +third-party license terms, or in the absence of third party +terms, the terms of this Agreement. Copyright to third party +software is held by the copyright holders indicated in the +third-party software or license. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. + Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. + +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + + +Release date: August 16, 2018 +----------------------------- + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. Attachment A + +The following portions of the SDK are distributable under the +Agreement: + +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +CUDA Accelerated Graph Library + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Linux + +libnvjpeg.so, libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + +Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-fatbinaryloader.so, +libnvidia-ptxjitcompiler.so + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet this criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. + http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. + + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use (e.g. nvprof) of the PCRE third party + component is subject to the following terms and + conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license . + +----------------- diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe new file mode 100644 index 0000000000000000000000000000000000000000..bc02472528a32bd70ebfa2b0eca26e34a83fa264 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5dc9884a8f458371550e09bd396e5418bf375820a31b9899f6499bf391c7b2e +size 168448 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c01d5b08b6b44379b931d54d7fcf5221fdc9fde --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py @@ -0,0 +1,4 @@ +from .distro import main + +if __name__ == "__main__": + main() diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99da8b9dd120d37abe0113f75e0abcf982bf2f0e Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e170be019dddbb3c1551e726bd81cb4f6c5a9f0 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13e2094b5da471e74c3b7108d1a948edd3680075 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64c8f64d84fe4b846886be3e11fa8bf150e26f4e Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a75867d2dc5225c1bfc104d3985cc2c14663017 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py new file mode 100644 index 0000000000000000000000000000000000000000..264d564dbda676b52f446c0d25433a15939a78a3 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py @@ -0,0 +1,519 @@ +""" +This module uses ctypes to bind a whole bunch of functions and constants from +SecureTransport. The goal here is to provide the low-level API to +SecureTransport. These are essentially the C-level functions and constants, and +they're pretty gross to work with. + +This code is a bastardised version of the code found in Will Bond's oscrypto +library. An enormous debt is owed to him for blazing this trail for us. For +that reason, this code should be considered to be covered both by urllib3's +license and by oscrypto's: + + Copyright (c) 2015-2016 Will Bond + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +""" +from __future__ import absolute_import + +import platform +from ctypes import ( + CDLL, + CFUNCTYPE, + POINTER, + c_bool, + c_byte, + c_char_p, + c_int32, + c_long, + c_size_t, + c_uint32, + c_ulong, + c_void_p, +) +from ctypes.util import find_library + +from ...packages.six import raise_from + +if platform.system() != "Darwin": + raise ImportError("Only macOS is supported") + +version = platform.mac_ver()[0] +version_info = tuple(map(int, version.split("."))) +if version_info < (10, 8): + raise OSError( + "Only OS X 10.8 and newer are supported, not %s.%s" + % (version_info[0], version_info[1]) + ) + + +def load_cdll(name, macos10_16_path): + """Loads a CDLL by name, falling back to known path on 10.16+""" + try: + # Big Sur is technically 11 but we use 10.16 due to the Big Sur + # beta being labeled as 10.16. + if version_info >= (10, 16): + path = macos10_16_path + else: + path = find_library(name) + if not path: + raise OSError # Caught and reraised as 'ImportError' + return CDLL(path, use_errno=True) + except OSError: + raise_from(ImportError("The library %s failed to load" % name), None) + + +Security = load_cdll( + "Security", "/System/Library/Frameworks/Security.framework/Security" +) +CoreFoundation = load_cdll( + "CoreFoundation", + "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", +) + + +Boolean = c_bool +CFIndex = c_long +CFStringEncoding = c_uint32 +CFData = c_void_p +CFString = c_void_p +CFArray = c_void_p +CFMutableArray = c_void_p +CFDictionary = c_void_p +CFError = c_void_p +CFType = c_void_p +CFTypeID = c_ulong + +CFTypeRef = POINTER(CFType) +CFAllocatorRef = c_void_p + +OSStatus = c_int32 + +CFDataRef = POINTER(CFData) +CFStringRef = POINTER(CFString) +CFArrayRef = POINTER(CFArray) +CFMutableArrayRef = POINTER(CFMutableArray) +CFDictionaryRef = POINTER(CFDictionary) +CFArrayCallBacks = c_void_p +CFDictionaryKeyCallBacks = c_void_p +CFDictionaryValueCallBacks = c_void_p + +SecCertificateRef = POINTER(c_void_p) +SecExternalFormat = c_uint32 +SecExternalItemType = c_uint32 +SecIdentityRef = POINTER(c_void_p) +SecItemImportExportFlags = c_uint32 +SecItemImportExportKeyParameters = c_void_p +SecKeychainRef = POINTER(c_void_p) +SSLProtocol = c_uint32 +SSLCipherSuite = c_uint32 +SSLContextRef = POINTER(c_void_p) +SecTrustRef = POINTER(c_void_p) +SSLConnectionRef = c_uint32 +SecTrustResultType = c_uint32 +SecTrustOptionFlags = c_uint32 +SSLProtocolSide = c_uint32 +SSLConnectionType = c_uint32 +SSLSessionOption = c_uint32 + + +try: + Security.SecItemImport.argtypes = [ + CFDataRef, + CFStringRef, + POINTER(SecExternalFormat), + POINTER(SecExternalItemType), + SecItemImportExportFlags, + POINTER(SecItemImportExportKeyParameters), + SecKeychainRef, + POINTER(CFArrayRef), + ] + Security.SecItemImport.restype = OSStatus + + Security.SecCertificateGetTypeID.argtypes = [] + Security.SecCertificateGetTypeID.restype = CFTypeID + + Security.SecIdentityGetTypeID.argtypes = [] + Security.SecIdentityGetTypeID.restype = CFTypeID + + Security.SecKeyGetTypeID.argtypes = [] + Security.SecKeyGetTypeID.restype = CFTypeID + + Security.SecCertificateCreateWithData.argtypes = [CFAllocatorRef, CFDataRef] + Security.SecCertificateCreateWithData.restype = SecCertificateRef + + Security.SecCertificateCopyData.argtypes = [SecCertificateRef] + Security.SecCertificateCopyData.restype = CFDataRef + + Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SecIdentityCreateWithCertificate.argtypes = [ + CFTypeRef, + SecCertificateRef, + POINTER(SecIdentityRef), + ] + Security.SecIdentityCreateWithCertificate.restype = OSStatus + + Security.SecKeychainCreate.argtypes = [ + c_char_p, + c_uint32, + c_void_p, + Boolean, + c_void_p, + POINTER(SecKeychainRef), + ] + Security.SecKeychainCreate.restype = OSStatus + + Security.SecKeychainDelete.argtypes = [SecKeychainRef] + Security.SecKeychainDelete.restype = OSStatus + + Security.SecPKCS12Import.argtypes = [ + CFDataRef, + CFDictionaryRef, + POINTER(CFArrayRef), + ] + Security.SecPKCS12Import.restype = OSStatus + + SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t)) + SSLWriteFunc = CFUNCTYPE( + OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t) + ) + + Security.SSLSetIOFuncs.argtypes = [SSLContextRef, SSLReadFunc, SSLWriteFunc] + Security.SSLSetIOFuncs.restype = OSStatus + + Security.SSLSetPeerID.argtypes = [SSLContextRef, c_char_p, c_size_t] + Security.SSLSetPeerID.restype = OSStatus + + Security.SSLSetCertificate.argtypes = [SSLContextRef, CFArrayRef] + Security.SSLSetCertificate.restype = OSStatus + + Security.SSLSetCertificateAuthorities.argtypes = [SSLContextRef, CFTypeRef, Boolean] + Security.SSLSetCertificateAuthorities.restype = OSStatus + + Security.SSLSetConnection.argtypes = [SSLContextRef, SSLConnectionRef] + Security.SSLSetConnection.restype = OSStatus + + Security.SSLSetPeerDomainName.argtypes = [SSLContextRef, c_char_p, c_size_t] + Security.SSLSetPeerDomainName.restype = OSStatus + + Security.SSLHandshake.argtypes = [SSLContextRef] + Security.SSLHandshake.restype = OSStatus + + Security.SSLRead.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)] + Security.SSLRead.restype = OSStatus + + Security.SSLWrite.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)] + Security.SSLWrite.restype = OSStatus + + Security.SSLClose.argtypes = [SSLContextRef] + Security.SSLClose.restype = OSStatus + + Security.SSLGetNumberSupportedCiphers.argtypes = [SSLContextRef, POINTER(c_size_t)] + Security.SSLGetNumberSupportedCiphers.restype = OSStatus + + Security.SSLGetSupportedCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t), + ] + Security.SSLGetSupportedCiphers.restype = OSStatus + + Security.SSLSetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + c_size_t, + ] + Security.SSLSetEnabledCiphers.restype = OSStatus + + Security.SSLGetNumberEnabledCiphers.argtype = [SSLContextRef, POINTER(c_size_t)] + Security.SSLGetNumberEnabledCiphers.restype = OSStatus + + Security.SSLGetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t), + ] + Security.SSLGetEnabledCiphers.restype = OSStatus + + Security.SSLGetNegotiatedCipher.argtypes = [SSLContextRef, POINTER(SSLCipherSuite)] + Security.SSLGetNegotiatedCipher.restype = OSStatus + + Security.SSLGetNegotiatedProtocolVersion.argtypes = [ + SSLContextRef, + POINTER(SSLProtocol), + ] + Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus + + Security.SSLCopyPeerTrust.argtypes = [SSLContextRef, POINTER(SecTrustRef)] + Security.SSLCopyPeerTrust.restype = OSStatus + + Security.SecTrustSetAnchorCertificates.argtypes = [SecTrustRef, CFArrayRef] + Security.SecTrustSetAnchorCertificates.restype = OSStatus + + Security.SecTrustSetAnchorCertificatesOnly.argstypes = [SecTrustRef, Boolean] + Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus + + Security.SecTrustEvaluate.argtypes = [SecTrustRef, POINTER(SecTrustResultType)] + Security.SecTrustEvaluate.restype = OSStatus + + Security.SecTrustGetCertificateCount.argtypes = [SecTrustRef] + Security.SecTrustGetCertificateCount.restype = CFIndex + + Security.SecTrustGetCertificateAtIndex.argtypes = [SecTrustRef, CFIndex] + Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef + + Security.SSLCreateContext.argtypes = [ + CFAllocatorRef, + SSLProtocolSide, + SSLConnectionType, + ] + Security.SSLCreateContext.restype = SSLContextRef + + Security.SSLSetSessionOption.argtypes = [SSLContextRef, SSLSessionOption, Boolean] + Security.SSLSetSessionOption.restype = OSStatus + + Security.SSLSetProtocolVersionMin.argtypes = [SSLContextRef, SSLProtocol] + Security.SSLSetProtocolVersionMin.restype = OSStatus + + Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol] + Security.SSLSetProtocolVersionMax.restype = OSStatus + + try: + Security.SSLSetALPNProtocols.argtypes = [SSLContextRef, CFArrayRef] + Security.SSLSetALPNProtocols.restype = OSStatus + except AttributeError: + # Supported only in 10.12+ + pass + + Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SSLReadFunc = SSLReadFunc + Security.SSLWriteFunc = SSLWriteFunc + Security.SSLContextRef = SSLContextRef + Security.SSLProtocol = SSLProtocol + Security.SSLCipherSuite = SSLCipherSuite + Security.SecIdentityRef = SecIdentityRef + Security.SecKeychainRef = SecKeychainRef + Security.SecTrustRef = SecTrustRef + Security.SecTrustResultType = SecTrustResultType + Security.SecExternalFormat = SecExternalFormat + Security.OSStatus = OSStatus + + Security.kSecImportExportPassphrase = CFStringRef.in_dll( + Security, "kSecImportExportPassphrase" + ) + Security.kSecImportItemIdentity = CFStringRef.in_dll( + Security, "kSecImportItemIdentity" + ) + + # CoreFoundation time! + CoreFoundation.CFRetain.argtypes = [CFTypeRef] + CoreFoundation.CFRetain.restype = CFTypeRef + + CoreFoundation.CFRelease.argtypes = [CFTypeRef] + CoreFoundation.CFRelease.restype = None + + CoreFoundation.CFGetTypeID.argtypes = [CFTypeRef] + CoreFoundation.CFGetTypeID.restype = CFTypeID + + CoreFoundation.CFStringCreateWithCString.argtypes = [ + CFAllocatorRef, + c_char_p, + CFStringEncoding, + ] + CoreFoundation.CFStringCreateWithCString.restype = CFStringRef + + CoreFoundation.CFStringGetCStringPtr.argtypes = [CFStringRef, CFStringEncoding] + CoreFoundation.CFStringGetCStringPtr.restype = c_char_p + + CoreFoundation.CFStringGetCString.argtypes = [ + CFStringRef, + c_char_p, + CFIndex, + CFStringEncoding, + ] + CoreFoundation.CFStringGetCString.restype = c_bool + + CoreFoundation.CFDataCreate.argtypes = [CFAllocatorRef, c_char_p, CFIndex] + CoreFoundation.CFDataCreate.restype = CFDataRef + + CoreFoundation.CFDataGetLength.argtypes = [CFDataRef] + CoreFoundation.CFDataGetLength.restype = CFIndex + + CoreFoundation.CFDataGetBytePtr.argtypes = [CFDataRef] + CoreFoundation.CFDataGetBytePtr.restype = c_void_p + + CoreFoundation.CFDictionaryCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + POINTER(CFTypeRef), + CFIndex, + CFDictionaryKeyCallBacks, + CFDictionaryValueCallBacks, + ] + CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef + + CoreFoundation.CFDictionaryGetValue.argtypes = [CFDictionaryRef, CFTypeRef] + CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef + + CoreFoundation.CFArrayCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + CFIndex, + CFArrayCallBacks, + ] + CoreFoundation.CFArrayCreate.restype = CFArrayRef + + CoreFoundation.CFArrayCreateMutable.argtypes = [ + CFAllocatorRef, + CFIndex, + CFArrayCallBacks, + ] + CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef + + CoreFoundation.CFArrayAppendValue.argtypes = [CFMutableArrayRef, c_void_p] + CoreFoundation.CFArrayAppendValue.restype = None + + CoreFoundation.CFArrayGetCount.argtypes = [CFArrayRef] + CoreFoundation.CFArrayGetCount.restype = CFIndex + + CoreFoundation.CFArrayGetValueAtIndex.argtypes = [CFArrayRef, CFIndex] + CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p + + CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll( + CoreFoundation, "kCFAllocatorDefault" + ) + CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll( + CoreFoundation, "kCFTypeArrayCallBacks" + ) + CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll( + CoreFoundation, "kCFTypeDictionaryKeyCallBacks" + ) + CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll( + CoreFoundation, "kCFTypeDictionaryValueCallBacks" + ) + + CoreFoundation.CFTypeRef = CFTypeRef + CoreFoundation.CFArrayRef = CFArrayRef + CoreFoundation.CFStringRef = CFStringRef + CoreFoundation.CFDictionaryRef = CFDictionaryRef + +except (AttributeError): + raise ImportError("Error initializing ctypes") + + +class CFConst(object): + """ + A class object that acts as essentially a namespace for CoreFoundation + constants. + """ + + kCFStringEncodingUTF8 = CFStringEncoding(0x08000100) + + +class SecurityConst(object): + """ + A class object that acts as essentially a namespace for Security constants. + """ + + kSSLSessionOptionBreakOnServerAuth = 0 + + kSSLProtocol2 = 1 + kSSLProtocol3 = 2 + kTLSProtocol1 = 4 + kTLSProtocol11 = 7 + kTLSProtocol12 = 8 + # SecureTransport does not support TLS 1.3 even if there's a constant for it + kTLSProtocol13 = 10 + kTLSProtocolMaxSupported = 999 + + kSSLClientSide = 1 + kSSLStreamType = 0 + + kSecFormatPEMSequence = 10 + + kSecTrustResultInvalid = 0 + kSecTrustResultProceed = 1 + # This gap is present on purpose: this was kSecTrustResultConfirm, which + # is deprecated. + kSecTrustResultDeny = 3 + kSecTrustResultUnspecified = 4 + kSecTrustResultRecoverableTrustFailure = 5 + kSecTrustResultFatalTrustFailure = 6 + kSecTrustResultOtherError = 7 + + errSSLProtocol = -9800 + errSSLWouldBlock = -9803 + errSSLClosedGraceful = -9805 + errSSLClosedNoNotify = -9816 + errSSLClosedAbort = -9806 + + errSSLXCertChainInvalid = -9807 + errSSLCrypto = -9809 + errSSLInternal = -9810 + errSSLCertExpired = -9814 + errSSLCertNotYetValid = -9815 + errSSLUnknownRootCert = -9812 + errSSLNoRootCert = -9813 + errSSLHostNameMismatch = -9843 + errSSLPeerHandshakeFail = -9824 + errSSLPeerUserCancelled = -9839 + errSSLWeakPeerEphemeralDHKey = -9850 + errSSLServerAuthCompleted = -9841 + errSSLRecordOverflow = -9847 + + errSecVerifyFailed = -67808 + errSecNoTrustSettings = -25263 + errSecItemNotFound = -25300 + errSecInvalidTrustSettings = -25262 + + # Cipher suites. We only pick the ones our default cipher string allows. + # Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values + TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C + TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030 + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B + TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F + TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9 + TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8 + TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F + TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024 + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014 + TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B + TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033 + TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D + TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C + TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D + TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C + TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035 + TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F + TLS_AES_128_GCM_SHA256 = 0x1301 + TLS_AES_256_GCM_SHA384 = 0x1302 + TLS_AES_128_CCM_8_SHA256 = 0x1305 + TLS_AES_128_CCM_SHA256 = 0x1304 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py new file mode 100644 index 0000000000000000000000000000000000000000..fa0b245d279e96724d5610f93bc3b3c8c22ca032 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py @@ -0,0 +1,397 @@ +""" +Low-level helpers for the SecureTransport bindings. + +These are Python functions that are not directly related to the high-level APIs +but are necessary to get them to work. They include a whole bunch of low-level +CoreFoundation messing about and memory management. The concerns in this module +are almost entirely about trying to avoid memory leaks and providing +appropriate and useful assistance to the higher-level code. +""" +import base64 +import ctypes +import itertools +import os +import re +import ssl +import struct +import tempfile + +from .bindings import CFConst, CoreFoundation, Security + +# This regular expression is used to grab PEM data out of a PEM bundle. +_PEM_CERTS_RE = re.compile( + b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL +) + + +def _cf_data_from_bytes(bytestring): + """ + Given a bytestring, create a CFData object from it. This CFData object must + be CFReleased by the caller. + """ + return CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring) + ) + + +def _cf_dictionary_from_tuples(tuples): + """ + Given a list of Python tuples, create an associated CFDictionary. + """ + dictionary_size = len(tuples) + + # We need to get the dictionary keys and values out in the same order. + keys = (t[0] for t in tuples) + values = (t[1] for t in tuples) + cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys) + cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values) + + return CoreFoundation.CFDictionaryCreate( + CoreFoundation.kCFAllocatorDefault, + cf_keys, + cf_values, + dictionary_size, + CoreFoundation.kCFTypeDictionaryKeyCallBacks, + CoreFoundation.kCFTypeDictionaryValueCallBacks, + ) + + +def _cfstr(py_bstr): + """ + Given a Python binary data, create a CFString. + The string must be CFReleased by the caller. + """ + c_str = ctypes.c_char_p(py_bstr) + cf_str = CoreFoundation.CFStringCreateWithCString( + CoreFoundation.kCFAllocatorDefault, + c_str, + CFConst.kCFStringEncodingUTF8, + ) + return cf_str + + +def _create_cfstring_array(lst): + """ + Given a list of Python binary data, create an associated CFMutableArray. + The array must be CFReleased by the caller. + + Raises an ssl.SSLError on failure. + """ + cf_arr = None + try: + cf_arr = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + if not cf_arr: + raise MemoryError("Unable to allocate memory!") + for item in lst: + cf_str = _cfstr(item) + if not cf_str: + raise MemoryError("Unable to allocate memory!") + try: + CoreFoundation.CFArrayAppendValue(cf_arr, cf_str) + finally: + CoreFoundation.CFRelease(cf_str) + except BaseException as e: + if cf_arr: + CoreFoundation.CFRelease(cf_arr) + raise ssl.SSLError("Unable to allocate array: %s" % (e,)) + return cf_arr + + +def _cf_string_to_unicode(value): + """ + Creates a Unicode string from a CFString object. Used entirely for error + reporting. + + Yes, it annoys me quite a lot that this function is this complex. + """ + value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p)) + + string = CoreFoundation.CFStringGetCStringPtr( + value_as_void_p, CFConst.kCFStringEncodingUTF8 + ) + if string is None: + buffer = ctypes.create_string_buffer(1024) + result = CoreFoundation.CFStringGetCString( + value_as_void_p, buffer, 1024, CFConst.kCFStringEncodingUTF8 + ) + if not result: + raise OSError("Error copying C string from CFStringRef") + string = buffer.value + if string is not None: + string = string.decode("utf-8") + return string + + +def _assert_no_error(error, exception_class=None): + """ + Checks the return code and throws an exception if there is an error to + report + """ + if error == 0: + return + + cf_error_string = Security.SecCopyErrorMessageString(error, None) + output = _cf_string_to_unicode(cf_error_string) + CoreFoundation.CFRelease(cf_error_string) + + if output is None or output == u"": + output = u"OSStatus %s" % error + + if exception_class is None: + exception_class = ssl.SSLError + + raise exception_class(output) + + +def _cert_array_from_pem(pem_bundle): + """ + Given a bundle of certs in PEM format, turns them into a CFArray of certs + that can be used to validate a cert chain. + """ + # Normalize the PEM bundle's line endings. + pem_bundle = pem_bundle.replace(b"\r\n", b"\n") + + der_certs = [ + base64.b64decode(match.group(1)) for match in _PEM_CERTS_RE.finditer(pem_bundle) + ] + if not der_certs: + raise ssl.SSLError("No root certificates specified") + + cert_array = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + if not cert_array: + raise ssl.SSLError("Unable to allocate memory!") + + try: + for der_bytes in der_certs: + certdata = _cf_data_from_bytes(der_bytes) + if not certdata: + raise ssl.SSLError("Unable to allocate memory!") + cert = Security.SecCertificateCreateWithData( + CoreFoundation.kCFAllocatorDefault, certdata + ) + CoreFoundation.CFRelease(certdata) + if not cert: + raise ssl.SSLError("Unable to build cert object!") + + CoreFoundation.CFArrayAppendValue(cert_array, cert) + CoreFoundation.CFRelease(cert) + except Exception: + # We need to free the array before the exception bubbles further. + # We only want to do that if an error occurs: otherwise, the caller + # should free. + CoreFoundation.CFRelease(cert_array) + raise + + return cert_array + + +def _is_cert(item): + """ + Returns True if a given CFTypeRef is a certificate. + """ + expected = Security.SecCertificateGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _is_identity(item): + """ + Returns True if a given CFTypeRef is an identity. + """ + expected = Security.SecIdentityGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _temporary_keychain(): + """ + This function creates a temporary Mac keychain that we can use to work with + credentials. This keychain uses a one-time password and a temporary file to + store the data. We expect to have one keychain per socket. The returned + SecKeychainRef must be freed by the caller, including calling + SecKeychainDelete. + + Returns a tuple of the SecKeychainRef and the path to the temporary + directory that contains it. + """ + # Unfortunately, SecKeychainCreate requires a path to a keychain. This + # means we cannot use mkstemp to use a generic temporary file. Instead, + # we're going to create a temporary directory and a filename to use there. + # This filename will be 8 random bytes expanded into base64. We also need + # some random bytes to password-protect the keychain we're creating, so we + # ask for 40 random bytes. + random_bytes = os.urandom(40) + filename = base64.b16encode(random_bytes[:8]).decode("utf-8") + password = base64.b16encode(random_bytes[8:]) # Must be valid UTF-8 + tempdirectory = tempfile.mkdtemp() + + keychain_path = os.path.join(tempdirectory, filename).encode("utf-8") + + # We now want to create the keychain itself. + keychain = Security.SecKeychainRef() + status = Security.SecKeychainCreate( + keychain_path, len(password), password, False, None, ctypes.byref(keychain) + ) + _assert_no_error(status) + + # Having created the keychain, we want to pass it off to the caller. + return keychain, tempdirectory + + +def _load_items_from_file(keychain, path): + """ + Given a single file, loads all the trust objects from it into arrays and + the keychain. + Returns a tuple of lists: the first list is a list of identities, the + second a list of certs. + """ + certificates = [] + identities = [] + result_array = None + + with open(path, "rb") as f: + raw_filedata = f.read() + + try: + filedata = CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, raw_filedata, len(raw_filedata) + ) + result_array = CoreFoundation.CFArrayRef() + result = Security.SecItemImport( + filedata, # cert data + None, # Filename, leaving it out for now + None, # What the type of the file is, we don't care + None, # what's in the file, we don't care + 0, # import flags + None, # key params, can include passphrase in the future + keychain, # The keychain to insert into + ctypes.byref(result_array), # Results + ) + _assert_no_error(result) + + # A CFArray is not very useful to us as an intermediary + # representation, so we are going to extract the objects we want + # and then free the array. We don't need to keep hold of keys: the + # keychain already has them! + result_count = CoreFoundation.CFArrayGetCount(result_array) + for index in range(result_count): + item = CoreFoundation.CFArrayGetValueAtIndex(result_array, index) + item = ctypes.cast(item, CoreFoundation.CFTypeRef) + + if _is_cert(item): + CoreFoundation.CFRetain(item) + certificates.append(item) + elif _is_identity(item): + CoreFoundation.CFRetain(item) + identities.append(item) + finally: + if result_array: + CoreFoundation.CFRelease(result_array) + + CoreFoundation.CFRelease(filedata) + + return (identities, certificates) + + +def _load_client_cert_chain(keychain, *paths): + """ + Load certificates and maybe keys from a number of files. Has the end goal + of returning a CFArray containing one SecIdentityRef, and then zero or more + SecCertificateRef objects, suitable for use as a client certificate trust + chain. + """ + # Ok, the strategy. + # + # This relies on knowing that macOS will not give you a SecIdentityRef + # unless you have imported a key into a keychain. This is a somewhat + # artificial limitation of macOS (for example, it doesn't necessarily + # affect iOS), but there is nothing inside Security.framework that lets you + # get a SecIdentityRef without having a key in a keychain. + # + # So the policy here is we take all the files and iterate them in order. + # Each one will use SecItemImport to have one or more objects loaded from + # it. We will also point at a keychain that macOS can use to work with the + # private key. + # + # Once we have all the objects, we'll check what we actually have. If we + # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise, + # we'll take the first certificate (which we assume to be our leaf) and + # ask the keychain to give us a SecIdentityRef with that cert's associated + # key. + # + # We'll then return a CFArray containing the trust chain: one + # SecIdentityRef and then zero-or-more SecCertificateRef objects. The + # responsibility for freeing this CFArray will be with the caller. This + # CFArray must remain alive for the entire connection, so in practice it + # will be stored with a single SSLSocket, along with the reference to the + # keychain. + certificates = [] + identities = [] + + # Filter out bad paths. + paths = (path for path in paths if path) + + try: + for file_path in paths: + new_identities, new_certs = _load_items_from_file(keychain, file_path) + identities.extend(new_identities) + certificates.extend(new_certs) + + # Ok, we have everything. The question is: do we have an identity? If + # not, we want to grab one from the first cert we have. + if not identities: + new_identity = Security.SecIdentityRef() + status = Security.SecIdentityCreateWithCertificate( + keychain, certificates[0], ctypes.byref(new_identity) + ) + _assert_no_error(status) + identities.append(new_identity) + + # We now want to release the original certificate, as we no longer + # need it. + CoreFoundation.CFRelease(certificates.pop(0)) + + # We now need to build a new CFArray that holds the trust chain. + trust_chain = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + for item in itertools.chain(identities, certificates): + # ArrayAppendValue does a CFRetain on the item. That's fine, + # because the finally block will release our other refs to them. + CoreFoundation.CFArrayAppendValue(trust_chain, item) + + return trust_chain + finally: + for obj in itertools.chain(identities, certificates): + CoreFoundation.CFRelease(obj) + + +TLS_PROTOCOL_VERSIONS = { + "SSLv2": (0, 2), + "SSLv3": (3, 0), + "TLSv1": (3, 1), + "TLSv1.1": (3, 2), + "TLSv1.2": (3, 3), +} + + +def _build_tls_unknown_ca_alert(version): + """ + Builds a TLS alert record for an unknown CA. + """ + ver_maj, ver_min = TLS_PROTOCOL_VERSIONS[version] + severity_fatal = 0x02 + description_unknown_ca = 0x30 + msg = struct.pack(">BB", severity_fatal, description_unknown_ca) + msg_len = len(msg) + record_type_alert = 0x15 + record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg + return record diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py new file mode 100644 index 0000000000000000000000000000000000000000..1717ee22cdf77849e2e273566c877f95311e691b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py @@ -0,0 +1,314 @@ +""" +This module provides a pool manager that uses Google App Engine's +`URLFetch Service `_. + +Example usage:: + + from pip._vendor.urllib3 import PoolManager + from pip._vendor.urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox + + if is_appengine_sandbox(): + # AppEngineManager uses AppEngine's URLFetch API behind the scenes + http = AppEngineManager() + else: + # PoolManager uses a socket-level API behind the scenes + http = PoolManager() + + r = http.request('GET', 'https://google.com/') + +There are `limitations `_ to the URLFetch service and it may not be +the best choice for your application. There are three options for using +urllib3 on Google App Engine: + +1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is + cost-effective in many circumstances as long as your usage is within the + limitations. +2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets. + Sockets also have `limitations and restrictions + `_ and have a lower free quota than URLFetch. + To use sockets, be sure to specify the following in your ``app.yaml``:: + + env_variables: + GAE_USE_SOCKETS_HTTPLIB : 'true' + +3. If you are using `App Engine Flexible +`_, you can use the standard +:class:`PoolManager` without any configuration or special environment variables. +""" + +from __future__ import absolute_import + +import io +import logging +import warnings + +from ..exceptions import ( + HTTPError, + HTTPWarning, + MaxRetryError, + ProtocolError, + SSLError, + TimeoutError, +) +from ..packages.six.moves.urllib.parse import urljoin +from ..request import RequestMethods +from ..response import HTTPResponse +from ..util.retry import Retry +from ..util.timeout import Timeout +from . import _appengine_environ + +try: + from google.appengine.api import urlfetch +except ImportError: + urlfetch = None + + +log = logging.getLogger(__name__) + + +class AppEnginePlatformWarning(HTTPWarning): + pass + + +class AppEnginePlatformError(HTTPError): + pass + + +class AppEngineManager(RequestMethods): + """ + Connection manager for Google App Engine sandbox applications. + + This manager uses the URLFetch service directly instead of using the + emulated httplib, and is subject to URLFetch limitations as described in + the App Engine documentation `here + `_. + + Notably it will raise an :class:`AppEnginePlatformError` if: + * URLFetch is not available. + * If you attempt to use this on App Engine Flexible, as full socket + support is available. + * If a request size is more than 10 megabytes. + * If a response size is more than 32 megabytes. + * If you use an unsupported request method such as OPTIONS. + + Beyond those cases, it will raise normal urllib3 errors. + """ + + def __init__( + self, + headers=None, + retries=None, + validate_certificate=True, + urlfetch_retries=True, + ): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment." + ) + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.", + AppEnginePlatformWarning, + ) + + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + self.urlfetch_retries = urlfetch_retries + + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen( + self, + method, + url, + body=None, + headers=None, + retries=None, + redirect=True, + timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw + ): + + retries = self._get_retries(retries, redirect) + + try: + follow_redirects = redirect and retries.redirect != 0 and retries.total + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=self.urlfetch_retries and follow_redirects, + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if "too large" in str(e): + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", + e, + ) + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if "Too many redirects" in str(e): + raise MaxRetryError(self, url, reason=e) + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", + e, + ) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e + ) + + http_response = self._urlfetch_response_to_http_response( + response, retries=retries, **response_kw + ) + + # Handle redirect? + redirect_location = redirect and http_response.get_redirect_location() + if redirect_location: + # Check for redirect response + if self.urlfetch_retries and retries.raise_on_redirect: + raise MaxRetryError(self, url, "too many redirects") + else: + if http_response.status == 303: + method = "GET" + + try: + retries = retries.increment( + method, url, response=http_response, _pool=self + ) + except MaxRetryError: + if retries.raise_on_redirect: + raise MaxRetryError(self, url, "too many redirects") + return http_response + + retries.sleep_for_retry(http_response) + log.debug("Redirecting %s -> %s", url, redirect_location) + redirect_url = urljoin(url, redirect_location) + return self.urlopen( + method, + redirect_url, + body, + headers, + retries=retries, + redirect=redirect, + timeout=timeout, + **response_kw + ) + + # Check if we should retry the HTTP response. + has_retry_after = bool(http_response.headers.get("Retry-After")) + if retries.is_retry(method, http_response.status, has_retry_after): + retries = retries.increment(method, url, response=http_response, _pool=self) + log.debug("Retry: %s", url) + retries.sleep(http_response) + return self.urlopen( + method, + url, + body=body, + headers=headers, + retries=retries, + redirect=redirect, + timeout=timeout, + **response_kw + ) + + return http_response + + def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): + + if is_prod_appengine(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get("content-encoding") + + if content_encoding == "deflate": + del urlfetch_resp.headers["content-encoding"] + + transfer_encoding = urlfetch_resp.headers.get("transfer-encoding") + # We have a full response's content, + # so let's make sure we don't report ourselves as chunked data. + if transfer_encoding == "chunked": + encodings = transfer_encoding.split(",") + encodings.remove("chunked") + urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings) + + original_response = HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. + body=io.BytesIO(urlfetch_resp.content), + msg=urlfetch_resp.header_msg, + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + **response_kw + ) + + return HTTPResponse( + body=io.BytesIO(urlfetch_resp.content), + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + original_response=original_response, + **response_kw + ) + + def _get_absolute_timeout(self, timeout): + if timeout is Timeout.DEFAULT_TIMEOUT: + return None # Defer to URLFetch's default. + if isinstance(timeout, Timeout): + if timeout._read is not None or timeout._connect is not None: + warnings.warn( + "URLFetch does not support granular timeout settings, " + "reverting to total or default URLFetch timeout.", + AppEnginePlatformWarning, + ) + return timeout.total + return timeout + + def _get_retries(self, retries, redirect): + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) + + if retries.connect or retries.read or retries.redirect: + warnings.warn( + "URLFetch only supports total retries and does not " + "recognize connect, read, or redirect retry parameters.", + AppEnginePlatformWarning, + ) + + return retries + + +# Alias methods from _appengine_environ to maintain public API interface. + +is_appengine = _appengine_environ.is_appengine +is_appengine_sandbox = _appengine_environ.is_appengine_sandbox +is_local_appengine = _appengine_environ.is_local_appengine +is_prod_appengine = _appengine_environ.is_prod_appengine +is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py new file mode 100644 index 0000000000000000000000000000000000000000..330766ef4f3403e05a6ad8ec30f25fe05fdbc199 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py @@ -0,0 +1,137 @@ +from __future__ import absolute_import + +from base64 import b64encode + +from ..exceptions import UnrewindableBodyError +from ..packages.six import b, integer_types + +# Pass as a value within ``headers`` to skip +# emitting some HTTP headers that are added automatically. +# The only headers that are supported are ``Accept-Encoding``, +# ``Host``, and ``User-Agent``. +SKIP_HEADER = "@@@SKIP_HEADER@@@" +SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"]) + +ACCEPT_ENCODING = "gzip,deflate" + +_FAILEDTELL = object() + + +def make_headers( + keep_alive=None, + accept_encoding=None, + user_agent=None, + basic_auth=None, + proxy_basic_auth=None, + disable_cache=None, +): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ",".join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers["accept-encoding"] = accept_encoding + + if user_agent: + headers["user-agent"] = user_agent + + if keep_alive: + headers["connection"] = "keep-alive" + + if basic_auth: + headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8") + + if proxy_basic_auth: + headers["proxy-authorization"] = "Basic " + b64encode( + b(proxy_basic_auth) + ).decode("utf-8") + + if disable_cache: + headers["cache-control"] = "no-cache" + + return headers + + +def set_file_position(body, pos): + """ + If a position is provided, move file to that point. + Otherwise, we'll attempt to record a position for future use. + """ + if pos is not None: + rewind_body(body, pos) + elif getattr(body, "tell", None) is not None: + try: + pos = body.tell() + except (IOError, OSError): + # This differentiates from None, allowing us to catch + # a failed `tell()` later when trying to rewind the body. + pos = _FAILEDTELL + + return pos + + +def rewind_body(body, body_pos): + """ + Attempt to rewind body to a certain position. + Primarily used for request redirects and retries. + + :param body: + File-like object that supports seek. + + :param int pos: + Position to seek to in file. + """ + body_seek = getattr(body, "seek", None) + if body_seek is not None and isinstance(body_pos, integer_types): + try: + body_seek(body_pos) + except (IOError, OSError): + raise UnrewindableBodyError( + "An error occurred when rewinding request body for redirect/retry." + ) + elif body_pos is _FAILEDTELL: + raise UnrewindableBodyError( + "Unable to record file position for rewinding " + "request body during a redirect/retry." + ) + else: + raise ValueError( + "body_pos must be of type integer, instead it was %s." % type(body_pos) + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py new file mode 100644 index 0000000000000000000000000000000000000000..1dd950c489607d06ecc5218292a1b55558b47be8 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py @@ -0,0 +1,159 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re +import sys + +# ipaddress has been backported to 2.6+ in pypi. If it is installed on the +# system, use it to handle IPAddress ServerAltnames (this was added in +# python-3.5) otherwise only do DNS matching. This allows +# util.ssl_match_hostname to continue to be used in Python 2.7. +try: + import ipaddress +except ImportError: + ipaddress = None + +__version__ = "3.5.0.1" + + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r".") + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count("*") + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn) + ) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == "*": + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append("[^.]+") + elif leftmost.startswith("xn--") or hostname.startswith("xn--"): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r"\*", "[^.]*")) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE) + return pat.match(hostname) + + +def _to_unicode(obj): + if isinstance(obj, str) and sys.version_info < (3,): + # ignored flake8 # F821 to support python 2.7 function + obj = unicode(obj, encoding="ascii", errors="strict") # noqa: F821 + return obj + + +def _ipaddress_match(ipname, host_ip): + """Exact matching of IP addresses. + + RFC 6125 explicitly doesn't define an algorithm for this + (section 1.7.2 - "Out of Scope"). + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) + return ip == host_ip + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError( + "empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED" + ) + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(_to_unicode(hostname)) + except (UnicodeError, ValueError): + # ValueError: Not an IP address (common case) + # UnicodeError: Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: # Defensive + raise + dnsnames = [] + san = cert.get("subjectAltName", ()) + for key, value in san: + if key == "DNS": + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == "IP Address": + if host_ip is not None and _ipaddress_match(value, host_ip): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get("subject", ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == "commonName": + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError( + "hostname %r " + "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames))) + ) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0])) + else: + raise CertificateError( + "no appropriate commonName or subjectAltName fields were found" + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7105d17916a7237f3df6e59d65ca82375f8803 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py @@ -0,0 +1,221 @@ +import io +import socket +import ssl + +from ..exceptions import ProxySchemeUnsupported +from ..packages import six + +SSL_BLOCKSIZE = 16384 + + +class SSLTransport: + """ + The SSLTransport wraps an existing socket and establishes an SSL connection. + + Contrary to Python's implementation of SSLSocket, it allows you to chain + multiple TLS connections together. It's particularly useful if you need to + implement TLS within TLS. + + The class supports most of the socket API operations. + """ + + @staticmethod + def _validate_ssl_context_for_tls_in_tls(ssl_context): + """ + Raises a ProxySchemeUnsupported if the provided ssl_context can't be used + for TLS in TLS. + + The only requirement is that the ssl_context provides the 'wrap_bio' + methods. + """ + + if not hasattr(ssl_context, "wrap_bio"): + if six.PY2: + raise ProxySchemeUnsupported( + "TLS in TLS requires SSLContext.wrap_bio() which isn't " + "supported on Python 2" + ) + else: + raise ProxySchemeUnsupported( + "TLS in TLS requires SSLContext.wrap_bio() which isn't " + "available on non-native SSLContext" + ) + + def __init__( + self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True + ): + """ + Create an SSLTransport around socket using the provided ssl_context. + """ + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, self.outgoing, server_hostname=server_hostname + ) + + # Perform initial handshake. + self._ssl_io_loop(self.sslobj.do_handshake) + + def __enter__(self): + return self + + def __exit__(self, *_): + self.close() + + def fileno(self): + return self.socket.fileno() + + def read(self, len=1024, buffer=None): + return self._wrap_ssl_read(len, buffer) + + def recv(self, len=1024, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv") + return self._wrap_ssl_read(len) + + def recv_into(self, buffer, nbytes=None, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv_into") + if buffer and (nbytes is None): + nbytes = len(buffer) + elif nbytes is None: + nbytes = 1024 + return self.read(nbytes, buffer) + + def sendall(self, data, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to sendall") + count = 0 + with memoryview(data) as view, view.cast("B") as byte_view: + amount = len(byte_view) + while count < amount: + v = self.send(byte_view[count:]) + count += v + + def send(self, data, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to send") + response = self._ssl_io_loop(self.sslobj.write, data) + return response + + def makefile( + self, mode="r", buffering=None, encoding=None, errors=None, newline=None + ): + """ + Python's httpclient uses makefile and buffered io when reading HTTP + messages and we need to support it. + + This is unfortunately a copy and paste of socket.py makefile with small + changes to point to the socket directly. + """ + if not set(mode) <= {"r", "w", "b"}: + raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,)) + + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = socket.SocketIO(self, rawmode) + self.socket._io_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text + + def unwrap(self): + self._ssl_io_loop(self.sslobj.unwrap) + + def close(self): + self.socket.close() + + def getpeercert(self, binary_form=False): + return self.sslobj.getpeercert(binary_form) + + def version(self): + return self.sslobj.version() + + def cipher(self): + return self.sslobj.cipher() + + def selected_alpn_protocol(self): + return self.sslobj.selected_alpn_protocol() + + def selected_npn_protocol(self): + return self.sslobj.selected_npn_protocol() + + def shared_ciphers(self): + return self.sslobj.shared_ciphers() + + def compression(self): + return self.sslobj.compression() + + def settimeout(self, value): + self.socket.settimeout(value) + + def gettimeout(self): + return self.socket.gettimeout() + + def _decref_socketios(self): + self.socket._decref_socketios() + + def _wrap_ssl_read(self, len, buffer=None): + try: + return self._ssl_io_loop(self.sslobj.read, len, buffer) + except ssl.SSLError as e: + if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs: + return 0 # eof, return 0. + else: + raise + + def _ssl_io_loop(self, func, *args): + """Performs an I/O loop between incoming/outgoing and the socket.""" + should_loop = True + ret = None + + while should_loop: + errno = None + try: + ret = func(*args) + except ssl.SSLError as e: + if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE): + # WANT_READ, and WANT_WRITE are expected, others are not. + raise e + errno = e.errno + + buf = self.outgoing.read() + self.socket.sendall(buf) + + if errno is None: + should_loop = False + elif errno == ssl.SSL_ERROR_WANT_READ: + buf = self.socket.recv(SSL_BLOCKSIZE) + if buf: + self.incoming.write(buf) + else: + self.incoming.write_eof() + return ret diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..dee69d9c408fe7c1ae2ac16fe5cd0788aaf1e108 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py @@ -0,0 +1,14192 @@ +"""Adds docstrings to functions defined in the torch._C module.""" + +import re + +import torch._C +from torch._C import _add_docstr as add_docstr + + +def parse_kwargs(desc): + r"""Map a description of args to a dictionary of {argname: description}. + + Input: + (' weight (Tensor): a weight tensor\n' + + ' Some optional description') + Output: { + 'weight': \ + 'weight (Tensor): a weight tensor\n Some optional description' + } + """ + # Split on exactly 4 spaces after a newline + regx = re.compile(r"\n\s{4}(?!\s)") + kwargs = [section.strip() for section in regx.split(desc)] + kwargs = [section for section in kwargs if len(section) > 0] + return {desc.split(" ")[0]: desc for desc in kwargs} + + +def merge_dicts(*dicts): + """Merge dictionaries into a single dictionary.""" + return {x: d[x] for d in dicts for x in d} + + +common_args = parse_kwargs( + """ + input (Tensor): the input tensor. + generator (:class:`torch.Generator`, optional): a pseudorandom number generator for sampling + out (Tensor, optional): the output tensor. + memory_format (:class:`torch.memory_format`, optional): the desired memory format of + returned tensor. Default: ``torch.preserve_format``. +""" +) + +reduceops_common_args = merge_dicts( + common_args, + parse_kwargs( + """ + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + If specified, the input tensor is casted to :attr:`dtype` before the operation + is performed. This is useful for preventing data type overflows. Default: None. + keepdim (bool): whether the output tensor has :attr:`dim` retained or not. +""" + ), +) + +multi_dim_common = merge_dicts( + reduceops_common_args, + parse_kwargs( + """ + dim (int or tuple of ints): the dimension or dimensions to reduce. +""" + ), + { + "keepdim_details": """ +If :attr:`keepdim` is ``True``, the output tensor is of the same size +as :attr:`input` except in the dimension(s) :attr:`dim` where it is of size 1. +Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in the +output tensor having 1 (or ``len(dim)``) fewer dimension(s). +""" + }, + { + "opt_dim": """ + dim (int or tuple of ints, optional): the dimension or dimensions to reduce. + If ``None``, all dimensions are reduced. +""" + }, +) + +single_dim_common = merge_dicts( + reduceops_common_args, + parse_kwargs( + """ + dim (int): the dimension to reduce. +""" + ), + { + "keepdim_details": """If :attr:`keepdim` is ``True``, the output tensor is of the same size +as :attr:`input` except in the dimension :attr:`dim` where it is of size 1. +Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in +the output tensor having 1 fewer dimension than :attr:`input`.""" + }, +) + +factory_common_args = merge_dicts( + common_args, + parse_kwargs( + """ + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`). + layout (:class:`torch.layout`, optional): the desired layout of returned Tensor. + Default: ``torch.strided``. + device (:class:`torch.device`, optional): the desired device of returned tensor. + Default: if ``None``, uses the current device for the default tensor type + (see :func:`torch.set_default_device`). :attr:`device` will be the CPU + for CPU tensor types and the current CUDA device for CUDA tensor types. + requires_grad (bool, optional): If autograd should record operations on the + returned tensor. Default: ``False``. + pin_memory (bool, optional): If set, returned tensor would be allocated in + the pinned memory. Works only for CPU tensors. Default: ``False``. + memory_format (:class:`torch.memory_format`, optional): the desired memory format of + returned Tensor. Default: ``torch.contiguous_format``. + check_invariants (bool, optional): If sparse tensor invariants are checked. + Default: as returned by :func:`torch.sparse.check_sparse_tensor_invariants.is_enabled`, + initially False. +""" + ), + { + "sparse_factory_device_note": """\ +.. note:: + + If the ``device`` argument is not specified the device of the given + :attr:`values` and indices tensor(s) must match. If, however, the + argument is specified the input Tensors will be converted to the + given device and in turn determine the device of the constructed + sparse tensor.""" + }, +) + +factory_like_common_args = parse_kwargs( + """ + input (Tensor): the size of :attr:`input` will determine size of the output tensor. + layout (:class:`torch.layout`, optional): the desired layout of returned tensor. + Default: if ``None``, defaults to the layout of :attr:`input`. + dtype (:class:`torch.dtype`, optional): the desired data type of returned Tensor. + Default: if ``None``, defaults to the dtype of :attr:`input`. + device (:class:`torch.device`, optional): the desired device of returned tensor. + Default: if ``None``, defaults to the device of :attr:`input`. + requires_grad (bool, optional): If autograd should record operations on the + returned tensor. Default: ``False``. + pin_memory (bool, optional): If set, returned tensor would be allocated in + the pinned memory. Works only for CPU tensors. Default: ``False``. + memory_format (:class:`torch.memory_format`, optional): the desired memory format of + returned Tensor. Default: ``torch.preserve_format``. +""" +) + +factory_data_common_args = parse_kwargs( + """ + data (array_like): Initial data for the tensor. Can be a list, tuple, + NumPy ``ndarray``, scalar, and other types. + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: if ``None``, infers data type from :attr:`data`. + device (:class:`torch.device`, optional): the desired device of returned tensor. + Default: if ``None``, uses the current device for the default tensor type + (see :func:`torch.set_default_device`). :attr:`device` will be the CPU + for CPU tensor types and the current CUDA device for CUDA tensor types. + requires_grad (bool, optional): If autograd should record operations on the + returned tensor. Default: ``False``. + pin_memory (bool, optional): If set, returned tensor would be allocated in + the pinned memory. Works only for CPU tensors. Default: ``False``. +""" +) + +tf32_notes = { + "tf32_note": """This operator supports :ref:`TensorFloat32`.""" +} + +rocm_fp16_notes = { + "rocm_fp16_note": """On certain ROCm devices, when using float16 inputs this module will use \ +:ref:`different precision` for backward.""" +} + +reproducibility_notes = { + "forward_reproducibility_note": """This operation may behave nondeterministically when given tensors on \ +a CUDA device. See :doc:`/notes/randomness` for more information.""", + "backward_reproducibility_note": """This operation may produce nondeterministic gradients when given tensors on \ +a CUDA device. See :doc:`/notes/randomness` for more information.""", + "cudnn_reproducibility_note": """In some circumstances when given tensors on a CUDA device \ +and using CuDNN, this operator may select a nondeterministic algorithm to increase performance. If this is \ +undesirable, you can try to make the operation deterministic (potentially at \ +a performance cost) by setting ``torch.backends.cudnn.deterministic = True``. \ +See :doc:`/notes/randomness` for more information.""", +} + +sparse_support_notes = { + "sparse_beta_warning": """ +.. warning:: + Sparse support is a beta feature and some layout(s)/dtype/device combinations may not be supported, + or may not have autograd support. If you notice missing functionality please + open a feature request.""", +} + +add_docstr( + torch.abs, + r""" +abs(input, *, out=None) -> Tensor + +Computes the absolute value of each element in :attr:`input`. + +.. math:: + \text{out}_{i} = |\text{input}_{i}| +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> torch.abs(torch.tensor([-1, -2, 3])) + tensor([ 1, 2, 3]) +""".format( + **common_args + ), +) + +add_docstr( + torch.absolute, + r""" +absolute(input, *, out=None) -> Tensor + +Alias for :func:`torch.abs` +""", +) + +add_docstr( + torch.acos, + r""" +acos(input, *, out=None) -> Tensor + +Computes the inverse cosine of each element in :attr:`input`. + +.. math:: + \text{out}_{i} = \cos^{-1}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.3348, -0.5889, 0.2005, -0.1584]) + >>> torch.acos(a) + tensor([ 1.2294, 2.2004, 1.3690, 1.7298]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arccos, + r""" +arccos(input, *, out=None) -> Tensor + +Alias for :func:`torch.acos`. +""", +) + +add_docstr( + torch.acosh, + r""" +acosh(input, *, out=None) -> Tensor + +Returns a new tensor with the inverse hyperbolic cosine of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \cosh^{-1}(\text{input}_{i}) + +Note: + The domain of the inverse hyperbolic cosine is `[1, inf)` and values outside this range + will be mapped to ``NaN``, except for `+ INF` for which the output is mapped to `+ INF`. +""" + + r""" +Args: + {input} + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.randn(4).uniform_(1, 2) + >>> a + tensor([ 1.3192, 1.9915, 1.9674, 1.7151 ]) + >>> torch.acosh(a) + tensor([ 0.7791, 1.3120, 1.2979, 1.1341 ]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arccosh, + r""" +arccosh(input, *, out=None) -> Tensor + +Alias for :func:`torch.acosh`. +""", +) + +add_docstr( + torch.index_add, + r""" +index_add(input, dim, index, source, *, alpha=1, out=None) -> Tensor + +See :meth:`~Tensor.index_add_` for function description. +""", +) + +add_docstr( + torch.index_copy, + r""" +index_copy(input, dim, index, source, *, out=None) -> Tensor + +See :meth:`~Tensor.index_add_` for function description. +""", +) + +add_docstr( + torch.index_reduce, + r""" +index_reduce(input, dim, index, source, reduce, *, include_self=True, out=None) -> Tensor + +See :meth:`~Tensor.index_reduce_` for function description. +""", +) + +add_docstr( + torch.add, + r""" +add(input, other, *, alpha=1, out=None) -> Tensor + +Adds :attr:`other`, scaled by :attr:`alpha`, to :attr:`input`. + +.. math:: + \text{{out}}_i = \text{{input}}_i + \text{{alpha}} \times \text{{other}}_i +""" + + r""" + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer, float, and complex inputs. + +Args: + {input} + other (Tensor or Number): the tensor or number to add to :attr:`input`. + +Keyword arguments: + alpha (Number): the multiplier for :attr:`other`. + {out} + +Examples:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.0202, 1.0985, 1.3506, -0.6056]) + >>> torch.add(a, 20) + tensor([ 20.0202, 21.0985, 21.3506, 19.3944]) + + >>> b = torch.randn(4) + >>> b + tensor([-0.9732, -0.3497, 0.6245, 0.4022]) + >>> c = torch.randn(4, 1) + >>> c + tensor([[ 0.3743], + [-1.7724], + [-0.5811], + [-0.8017]]) + >>> torch.add(b, c, alpha=10) + tensor([[ 2.7695, 3.3930, 4.3672, 4.1450], + [-18.6971, -18.0736, -17.0994, -17.3216], + [ -6.7845, -6.1610, -5.1868, -5.4090], + [ -8.9902, -8.3667, -7.3925, -7.6147]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.addbmm, + r""" +addbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None) -> Tensor + +Performs a batch matrix-matrix product of matrices stored +in :attr:`batch1` and :attr:`batch2`, +with a reduced add step (all matrix multiplications get accumulated +along the first dimension). +:attr:`input` is added to the final result. + +:attr:`batch1` and :attr:`batch2` must be 3-D tensors each containing the +same number of matrices. + +If :attr:`batch1` is a :math:`(b \times n \times m)` tensor, :attr:`batch2` is a +:math:`(b \times m \times p)` tensor, :attr:`input` must be +:ref:`broadcastable ` with a :math:`(n \times p)` tensor +and :attr:`out` will be a :math:`(n \times p)` tensor. + +.. math:: + out = \beta\ \text{input} + \alpha\ (\sum_{i=0}^{b-1} \text{batch1}_i \mathbin{@} \text{batch2}_i) + +If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in +it will not be propagated. +""" + + r""" +For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and :attr:`alpha` +must be real numbers, otherwise they should be integers. + +{tf32_note} + +{rocm_fp16_note} + +Args: + batch1 (Tensor): the first batch of matrices to be multiplied + batch2 (Tensor): the second batch of matrices to be multiplied + +Keyword args: + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + input (Tensor): matrix to be added + alpha (Number, optional): multiplier for `batch1 @ batch2` (:math:`\alpha`) + {out} + +Example:: + + >>> M = torch.randn(3, 5) + >>> batch1 = torch.randn(10, 3, 4) + >>> batch2 = torch.randn(10, 4, 5) + >>> torch.addbmm(M, batch1, batch2) + tensor([[ 6.6311, 0.0503, 6.9768, -12.0362, -2.1653], + [ -4.8185, -1.4255, -6.6760, 8.9453, 2.5743], + [ -3.8202, 4.3691, 1.0943, -1.1109, 5.4730]]) +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes + ), +) + +add_docstr( + torch.addcdiv, + r""" +addcdiv(input, tensor1, tensor2, *, value=1, out=None) -> Tensor + +Performs the element-wise division of :attr:`tensor1` by :attr:`tensor2`, +multiplies the result by the scalar :attr:`value` and adds it to :attr:`input`. + +.. warning:: + Integer division with addcdiv is no longer supported, and in a future + release addcdiv will perform a true division of tensor1 and tensor2. + The historic addcdiv behavior can be implemented as + (input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) + for integer inputs and as (input + value * tensor1 / tensor2) for float inputs. + The future addcdiv behavior is just the latter implementation: + (input + value * tensor1 / tensor2), for all dtypes. + +.. math:: + \text{out}_i = \text{input}_i + \text{value} \times \frac{\text{tensor1}_i}{\text{tensor2}_i} +""" + + r""" + +The shapes of :attr:`input`, :attr:`tensor1`, and :attr:`tensor2` must be +:ref:`broadcastable `. + +For inputs of type `FloatTensor` or `DoubleTensor`, :attr:`value` must be +a real number, otherwise an integer. + +Args: + input (Tensor): the tensor to be added + tensor1 (Tensor): the numerator tensor + tensor2 (Tensor): the denominator tensor + +Keyword args: + value (Number, optional): multiplier for :math:`\text{{tensor1}} / \text{{tensor2}}` + {out} + +Example:: + + >>> t = torch.randn(1, 3) + >>> t1 = torch.randn(3, 1) + >>> t2 = torch.randn(1, 3) + >>> torch.addcdiv(t, t1, t2, value=0.1) + tensor([[-0.2312, -3.6496, 0.1312], + [-1.0428, 3.4292, -0.1030], + [-0.5369, -0.9829, 0.0430]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.addcmul, + r""" +addcmul(input, tensor1, tensor2, *, value=1, out=None) -> Tensor + +Performs the element-wise multiplication of :attr:`tensor1` +by :attr:`tensor2`, multiplies the result by the scalar :attr:`value` +and adds it to :attr:`input`. + +.. math:: + \text{out}_i = \text{input}_i + \text{value} \times \text{tensor1}_i \times \text{tensor2}_i +""" + + r""" +The shapes of :attr:`tensor`, :attr:`tensor1`, and :attr:`tensor2` must be +:ref:`broadcastable `. + +For inputs of type `FloatTensor` or `DoubleTensor`, :attr:`value` must be +a real number, otherwise an integer. + +Args: + input (Tensor): the tensor to be added + tensor1 (Tensor): the tensor to be multiplied + tensor2 (Tensor): the tensor to be multiplied + +Keyword args: + value (Number, optional): multiplier for :math:`tensor1 .* tensor2` + {out} + +Example:: + + >>> t = torch.randn(1, 3) + >>> t1 = torch.randn(3, 1) + >>> t2 = torch.randn(1, 3) + >>> torch.addcmul(t, t1, t2, value=0.1) + tensor([[-0.8635, -0.6391, 1.6174], + [-0.7617, -0.5879, 1.7388], + [-0.8353, -0.6249, 1.6511]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.addmm, + r""" +addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None) -> Tensor + +Performs a matrix multiplication of the matrices :attr:`mat1` and :attr:`mat2`. +The matrix :attr:`input` is added to the final result. + +If :attr:`mat1` is a :math:`(n \times m)` tensor, :attr:`mat2` is a +:math:`(m \times p)` tensor, then :attr:`input` must be +:ref:`broadcastable ` with a :math:`(n \times p)` tensor +and :attr:`out` will be a :math:`(n \times p)` tensor. + +:attr:`alpha` and :attr:`beta` are scaling factors on matrix-vector product between +:attr:`mat1` and :attr:`mat2` and the added matrix :attr:`input` respectively. + +.. math:: + \text{out} = \beta\ \text{input} + \alpha\ (\text{mat1}_i \mathbin{@} \text{mat2}_i) + +If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in +it will not be propagated. +""" + + r""" +For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and +:attr:`alpha` must be real numbers, otherwise they should be integers. + +This operation has support for arguments with :ref:`sparse layouts`. If +:attr:`input` is sparse the result will have the same layout and if :attr:`out` +is provided it must have the same layout as :attr:`input`. + +{sparse_beta_warning} + +{tf32_note} + +{rocm_fp16_note} + +Args: + input (Tensor): matrix to be added + mat1 (Tensor): the first matrix to be matrix multiplied + mat2 (Tensor): the second matrix to be matrix multiplied + +Keyword args: + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + alpha (Number, optional): multiplier for :math:`mat1 @ mat2` (:math:`\alpha`) + {out} + +Example:: + + >>> M = torch.randn(2, 3) + >>> mat1 = torch.randn(2, 3) + >>> mat2 = torch.randn(3, 3) + >>> torch.addmm(M, mat1, mat2) + tensor([[-4.8716, 1.4671, -1.3746], + [ 0.7573, -3.9555, -2.8681]]) +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes + ), +) + +add_docstr( + torch.adjoint, + r""" +adjoint(Tensor) -> Tensor +Returns a view of the tensor conjugated and with the last two dimensions transposed. + +``x.adjoint()`` is equivalent to ``x.transpose(-2, -1).conj()`` for complex tensors and +to ``x.transpose(-2, -1)`` for real tensors. + +Example:: + >>> x = torch.arange(4, dtype=torch.float) + >>> A = torch.complex(x, x).reshape(2, 2) + >>> A + tensor([[0.+0.j, 1.+1.j], + [2.+2.j, 3.+3.j]]) + >>> A.adjoint() + tensor([[0.-0.j, 2.-2.j], + [1.-1.j, 3.-3.j]]) + >>> (A.adjoint() == A.mH).all() + tensor(True) +""", +) + +add_docstr( + torch.sspaddmm, + r""" +sspaddmm(input, mat1, mat2, *, beta=1, alpha=1, out=None) -> Tensor + +Matrix multiplies a sparse tensor :attr:`mat1` with a dense tensor +:attr:`mat2`, then adds the sparse tensor :attr:`input` to the result. + +Note: This function is equivalent to :func:`torch.addmm`, except +:attr:`input` and :attr:`mat1` are sparse. + +Args: + input (Tensor): a sparse matrix to be added + mat1 (Tensor): a sparse matrix to be matrix multiplied + mat2 (Tensor): a dense matrix to be matrix multiplied + +Keyword args: + beta (Number, optional): multiplier for :attr:`mat` (:math:`\beta`) + alpha (Number, optional): multiplier for :math:`mat1 @ mat2` (:math:`\alpha`) + {out} +""".format( + **common_args + ), +) + +add_docstr( + torch.smm, + r""" +smm(input, mat) -> Tensor + +Performs a matrix multiplication of the sparse matrix :attr:`input` +with the dense matrix :attr:`mat`. + +Args: + input (Tensor): a sparse matrix to be matrix multiplied + mat (Tensor): a dense matrix to be matrix multiplied +""", +) + +add_docstr( + torch.addmv, + r""" +addmv(input, mat, vec, *, beta=1, alpha=1, out=None) -> Tensor + +Performs a matrix-vector product of the matrix :attr:`mat` and +the vector :attr:`vec`. +The vector :attr:`input` is added to the final result. + +If :attr:`mat` is a :math:`(n \times m)` tensor, :attr:`vec` is a 1-D tensor of +size `m`, then :attr:`input` must be +:ref:`broadcastable ` with a 1-D tensor of size `n` and +:attr:`out` will be 1-D tensor of size `n`. + +:attr:`alpha` and :attr:`beta` are scaling factors on matrix-vector product between +:attr:`mat` and :attr:`vec` and the added tensor :attr:`input` respectively. + +.. math:: + \text{out} = \beta\ \text{input} + \alpha\ (\text{mat} \mathbin{@} \text{vec}) + +If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in +it will not be propagated. +""" + + r""" +For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and +:attr:`alpha` must be real numbers, otherwise they should be integers. + +Args: + input (Tensor): vector to be added + mat (Tensor): matrix to be matrix multiplied + vec (Tensor): vector to be matrix multiplied + +Keyword args: + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + alpha (Number, optional): multiplier for :math:`mat @ vec` (:math:`\alpha`) + {out} + +Example:: + + >>> M = torch.randn(2) + >>> mat = torch.randn(2, 3) + >>> vec = torch.randn(3) + >>> torch.addmv(M, mat, vec) + tensor([-0.3768, -5.5565]) +""".format( + **common_args + ), +) + +add_docstr( + torch.addr, + r""" +addr(input, vec1, vec2, *, beta=1, alpha=1, out=None) -> Tensor + +Performs the outer-product of vectors :attr:`vec1` and :attr:`vec2` +and adds it to the matrix :attr:`input`. + +Optional values :attr:`beta` and :attr:`alpha` are scaling factors on the +outer product between :attr:`vec1` and :attr:`vec2` and the added matrix +:attr:`input` respectively. + +.. math:: + \text{out} = \beta\ \text{input} + \alpha\ (\text{vec1} \otimes \text{vec2}) + +If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in +it will not be propagated. +""" + + r""" +If :attr:`vec1` is a vector of size `n` and :attr:`vec2` is a vector +of size `m`, then :attr:`input` must be +:ref:`broadcastable ` with a matrix of size +:math:`(n \times m)` and :attr:`out` will be a matrix of size +:math:`(n \times m)`. + +Args: + input (Tensor): matrix to be added + vec1 (Tensor): the first vector of the outer product + vec2 (Tensor): the second vector of the outer product + +Keyword args: + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + alpha (Number, optional): multiplier for :math:`\text{{vec1}} \otimes \text{{vec2}}` (:math:`\alpha`) + {out} + +Example:: + + >>> vec1 = torch.arange(1., 4.) + >>> vec2 = torch.arange(1., 3.) + >>> M = torch.zeros(3, 2) + >>> torch.addr(M, vec1, vec2) + tensor([[ 1., 2.], + [ 2., 4.], + [ 3., 6.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.allclose, + r""" +allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False) -> bool + +This function checks if :attr:`input` and :attr:`other` satisfy the condition: + +.. math:: + \lvert \text{input} - \text{other} \rvert \leq \texttt{atol} + \texttt{rtol} \times \lvert \text{other} \rvert +""" + + r""" +elementwise, for all elements of :attr:`input` and :attr:`other`. The behaviour of this function is analogous to +`numpy.allclose `_ + +Args: + input (Tensor): first tensor to compare + other (Tensor): second tensor to compare + atol (float, optional): absolute tolerance. Default: 1e-08 + rtol (float, optional): relative tolerance. Default: 1e-05 + equal_nan (bool, optional): if ``True``, then two ``NaN`` s will be considered equal. Default: ``False`` + +Example:: + + >>> torch.allclose(torch.tensor([10000., 1e-07]), torch.tensor([10000.1, 1e-08])) + False + >>> torch.allclose(torch.tensor([10000., 1e-08]), torch.tensor([10000.1, 1e-09])) + True + >>> torch.allclose(torch.tensor([1.0, float('nan')]), torch.tensor([1.0, float('nan')])) + False + >>> torch.allclose(torch.tensor([1.0, float('nan')]), torch.tensor([1.0, float('nan')]), equal_nan=True) + True +""", +) + +add_docstr( + torch.all, + r""" +all(input) -> Tensor + +Tests if all elements in :attr:`input` evaluate to `True`. + +.. note:: This function matches the behaviour of NumPy in returning + output of dtype `bool` for all supported dtypes except `uint8`. + For `uint8` the dtype of output is `uint8` itself. + +Example:: + + >>> a = torch.rand(1, 2).bool() + >>> a + tensor([[False, True]], dtype=torch.bool) + >>> torch.all(a) + tensor(False, dtype=torch.bool) + >>> a = torch.arange(0, 3) + >>> a + tensor([0, 1, 2]) + >>> torch.all(a) + tensor(False) + +.. function:: all(input, dim, keepdim=False, *, out=None) -> Tensor + :noindex: + +For each row of :attr:`input` in the given dimension :attr:`dim`, +returns `True` if all elements in the row evaluate to `True` and `False` otherwise. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {out} + +Example:: + + >>> a = torch.rand(4, 2).bool() + >>> a + tensor([[True, True], + [True, False], + [True, True], + [True, True]], dtype=torch.bool) + >>> torch.all(a, dim=1) + tensor([ True, False, True, True], dtype=torch.bool) + >>> torch.all(a, dim=0) + tensor([ True, False], dtype=torch.bool) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.any, + r""" +any(input) -> Tensor + +Tests if any element in :attr:`input` evaluates to `True`. + +.. note:: This function matches the behaviour of NumPy in returning + output of dtype `bool` for all supported dtypes except `uint8`. + For `uint8` the dtype of output is `uint8` itself. + +Example:: + + >>> a = torch.rand(1, 2).bool() + >>> a + tensor([[False, True]], dtype=torch.bool) + >>> torch.any(a) + tensor(True, dtype=torch.bool) + >>> a = torch.arange(0, 3) + >>> a + tensor([0, 1, 2]) + >>> torch.any(a) + tensor(True) + +.. function:: any(input, dim, keepdim=False, *, out=None) -> Tensor + :noindex: + +For each row of :attr:`input` in the given dimension :attr:`dim`, +returns `True` if any element in the row evaluate to `True` and `False` otherwise. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4, 2) < 0 + >>> a + tensor([[ True, True], + [False, True], + [ True, True], + [False, False]]) + >>> torch.any(a, 1) + tensor([ True, True, True, False]) + >>> torch.any(a, 0) + tensor([True, True]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.angle, + r""" +angle(input, *, out=None) -> Tensor + +Computes the element-wise angle (in radians) of the given :attr:`input` tensor. + +.. math:: + \text{out}_{i} = angle(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +.. note:: Starting in PyTorch 1.8, angle returns pi for negative real numbers, + zero for non-negative real numbers, and propagates NaNs. Previously + the function would return zero for all real numbers and not propagate + floating-point NaNs. + +Example:: + + >>> torch.angle(torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]))*180/3.14159 + tensor([ 135., 135, -45]) +""".format( + **common_args + ), +) + +add_docstr( + torch.as_strided, + r""" +as_strided(input, size, stride, storage_offset=None) -> Tensor + +Create a view of an existing `torch.Tensor` :attr:`input` with specified +:attr:`size`, :attr:`stride` and :attr:`storage_offset`. + +.. warning:: + Prefer using other view functions, like :meth:`torch.Tensor.expand`, + to setting a view's strides manually with `as_strided`, as this + function's behavior depends on the implementation of a tensor's storage. + The constructed view of the storage must only refer to elements within + the storage or a runtime error will be thrown, and if the view is + "overlapped" (with multiple indices referring to the same element in + memory) its behavior is undefined. + +Args: + {input} + size (tuple or ints): the shape of the output tensor + stride (tuple or ints): the stride of the output tensor + storage_offset (int, optional): the offset in the underlying storage of the output tensor. + If ``None``, the storage_offset of the output tensor will match the input tensor. + +Example:: + + >>> x = torch.randn(3, 3) + >>> x + tensor([[ 0.9039, 0.6291, 1.0795], + [ 0.1586, 2.1939, -0.4900], + [-0.1909, -0.7503, 1.9355]]) + >>> t = torch.as_strided(x, (2, 2), (1, 2)) + >>> t + tensor([[0.9039, 1.0795], + [0.6291, 0.1586]]) + >>> t = torch.as_strided(x, (2, 2), (1, 2), 1) + tensor([[0.6291, 0.1586], + [1.0795, 2.1939]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.as_tensor, + r""" +as_tensor(data, dtype=None, device=None) -> Tensor + +Converts :attr:`data` into a tensor, sharing data and preserving autograd +history if possible. + +If :attr:`data` is already a tensor with the requested dtype and device +then :attr:`data` itself is returned, but if :attr:`data` is a +tensor with a different dtype or device then it's copied as if using +`data.to(dtype=dtype, device=device)`. + +If :attr:`data` is a NumPy array (an ndarray) with the same dtype and device then a +tensor is constructed using :func:`torch.from_numpy`. + +.. seealso:: + + :func:`torch.tensor` never shares its data and creates a new "leaf tensor" (see :doc:`/notes/autograd`). + + +Args: + {data} + {dtype} + device (:class:`torch.device`, optional): the device of the constructed tensor. If None and data is a tensor + then the device of data is used. If None and data is not a tensor then + the result tensor is constructed on the current device. + + +Example:: + + >>> a = numpy.array([1, 2, 3]) + >>> t = torch.as_tensor(a) + >>> t + tensor([ 1, 2, 3]) + >>> t[0] = -1 + >>> a + array([-1, 2, 3]) + + >>> a = numpy.array([1, 2, 3]) + >>> t = torch.as_tensor(a, device=torch.device('cuda')) + >>> t + tensor([ 1, 2, 3]) + >>> t[0] = -1 + >>> a + array([1, 2, 3]) +""".format( + **factory_data_common_args + ), +) + +add_docstr( + torch.asin, + r""" +asin(input, *, out=None) -> Tensor + +Returns a new tensor with the arcsine of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \sin^{-1}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.5962, 1.4985, -0.4396, 1.4525]) + >>> torch.asin(a) + tensor([-0.6387, nan, -0.4552, nan]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arcsin, + r""" +arcsin(input, *, out=None) -> Tensor + +Alias for :func:`torch.asin`. +""", +) + +add_docstr( + torch.asinh, + r""" +asinh(input, *, out=None) -> Tensor + +Returns a new tensor with the inverse hyperbolic sine of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \sinh^{-1}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.1606, -1.4267, -1.0899, -1.0250 ]) + >>> torch.asinh(a) + tensor([ 0.1599, -1.1534, -0.9435, -0.8990 ]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arcsinh, + r""" +arcsinh(input, *, out=None) -> Tensor + +Alias for :func:`torch.asinh`. +""", +) + +add_docstr( + torch.atan, + r""" +atan(input, *, out=None) -> Tensor + +Returns a new tensor with the arctangent of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \tan^{-1}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.2341, 0.2539, -0.6256, -0.6448]) + >>> torch.atan(a) + tensor([ 0.2299, 0.2487, -0.5591, -0.5727]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arctan, + r""" +arctan(input, *, out=None) -> Tensor + +Alias for :func:`torch.atan`. +""", +) + +add_docstr( + torch.atan2, + r""" +atan2(input, other, *, out=None) -> Tensor + +Element-wise arctangent of :math:`\text{{input}}_{{i}} / \text{{other}}_{{i}}` +with consideration of the quadrant. Returns a new tensor with the signed angles +in radians between vector :math:`(\text{{other}}_{{i}}, \text{{input}}_{{i}})` +and vector :math:`(1, 0)`. (Note that :math:`\text{{other}}_{{i}}`, the second +parameter, is the x-coordinate, while :math:`\text{{input}}_{{i}}`, the first +parameter, is the y-coordinate.) + +The shapes of ``input`` and ``other`` must be +:ref:`broadcastable `. + +Args: + input (Tensor): the first input tensor + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.9041, 0.0196, -0.3108, -2.4423]) + >>> torch.atan2(a, torch.randn(4)) + tensor([ 0.9833, 0.0811, -1.9743, -1.4151]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arctan2, + r""" +arctan2(input, other, *, out=None) -> Tensor +Alias for :func:`torch.atan2`. +""", +) + +add_docstr( + torch.atanh, + r""" +atanh(input, *, out=None) -> Tensor + +Returns a new tensor with the inverse hyperbolic tangent of the elements of :attr:`input`. + +Note: + The domain of the inverse hyperbolic tangent is `(-1, 1)` and values outside this range + will be mapped to ``NaN``, except for the values `1` and `-1` for which the output is + mapped to `+/-INF` respectively. + +.. math:: + \text{out}_{i} = \tanh^{-1}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.randn(4).uniform_(-1, 1) + >>> a + tensor([ -0.9385, 0.2968, -0.8591, -0.1871 ]) + >>> torch.atanh(a) + tensor([ -1.7253, 0.3060, -1.2899, -0.1893 ]) +""".format( + **common_args + ), +) + +add_docstr( + torch.arctanh, + r""" +arctanh(input, *, out=None) -> Tensor + +Alias for :func:`torch.atanh`. +""", +) + +add_docstr( + torch.asarray, + r""" +asarray(obj, *, dtype=None, device=None, copy=None, requires_grad=False) -> Tensor + +Converts :attr:`obj` to a tensor. + +:attr:`obj` can be one of: + +1. a tensor +2. a NumPy array or a NumPy scalar +3. a DLPack capsule +4. an object that implements Python's buffer protocol +5. a scalar +6. a sequence of scalars + +When :attr:`obj` is a tensor, NumPy array, or DLPack capsule the returned tensor will, +by default, not require a gradient, have the same datatype as :attr:`obj`, be on the +same device, and share memory with it. These properties can be controlled with the +:attr:`dtype`, :attr:`device`, :attr:`copy`, and :attr:`requires_grad` keyword arguments. +If the returned tensor is of a different datatype, on a different device, or a copy is +requested then it will not share its memory with :attr:`obj`. If :attr:`requires_grad` +is ``True`` then the returned tensor will require a gradient, and if :attr:`obj` is +also a tensor with an autograd history then the returned tensor will have the same history. + +When :attr:`obj` is not a tensor, NumPy array, or DLPack capsule but implements Python's +buffer protocol then the buffer is interpreted as an array of bytes grouped according to +the size of the datatype passed to the :attr:`dtype` keyword argument. (If no datatype is +passed then the default floating point datatype is used, instead.) The returned tensor +will have the specified datatype (or default floating point datatype if none is specified) +and, by default, be on the CPU device and share memory with the buffer. + +When :attr:`obj` is a NumPy scalar, the returned tensor will be a 0-dimensional tensor on +the CPU and that doesn't share its memory (i.e. ``copy=True``). By default datatype will +be the PyTorch datatype corresponding to the NumPy's scalar's datatype. + +When :attr:`obj` is none of the above but a scalar, or a sequence of scalars then the +returned tensor will, by default, infer its datatype from the scalar values, be on the +current default device, and not share its memory. + +.. seealso:: + + :func:`torch.tensor` creates a tensor that always copies the data from the input object. + :func:`torch.from_numpy` creates a tensor that always shares memory from NumPy arrays. + :func:`torch.frombuffer` creates a tensor that always shares memory from objects that + implement the buffer protocol. + :func:`torch.from_dlpack` creates a tensor that always shares memory from + DLPack capsules. + +Args: + obj (object): a tensor, NumPy array, DLPack Capsule, object that implements Python's + buffer protocol, scalar, or sequence of scalars. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the datatype of the returned tensor. + Default: ``None``, which causes the datatype of the returned tensor to be + inferred from :attr:`obj`. + copy (bool, optional): controls whether the returned tensor shares memory with :attr:`obj`. + Default: ``None``, which causes the returned tensor to share memory with :attr:`obj` + whenever possible. If ``True`` then the returned tensor does not share its memory. + If ``False`` then the returned tensor shares its memory with :attr:`obj` and an + error is thrown if it cannot. + device (:class:`torch.device`, optional): the device of the returned tensor. + Default: ``None``, which causes the device of :attr:`obj` to be used. Or, if + :attr:`obj` is a Python sequence, the current default device will be used. + requires_grad (bool, optional): whether the returned tensor requires grad. + Default: ``False``, which causes the returned tensor not to require a gradient. + If ``True``, then the returned tensor will require a gradient, and if :attr:`obj` + is also a tensor with an autograd history then the returned tensor will have + the same history. + +Example:: + + >>> a = torch.tensor([1, 2, 3]) + >>> # Shares memory with tensor 'a' + >>> b = torch.asarray(a) + >>> a.data_ptr() == b.data_ptr() + True + >>> # Forces memory copy + >>> c = torch.asarray(a, copy=True) + >>> a.data_ptr() == c.data_ptr() + False + + >>> a = torch.tensor([1., 2., 3.], requires_grad=True) + >>> b = a + 2 + >>> b + tensor([3., 4., 5.], grad_fn=) + >>> # Shares memory with tensor 'b', with no grad + >>> c = torch.asarray(b) + >>> c + tensor([3., 4., 5.]) + >>> # Shares memory with tensor 'b', retaining autograd history + >>> d = torch.asarray(b, requires_grad=True) + >>> d + tensor([3., 4., 5.], grad_fn=) + + >>> array = numpy.array([1, 2, 3]) + >>> # Shares memory with array 'array' + >>> t1 = torch.asarray(array) + >>> array.__array_interface__['data'][0] == t1.data_ptr() + True + >>> # Copies memory due to dtype mismatch + >>> t2 = torch.asarray(array, dtype=torch.float32) + >>> array.__array_interface__['data'][0] == t2.data_ptr() + False + + >>> scalar = numpy.float64(0.5) + >>> torch.asarray(scalar) + tensor(0.5000, dtype=torch.float64) +""", +) + +add_docstr( + torch.baddbmm, + r""" +baddbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None) -> Tensor + +Performs a batch matrix-matrix product of matrices in :attr:`batch1` +and :attr:`batch2`. +:attr:`input` is added to the final result. + +:attr:`batch1` and :attr:`batch2` must be 3-D tensors each containing the same +number of matrices. + +If :attr:`batch1` is a :math:`(b \times n \times m)` tensor, :attr:`batch2` is a +:math:`(b \times m \times p)` tensor, then :attr:`input` must be +:ref:`broadcastable ` with a +:math:`(b \times n \times p)` tensor and :attr:`out` will be a +:math:`(b \times n \times p)` tensor. Both :attr:`alpha` and :attr:`beta` mean the +same as the scaling factors used in :meth:`torch.addbmm`. + +.. math:: + \text{out}_i = \beta\ \text{input}_i + \alpha\ (\text{batch1}_i \mathbin{@} \text{batch2}_i) + +If :attr:`beta` is 0, then :attr:`input` will be ignored, and `nan` and `inf` in +it will not be propagated. +""" + + r""" +For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and +:attr:`alpha` must be real numbers, otherwise they should be integers. + +{tf32_note} + +{rocm_fp16_note} + +Args: + input (Tensor): the tensor to be added + batch1 (Tensor): the first batch of matrices to be multiplied + batch2 (Tensor): the second batch of matrices to be multiplied + +Keyword args: + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + alpha (Number, optional): multiplier for :math:`\text{{batch1}} \mathbin{{@}} \text{{batch2}}` (:math:`\alpha`) + {out} + +Example:: + + >>> M = torch.randn(10, 3, 5) + >>> batch1 = torch.randn(10, 3, 4) + >>> batch2 = torch.randn(10, 4, 5) + >>> torch.baddbmm(M, batch1, batch2).size() + torch.Size([10, 3, 5]) +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes + ), +) + +add_docstr( + torch.bernoulli, + r""" +bernoulli(input, *, generator=None, out=None) -> Tensor + +Draws binary random numbers (0 or 1) from a Bernoulli distribution. + +The :attr:`input` tensor should be a tensor containing probabilities +to be used for drawing the binary random number. +Hence, all values in :attr:`input` have to be in the range: +:math:`0 \leq \text{input}_i \leq 1`. + +The :math:`\text{i}^{th}` element of the output tensor will draw a +value :math:`1` according to the :math:`\text{i}^{th}` probability value given +in :attr:`input`. + +.. math:: + \text{out}_{i} \sim \mathrm{Bernoulli}(p = \text{input}_{i}) +""" + + r""" +The returned :attr:`out` tensor only has values 0 or 1 and is of the same +shape as :attr:`input`. + +:attr:`out` can have integral ``dtype``, but :attr:`input` must have floating +point ``dtype``. + +Args: + input (Tensor): the input tensor of probability values for the Bernoulli distribution + +Keyword args: + {generator} + {out} + +Example:: + + >>> a = torch.empty(3, 3).uniform_(0, 1) # generate a uniform random matrix with range [0, 1] + >>> a + tensor([[ 0.1737, 0.0950, 0.3609], + [ 0.7148, 0.0289, 0.2676], + [ 0.9456, 0.8937, 0.7202]]) + >>> torch.bernoulli(a) + tensor([[ 1., 0., 0.], + [ 0., 0., 0.], + [ 1., 1., 1.]]) + + >>> a = torch.ones(3, 3) # probability of drawing "1" is 1 + >>> torch.bernoulli(a) + tensor([[ 1., 1., 1.], + [ 1., 1., 1.], + [ 1., 1., 1.]]) + >>> a = torch.zeros(3, 3) # probability of drawing "1" is 0 + >>> torch.bernoulli(a) + tensor([[ 0., 0., 0.], + [ 0., 0., 0.], + [ 0., 0., 0.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.bincount, + r""" +bincount(input, weights=None, minlength=0) -> Tensor + +Count the frequency of each value in an array of non-negative ints. + +The number of bins (size 1) is one larger than the largest value in +:attr:`input` unless :attr:`input` is empty, in which case the result is a +tensor of size 0. If :attr:`minlength` is specified, the number of bins is at least +:attr:`minlength` and if :attr:`input` is empty, then the result is tensor of size +:attr:`minlength` filled with zeros. If ``n`` is the value at position ``i``, +``out[n] += weights[i]`` if :attr:`weights` is specified else +``out[n] += 1``. + +Note: + {backward_reproducibility_note} + +Arguments: + input (Tensor): 1-d int tensor + weights (Tensor): optional, weight for each value in the input tensor. + Should be of same size as input tensor. + minlength (int): optional, minimum number of bins. Should be non-negative. + +Returns: + output (Tensor): a tensor of shape ``Size([max(input) + 1])`` if + :attr:`input` is non-empty, else ``Size(0)`` + +Example:: + + >>> input = torch.randint(0, 8, (5,), dtype=torch.int64) + >>> weights = torch.linspace(0, 1, steps=5) + >>> input, weights + (tensor([4, 3, 6, 3, 4]), + tensor([ 0.0000, 0.2500, 0.5000, 0.7500, 1.0000]) + + >>> torch.bincount(input) + tensor([0, 0, 0, 2, 2, 0, 1]) + + >>> input.bincount(weights) + tensor([0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5000]) +""".format( + **reproducibility_notes + ), +) + +add_docstr( + torch.bitwise_not, + r""" +bitwise_not(input, *, out=None) -> Tensor + +Computes the bitwise NOT of the given input tensor. The input tensor must be of +integral or Boolean types. For bool tensors, it computes the logical NOT. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_not(torch.tensor([-1, -2, 3], dtype=torch.int8)) + tensor([ 0, 1, -4], dtype=torch.int8) +""".format( + **common_args + ), +) + +add_docstr( + torch.bmm, + r""" +bmm(input, mat2, *, out=None) -> Tensor + +Performs a batch matrix-matrix product of matrices stored in :attr:`input` +and :attr:`mat2`. + +:attr:`input` and :attr:`mat2` must be 3-D tensors each containing +the same number of matrices. + +If :attr:`input` is a :math:`(b \times n \times m)` tensor, :attr:`mat2` is a +:math:`(b \times m \times p)` tensor, :attr:`out` will be a +:math:`(b \times n \times p)` tensor. + +.. math:: + \text{out}_i = \text{input}_i \mathbin{@} \text{mat2}_i +""" + + r""" +{tf32_note} + +{rocm_fp16_note} + +.. note:: This function does not :ref:`broadcast `. + For broadcasting matrix products, see :func:`torch.matmul`. + +Args: + input (Tensor): the first batch of matrices to be multiplied + mat2 (Tensor): the second batch of matrices to be multiplied + +Keyword Args: + {out} + +Example:: + + >>> input = torch.randn(10, 3, 4) + >>> mat2 = torch.randn(10, 4, 5) + >>> res = torch.bmm(input, mat2) + >>> res.size() + torch.Size([10, 3, 5]) +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes + ), +) + +add_docstr( + torch.bitwise_and, + r""" +bitwise_and(input, other, *, out=None) -> Tensor + +Computes the bitwise AND of :attr:`input` and :attr:`other`. The input tensor must be of +integral or Boolean types. For bool tensors, it computes the logical AND. + +Args: + input: the first input tensor + other: the second input tensor + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_and(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8)) + tensor([1, 0, 3], dtype=torch.int8) + >>> torch.bitwise_and(torch.tensor([True, True, False]), torch.tensor([False, True, False])) + tensor([ False, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.bitwise_or, + r""" +bitwise_or(input, other, *, out=None) -> Tensor + +Computes the bitwise OR of :attr:`input` and :attr:`other`. The input tensor must be of +integral or Boolean types. For bool tensors, it computes the logical OR. + +Args: + input: the first input tensor + other: the second input tensor + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_or(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8)) + tensor([-1, -2, 3], dtype=torch.int8) + >>> torch.bitwise_or(torch.tensor([True, True, False]), torch.tensor([False, True, False])) + tensor([ True, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.bitwise_xor, + r""" +bitwise_xor(input, other, *, out=None) -> Tensor + +Computes the bitwise XOR of :attr:`input` and :attr:`other`. The input tensor must be of +integral or Boolean types. For bool tensors, it computes the logical XOR. + +Args: + input: the first input tensor + other: the second input tensor + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_xor(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8)) + tensor([-2, -2, 0], dtype=torch.int8) + >>> torch.bitwise_xor(torch.tensor([True, True, False]), torch.tensor([False, True, False])) + tensor([ True, False, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.bitwise_left_shift, + r""" +bitwise_left_shift(input, other, *, out=None) -> Tensor + +Computes the left arithmetic shift of :attr:`input` by :attr:`other` bits. +The input tensor must be of integral type. This operator supports +:ref:`broadcasting to a common shape ` and +:ref:`type promotion `. + +The operation applied is: + +.. math:: + \text{{out}}_i = \text{{input}}_i << \text{{other}}_i + +Args: + input (Tensor or Scalar): the first input tensor + other (Tensor or Scalar): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_left_shift(torch.tensor([-1, -2, 3], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8)) + tensor([-2, -2, 24], dtype=torch.int8) +""".format( + **common_args + ), +) + +add_docstr( + torch.bitwise_right_shift, + r""" +bitwise_right_shift(input, other, *, out=None) -> Tensor + +Computes the right arithmetic shift of :attr:`input` by :attr:`other` bits. +The input tensor must be of integral type. This operator supports +:ref:`broadcasting to a common shape ` and +:ref:`type promotion `. +In any case, if the value of the right operand is negative or is greater +or equal to the number of bits in the promoted left operand, the behavior is undefined. + +The operation applied is: + +.. math:: + \text{{out}}_i = \text{{input}}_i >> \text{{other}}_i + +Args: + input (Tensor or Scalar): the first input tensor + other (Tensor or Scalar): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> torch.bitwise_right_shift(torch.tensor([-2, -7, 31], dtype=torch.int8), torch.tensor([1, 0, 3], dtype=torch.int8)) + tensor([-1, -7, 3], dtype=torch.int8) +""".format( + **common_args + ), +) + +add_docstr( + torch.broadcast_to, + r""" +broadcast_to(input, shape) -> Tensor + +Broadcasts :attr:`input` to the shape :attr:`\shape`. +Equivalent to calling ``input.expand(shape)``. See :meth:`~Tensor.expand` for details. + +Args: + {input} + shape (list, tuple, or :class:`torch.Size`): the new shape. + +Example:: + + >>> x = torch.tensor([1, 2, 3]) + >>> torch.broadcast_to(x, (3, 3)) + tensor([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.stack, + r""" +stack(tensors, dim=0, *, out=None) -> Tensor + +Concatenates a sequence of tensors along a new dimension. + +All tensors need to be of the same size. + +.. seealso:: + + :func:`torch.cat` concatenates the given sequence along an existing dimension. + +Arguments: + tensors (sequence of Tensors): sequence of tensors to concatenate + dim (int, optional): dimension to insert. Has to be between 0 and the number + of dimensions of concatenated tensors (inclusive). Default: 0 + +Keyword args: + {out} + +Example:: + + >>> x = torch.randn(2, 3) + >>> x + tensor([[ 0.3367, 0.1288, 0.2345], + [ 0.2303, -1.1229, -0.1863]]) + >>> x = torch.stack((x, x)) # same as torch.stack((x, x), dim=0) + >>> x + tensor([[[ 0.3367, 0.1288, 0.2345], + [ 0.2303, -1.1229, -0.1863]], + + [[ 0.3367, 0.1288, 0.2345], + [ 0.2303, -1.1229, -0.1863]]]) + >>> x.size() + torch.Size([2, 2, 3]) + >>> x = torch.stack((x, x), dim=1) + tensor([[[ 0.3367, 0.1288, 0.2345], + [ 0.3367, 0.1288, 0.2345]], + + [[ 0.2303, -1.1229, -0.1863], + [ 0.2303, -1.1229, -0.1863]]]) + >>> x = torch.stack((x, x), dim=2) + tensor([[[ 0.3367, 0.3367], + [ 0.1288, 0.1288], + [ 0.2345, 0.2345]], + + [[ 0.2303, 0.2303], + [-1.1229, -1.1229], + [-0.1863, -0.1863]]]) + >>> x = torch.stack((x, x), dim=-1) + tensor([[[ 0.3367, 0.3367], + [ 0.1288, 0.1288], + [ 0.2345, 0.2345]], + + [[ 0.2303, 0.2303], + [-1.1229, -1.1229], + [-0.1863, -0.1863]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.hstack, + r""" +hstack(tensors, *, out=None) -> Tensor + +Stack tensors in sequence horizontally (column wise). + +This is equivalent to concatenation along the first axis for 1-D tensors, and along the second axis for all other tensors. + +Args: + tensors (sequence of Tensors): sequence of tensors to concatenate + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([1, 2, 3]) + >>> b = torch.tensor([4, 5, 6]) + >>> torch.hstack((a,b)) + tensor([1, 2, 3, 4, 5, 6]) + >>> a = torch.tensor([[1],[2],[3]]) + >>> b = torch.tensor([[4],[5],[6]]) + >>> torch.hstack((a,b)) + tensor([[1, 4], + [2, 5], + [3, 6]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.vstack, + r""" +vstack(tensors, *, out=None) -> Tensor + +Stack tensors in sequence vertically (row wise). + +This is equivalent to concatenation along the first axis after all 1-D tensors have been reshaped by :func:`torch.atleast_2d`. + +Args: + tensors (sequence of Tensors): sequence of tensors to concatenate + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([1, 2, 3]) + >>> b = torch.tensor([4, 5, 6]) + >>> torch.vstack((a,b)) + tensor([[1, 2, 3], + [4, 5, 6]]) + >>> a = torch.tensor([[1],[2],[3]]) + >>> b = torch.tensor([[4],[5],[6]]) + >>> torch.vstack((a,b)) + tensor([[1], + [2], + [3], + [4], + [5], + [6]]) + + +""".format( + **common_args + ), +) + +add_docstr( + torch.dstack, + r""" +dstack(tensors, *, out=None) -> Tensor + +Stack tensors in sequence depthwise (along third axis). + +This is equivalent to concatenation along the third axis after 1-D and 2-D tensors have been reshaped by :func:`torch.atleast_3d`. + +Args: + tensors (sequence of Tensors): sequence of tensors to concatenate + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([1, 2, 3]) + >>> b = torch.tensor([4, 5, 6]) + >>> torch.dstack((a,b)) + tensor([[[1, 4], + [2, 5], + [3, 6]]]) + >>> a = torch.tensor([[1],[2],[3]]) + >>> b = torch.tensor([[4],[5],[6]]) + >>> torch.dstack((a,b)) + tensor([[[1, 4]], + [[2, 5]], + [[3, 6]]]) + + +""".format( + **common_args + ), +) + +add_docstr( + torch.tensor_split, + r""" +tensor_split(input, indices_or_sections, dim=0) -> List of Tensors + +Splits a tensor into multiple sub-tensors, all of which are views of :attr:`input`, +along dimension :attr:`dim` according to the indices or number of sections specified +by :attr:`indices_or_sections`. This function is based on NumPy's +:func:`numpy.array_split`. + +Args: + input (Tensor): the tensor to split + indices_or_sections (Tensor, int or list or tuple of ints): + If :attr:`indices_or_sections` is an integer ``n`` or a zero dimensional long tensor + with value ``n``, :attr:`input` is split into ``n`` sections along dimension :attr:`dim`. + If :attr:`input` is divisible by ``n`` along dimension :attr:`dim`, each + section will be of equal size, :code:`input.size(dim) / n`. If :attr:`input` + is not divisible by ``n``, the sizes of the first :code:`int(input.size(dim) % n)` + sections will have size :code:`int(input.size(dim) / n) + 1`, and the rest will + have size :code:`int(input.size(dim) / n)`. + + If :attr:`indices_or_sections` is a list or tuple of ints, or a one-dimensional long + tensor, then :attr:`input` is split along dimension :attr:`dim` at each of the indices + in the list, tuple or tensor. For instance, :code:`indices_or_sections=[2, 3]` and :code:`dim=0` + would result in the tensors :code:`input[:2]`, :code:`input[2:3]`, and :code:`input[3:]`. + + If :attr:`indices_or_sections` is a tensor, it must be a zero-dimensional or one-dimensional + long tensor on the CPU. + + dim (int, optional): dimension along which to split the tensor. Default: ``0`` + +Example:: + + >>> x = torch.arange(8) + >>> torch.tensor_split(x, 3) + (tensor([0, 1, 2]), tensor([3, 4, 5]), tensor([6, 7])) + + >>> x = torch.arange(7) + >>> torch.tensor_split(x, 3) + (tensor([0, 1, 2]), tensor([3, 4]), tensor([5, 6])) + >>> torch.tensor_split(x, (1, 6)) + (tensor([0]), tensor([1, 2, 3, 4, 5]), tensor([6])) + + >>> x = torch.arange(14).reshape(2, 7) + >>> x + tensor([[ 0, 1, 2, 3, 4, 5, 6], + [ 7, 8, 9, 10, 11, 12, 13]]) + >>> torch.tensor_split(x, 3, dim=1) + (tensor([[0, 1, 2], + [7, 8, 9]]), + tensor([[ 3, 4], + [10, 11]]), + tensor([[ 5, 6], + [12, 13]])) + >>> torch.tensor_split(x, (1, 6), dim=1) + (tensor([[0], + [7]]), + tensor([[ 1, 2, 3, 4, 5], + [ 8, 9, 10, 11, 12]]), + tensor([[ 6], + [13]])) +""", +) + +add_docstr( + torch.chunk, + r""" +chunk(input, chunks, dim=0) -> List of Tensors + +Attempts to split a tensor into the specified number of chunks. Each chunk is a view of +the input tensor. + + +.. note:: + + This function may return fewer than the specified number of chunks! + +.. seealso:: + + :func:`torch.tensor_split` a function that always returns exactly the specified number of chunks + +If the tensor size along the given dimension :attr:`dim` is divisible by :attr:`chunks`, +all returned chunks will be the same size. +If the tensor size along the given dimension :attr:`dim` is not divisible by :attr:`chunks`, +all returned chunks will be the same size, except the last one. +If such division is not possible, this function may return fewer +than the specified number of chunks. + +Arguments: + input (Tensor): the tensor to split + chunks (int): number of chunks to return + dim (int): dimension along which to split the tensor + +Example: + >>> torch.arange(11).chunk(6) + (tensor([0, 1]), + tensor([2, 3]), + tensor([4, 5]), + tensor([6, 7]), + tensor([8, 9]), + tensor([10])) + >>> torch.arange(12).chunk(6) + (tensor([0, 1]), + tensor([2, 3]), + tensor([4, 5]), + tensor([6, 7]), + tensor([8, 9]), + tensor([10, 11])) + >>> torch.arange(13).chunk(6) + (tensor([0, 1, 2]), + tensor([3, 4, 5]), + tensor([6, 7, 8]), + tensor([ 9, 10, 11]), + tensor([12])) +""", +) + +add_docstr( + torch.unsafe_chunk, + r""" +unsafe_chunk(input, chunks, dim=0) -> List of Tensors + +Works like :func:`torch.chunk` but without enforcing the autograd restrictions +on inplace modification of the outputs. + +.. warning:: + This function is safe to use as long as only the input, or only the outputs + are modified inplace after calling this function. It is user's + responsibility to ensure that is the case. If both the input and one or more + of the outputs are modified inplace, gradients computed by autograd will be + silently incorrect. +""", +) + +add_docstr( + torch.unsafe_split, + r""" +unsafe_split(tensor, split_size_or_sections, dim=0) -> List of Tensors + +Works like :func:`torch.split` but without enforcing the autograd restrictions +on inplace modification of the outputs. + +.. warning:: + This function is safe to use as long as only the input, or only the outputs + are modified inplace after calling this function. It is user's + responsibility to ensure that is the case. If both the input and one or more + of the outputs are modified inplace, gradients computed by autograd will be + silently incorrect. +""", +) + +add_docstr( + torch.hsplit, + r""" +hsplit(input, indices_or_sections) -> List of Tensors + +Splits :attr:`input`, a tensor with one or more dimensions, into multiple tensors +horizontally according to :attr:`indices_or_sections`. Each split is a view of +:attr:`input`. + +If :attr:`input` is one dimensional this is equivalent to calling +torch.tensor_split(input, indices_or_sections, dim=0) (the split dimension is +zero), and if :attr:`input` has two or more dimensions it's equivalent to calling +torch.tensor_split(input, indices_or_sections, dim=1) (the split dimension is 1), +except that if :attr:`indices_or_sections` is an integer it must evenly divide +the split dimension or a runtime error will be thrown. + +This function is based on NumPy's :func:`numpy.hsplit`. + +Args: + input (Tensor): tensor to split. + indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`. + +Example:: + >>> t = torch.arange(16.0).reshape(4,4) + >>> t + tensor([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) + >>> torch.hsplit(t, 2) + (tensor([[ 0., 1.], + [ 4., 5.], + [ 8., 9.], + [12., 13.]]), + tensor([[ 2., 3.], + [ 6., 7.], + [10., 11.], + [14., 15.]])) + >>> torch.hsplit(t, [3, 6]) + (tensor([[ 0., 1., 2.], + [ 4., 5., 6.], + [ 8., 9., 10.], + [12., 13., 14.]]), + tensor([[ 3.], + [ 7.], + [11.], + [15.]]), + tensor([], size=(4, 0))) + +""", +) + +add_docstr( + torch.vsplit, + r""" +vsplit(input, indices_or_sections) -> List of Tensors + +Splits :attr:`input`, a tensor with two or more dimensions, into multiple tensors +vertically according to :attr:`indices_or_sections`. Each split is a view of +:attr:`input`. + +This is equivalent to calling torch.tensor_split(input, indices_or_sections, dim=0) +(the split dimension is 0), except that if :attr:`indices_or_sections` is an integer +it must evenly divide the split dimension or a runtime error will be thrown. + +This function is based on NumPy's :func:`numpy.vsplit`. + +Args: + input (Tensor): tensor to split. + indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`. + +Example:: + >>> t = torch.arange(16.0).reshape(4,4) + >>> t + tensor([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) + >>> torch.vsplit(t, 2) + (tensor([[0., 1., 2., 3.], + [4., 5., 6., 7.]]), + tensor([[ 8., 9., 10., 11.], + [12., 13., 14., 15.]])) + >>> torch.vsplit(t, [3, 6]) + (tensor([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]]), + tensor([[12., 13., 14., 15.]]), + tensor([], size=(0, 4))) + +""", +) + +add_docstr( + torch.dsplit, + r""" +dsplit(input, indices_or_sections) -> List of Tensors + +Splits :attr:`input`, a tensor with three or more dimensions, into multiple tensors +depthwise according to :attr:`indices_or_sections`. Each split is a view of +:attr:`input`. + +This is equivalent to calling torch.tensor_split(input, indices_or_sections, dim=2) +(the split dimension is 2), except that if :attr:`indices_or_sections` is an integer +it must evenly divide the split dimension or a runtime error will be thrown. + +This function is based on NumPy's :func:`numpy.dsplit`. + +Args: + input (Tensor): tensor to split. + indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`. + +Example:: + >>> t = torch.arange(16.0).reshape(2, 2, 4) + >>> t + tensor([[[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.]], + [[ 8., 9., 10., 11.], + [12., 13., 14., 15.]]]) + >>> torch.dsplit(t, 2) + (tensor([[[ 0., 1.], + [ 4., 5.]], + [[ 8., 9.], + [12., 13.]]]), + tensor([[[ 2., 3.], + [ 6., 7.]], + [[10., 11.], + [14., 15.]]])) + + >>> torch.dsplit(t, [3, 6]) + (tensor([[[ 0., 1., 2.], + [ 4., 5., 6.]], + [[ 8., 9., 10.], + [12., 13., 14.]]]), + tensor([[[ 3.], + [ 7.]], + [[11.], + [15.]]]), + tensor([], size=(2, 2, 0))) + +""", +) + +add_docstr( + torch.can_cast, + r""" +can_cast(from, to) -> bool + +Determines if a type conversion is allowed under PyTorch casting rules +described in the type promotion :ref:`documentation `. + +Args: + from (dtype): The original :class:`torch.dtype`. + to (dtype): The target :class:`torch.dtype`. + +Example:: + + >>> torch.can_cast(torch.double, torch.float) + True + >>> torch.can_cast(torch.float, torch.int) + False +""", +) + +add_docstr( + torch.corrcoef, + r""" +corrcoef(input) -> Tensor + +Estimates the Pearson product-moment correlation coefficient matrix of the variables given by the :attr:`input` matrix, +where rows are the variables and columns are the observations. + +.. note:: + + The correlation coefficient matrix R is computed using the covariance matrix C as given by + :math:`R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }` + +.. note:: + + Due to floating point rounding, the resulting array may not be Hermitian and its diagonal elements may not be 1. + The real and imaginary values are clipped to the interval [-1, 1] in an attempt to improve this situation. + +Args: + input (Tensor): A 2D matrix containing multiple variables and observations, or a + Scalar or 1D vector representing a single variable. + +Returns: + (Tensor) The correlation coefficient matrix of the variables. + +.. seealso:: + + :func:`torch.cov` covariance matrix. + +Example:: + + >>> x = torch.tensor([[0, 1, 2], [2, 1, 0]]) + >>> torch.corrcoef(x) + tensor([[ 1., -1.], + [-1., 1.]]) + >>> x = torch.randn(2, 4) + >>> x + tensor([[-0.2678, -0.0908, -0.3766, 0.2780], + [-0.5812, 0.1535, 0.2387, 0.2350]]) + >>> torch.corrcoef(x) + tensor([[1.0000, 0.3582], + [0.3582, 1.0000]]) + >>> torch.corrcoef(x[0]) + tensor(1.) +""", +) + +add_docstr( + torch.cov, + r""" +cov(input, *, correction=1, fweights=None, aweights=None) -> Tensor + +Estimates the covariance matrix of the variables given by the :attr:`input` matrix, where rows are +the variables and columns are the observations. + +A covariance matrix is a square matrix giving the covariance of each pair of variables. The diagonal contains +the variance of each variable (covariance of a variable with itself). By definition, if :attr:`input` represents +a single variable (Scalar or 1D) then its variance is returned. + +The sample covariance of the variables :math:`x` and :math:`y` is given by: + +.. math:: + \text{cov}(x,y) = \frac{\sum^{N}_{i = 1}(x_{i} - \bar{x})(y_{i} - \bar{y})}{\max(0,~N~-~\delta N)} + +where :math:`\bar{x}` and :math:`\bar{y}` are the simple means of the :math:`x` and :math:`y` respectively, and +:math:`\delta N` is the :attr:`correction`. + +If :attr:`fweights` and/or :attr:`aweights` are provided, the weighted covariance +is calculated, which is given by: + +.. math:: + \text{cov}_w(x,y) = \frac{\sum^{N}_{i = 1}w_i(x_{i} - \mu_x^*)(y_{i} - \mu_y^*)} + {\max(0,~\sum^{N}_{i = 1}w_i~-~\frac{\sum^{N}_{i = 1}w_ia_i}{\sum^{N}_{i = 1}w_i}~\delta N)} + +where :math:`w` denotes :attr:`fweights` or :attr:`aweights` (``f`` and ``a`` for brevity) based on whichever is +provided, or :math:`w = f \times a` if both are provided, and +:math:`\mu_x^* = \frac{\sum^{N}_{i = 1}w_ix_{i} }{\sum^{N}_{i = 1}w_i}` is the weighted mean of the variable. If not +provided, ``f`` and/or ``a`` can be seen as a :math:`\mathbb{1}` vector of appropriate size. + +Args: + input (Tensor): A 2D matrix containing multiple variables and observations, or a + Scalar or 1D vector representing a single variable. + +Keyword Args: + correction (int, optional): difference between the sample size and sample degrees of freedom. + Defaults to Bessel's correction, ``correction = 1`` which returns the unbiased estimate, + even if both :attr:`fweights` and :attr:`aweights` are specified. ``correction = 0`` + will return the simple average. Defaults to ``1``. + fweights (tensor, optional): A Scalar or 1D tensor of observation vector frequencies representing the number of + times each observation should be repeated. Its numel must equal the number of columns of :attr:`input`. + Must have integral dtype. Ignored if ``None``. Defaults to ``None``. + aweights (tensor, optional): A Scalar or 1D array of observation vector weights. + These relative weights are typically large for observations considered “important” and smaller for + observations considered less “important”. Its numel must equal the number of columns of :attr:`input`. + Must have floating point dtype. Ignored if ``None``. Defaults to ``None``. + +Returns: + (Tensor) The covariance matrix of the variables. + +.. seealso:: + + :func:`torch.corrcoef` normalized covariance matrix. + +Example:: + >>> x = torch.tensor([[0, 2], [1, 1], [2, 0]]).T + >>> x + tensor([[0, 1, 2], + [2, 1, 0]]) + >>> torch.cov(x) + tensor([[ 1., -1.], + [-1., 1.]]) + >>> torch.cov(x, correction=0) + tensor([[ 0.6667, -0.6667], + [-0.6667, 0.6667]]) + >>> fw = torch.randint(1, 10, (3,)) + >>> fw + tensor([1, 6, 9]) + >>> aw = torch.rand(3) + >>> aw + tensor([0.4282, 0.0255, 0.4144]) + >>> torch.cov(x, fweights=fw, aweights=aw) + tensor([[ 0.4169, -0.4169], + [-0.4169, 0.4169]]) +""", +) + +add_docstr( + torch.cat, + r""" +cat(tensors, dim=0, *, out=None) -> Tensor + +Concatenates the given sequence of :attr:`seq` tensors in the given dimension. +All tensors must either have the same shape (except in the concatenating +dimension) or be a 1-D empty tensor with size ``(0,)``. + +:func:`torch.cat` can be seen as an inverse operation for :func:`torch.split` +and :func:`torch.chunk`. + +:func:`torch.cat` can be best understood via examples. + +.. seealso:: + + :func:`torch.stack` concatenates the given sequence along a new dimension. + +Args: + tensors (sequence of Tensors): any python sequence of tensors of the same type. + Non-empty tensors provided must have the same shape, except in the + cat dimension. + dim (int, optional): the dimension over which the tensors are concatenated + +Keyword args: + {out} + +Example:: + + >>> x = torch.randn(2, 3) + >>> x + tensor([[ 0.6580, -1.0969, -0.4614], + [-0.1034, -0.5790, 0.1497]]) + >>> torch.cat((x, x, x), 0) + tensor([[ 0.6580, -1.0969, -0.4614], + [-0.1034, -0.5790, 0.1497], + [ 0.6580, -1.0969, -0.4614], + [-0.1034, -0.5790, 0.1497], + [ 0.6580, -1.0969, -0.4614], + [-0.1034, -0.5790, 0.1497]]) + >>> torch.cat((x, x, x), 1) + tensor([[ 0.6580, -1.0969, -0.4614, 0.6580, -1.0969, -0.4614, 0.6580, + -1.0969, -0.4614], + [-0.1034, -0.5790, 0.1497, -0.1034, -0.5790, 0.1497, -0.1034, + -0.5790, 0.1497]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.concat, + r""" +concat(tensors, dim=0, *, out=None) -> Tensor + +Alias of :func:`torch.cat`. +""", +) + +add_docstr( + torch.concatenate, + r""" +concatenate(tensors, axis=0, out=None) -> Tensor + +Alias of :func:`torch.cat`. +""", +) + +add_docstr( + torch.ceil, + r""" +ceil(input, *, out=None) -> Tensor + +Returns a new tensor with the ceil of the elements of :attr:`input`, +the smallest integer greater than or equal to each element. + +For integer inputs, follows the array-api convention of returning a +copy of the input tensor. + +.. math:: + \text{out}_{i} = \left\lceil \text{input}_{i} \right\rceil +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.6341, -1.4208, -1.0900, 0.5826]) + >>> torch.ceil(a) + tensor([-0., -1., -1., 1.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.real, + r""" +real(input) -> Tensor + +Returns a new tensor containing real values of the :attr:`self` tensor. +The returned tensor and :attr:`self` share the same underlying storage. + +Args: + {input} + +Example:: + + >>> x=torch.randn(4, dtype=torch.cfloat) + >>> x + tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)]) + >>> x.real + tensor([ 0.3100, -0.5445, -1.6492, -0.0638]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.imag, + r""" +imag(input) -> Tensor + +Returns a new tensor containing imaginary values of the :attr:`self` tensor. +The returned tensor and :attr:`self` share the same underlying storage. + +.. warning:: + :func:`imag` is only supported for tensors with complex dtypes. + +Args: + {input} + +Example:: + + >>> x=torch.randn(4, dtype=torch.cfloat) + >>> x + tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)]) + >>> x.imag + tensor([ 0.3553, -0.7896, -0.0633, -0.8119]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.view_as_real, + r""" +view_as_real(input) -> Tensor + +Returns a view of :attr:`input` as a real tensor. For an input complex tensor of +:attr:`size` :math:`m1, m2, \dots, mi`, this function returns a new +real tensor of size :math:`m1, m2, \dots, mi, 2`, where the last dimension of size 2 +represents the real and imaginary components of complex numbers. + +.. warning:: + :func:`view_as_real` is only supported for tensors with ``complex dtypes``. + +Args: + {input} + +Example:: + + >>> x=torch.randn(4, dtype=torch.cfloat) + >>> x + tensor([(0.4737-0.3839j), (-0.2098-0.6699j), (0.3470-0.9451j), (-0.5174-1.3136j)]) + >>> torch.view_as_real(x) + tensor([[ 0.4737, -0.3839], + [-0.2098, -0.6699], + [ 0.3470, -0.9451], + [-0.5174, -1.3136]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.view_as_complex, + r""" +view_as_complex(input) -> Tensor + +Returns a view of :attr:`input` as a complex tensor. For an input complex +tensor of :attr:`size` :math:`m1, m2, \dots, mi, 2`, this function returns a +new complex tensor of :attr:`size` :math:`m1, m2, \dots, mi` where the last +dimension of the input tensor is expected to represent the real and imaginary +components of complex numbers. + +.. warning:: + :func:`view_as_complex` is only supported for tensors with + :class:`torch.dtype` ``torch.float64`` and ``torch.float32``. The input is + expected to have the last dimension of :attr:`size` 2. In addition, the + tensor must have a `stride` of 1 for its last dimension. The strides of all + other dimensions must be even numbers. + +Args: + {input} + +Example:: + + >>> x=torch.randn(4, 2) + >>> x + tensor([[ 1.6116, -0.5772], + [-1.4606, -0.9120], + [ 0.0786, -1.7497], + [-0.6561, -1.6623]]) + >>> torch.view_as_complex(x) + tensor([(1.6116-0.5772j), (-1.4606-0.9120j), (0.0786-1.7497j), (-0.6561-1.6623j)]) +""".format( + **common_args + ), +) + +add_docstr( + torch.reciprocal, + r""" +reciprocal(input, *, out=None) -> Tensor + +Returns a new tensor with the reciprocal of the elements of :attr:`input` + +.. math:: + \text{out}_{i} = \frac{1}{\text{input}_{i}} + +.. note:: + Unlike NumPy's reciprocal, torch.reciprocal supports integral inputs. Integral + inputs to reciprocal are automatically :ref:`promoted ` to + the default scalar type. +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.4595, -2.1219, -1.4314, 0.7298]) + >>> torch.reciprocal(a) + tensor([-2.1763, -0.4713, -0.6986, 1.3702]) +""".format( + **common_args + ), +) + +add_docstr( + torch.cholesky, + r""" +cholesky(input, upper=False, *, out=None) -> Tensor + +Computes the Cholesky decomposition of a symmetric positive-definite +matrix :math:`A` or for batches of symmetric positive-definite matrices. + +If :attr:`upper` is ``True``, the returned matrix ``U`` is upper-triangular, and +the decomposition has the form: + +.. math:: + + A = U^TU + +If :attr:`upper` is ``False``, the returned matrix ``L`` is lower-triangular, and +the decomposition has the form: + +.. math:: + + A = LL^T + +If :attr:`upper` is ``True``, and :math:`A` is a batch of symmetric positive-definite +matrices, then the returned tensor will be composed of upper-triangular Cholesky factors +of each of the individual matrices. Similarly, when :attr:`upper` is ``False``, the returned +tensor will be composed of lower-triangular Cholesky factors of each of the individual +matrices. + +.. warning:: + + :func:`torch.cholesky` is deprecated in favor of :func:`torch.linalg.cholesky` + and will be removed in a future PyTorch release. + + ``L = torch.cholesky(A)`` should be replaced with + + .. code:: python + + L = torch.linalg.cholesky(A) + + ``U = torch.cholesky(A, upper=True)`` should be replaced with + + .. code:: python + + U = torch.linalg.cholesky(A).mH + + This transform will produce equivalent results for all valid (symmetric positive definite) inputs. + +Args: + input (Tensor): the input tensor :math:`A` of size :math:`(*, n, n)` where `*` is zero or more + batch dimensions consisting of symmetric positive-definite matrices. + upper (bool, optional): flag that indicates whether to return a + upper or lower triangular matrix. Default: ``False`` + +Keyword args: + out (Tensor, optional): the output matrix + +Example:: + + >>> a = torch.randn(3, 3) + >>> a = a @ a.mT + 1e-3 # make symmetric positive-definite + >>> l = torch.cholesky(a) + >>> a + tensor([[ 2.4112, -0.7486, 1.4551], + [-0.7486, 1.3544, 0.1294], + [ 1.4551, 0.1294, 1.6724]]) + >>> l + tensor([[ 1.5528, 0.0000, 0.0000], + [-0.4821, 1.0592, 0.0000], + [ 0.9371, 0.5487, 0.7023]]) + >>> l @ l.mT + tensor([[ 2.4112, -0.7486, 1.4551], + [-0.7486, 1.3544, 0.1294], + [ 1.4551, 0.1294, 1.6724]]) + >>> a = torch.randn(3, 2, 2) # Example for batched input + >>> a = a @ a.mT + 1e-03 # make symmetric positive-definite + >>> l = torch.cholesky(a) + >>> z = l @ l.mT + >>> torch.dist(z, a) + tensor(2.3842e-07) +""", +) + +add_docstr( + torch.cholesky_solve, + r""" +cholesky_solve(B, L, upper=False, *, out=None) -> Tensor + +Computes the solution of a system of linear equations with complex Hermitian +or real symmetric positive-definite lhs given its Cholesky decomposition. + +Let :math:`A` be a complex Hermitian or real symmetric positive-definite matrix, +and :math:`L` its Cholesky decomposition such that: + +.. math:: + + A = LL^{\text{H}} + +where :math:`L^{\text{H}}` is the conjugate transpose when :math:`L` is complex, +and the transpose when :math:`L` is real-valued. + +Returns the solution :math:`X` of the following linear system: + +.. math:: + + AX = B + +Supports inputs of float, double, cfloat and cdouble dtypes. +Also supports batches of matrices, and if :math:`A` or :math:`B` is a batch of matrices +then the output has the same batch dimensions. + +Args: + B (Tensor): right-hand side tensor of shape `(*, n, k)` + where :math:`*` is zero or more batch dimensions + L (Tensor): tensor of shape `(*, n, n)` where `*` is zero or more batch dimensions + consisting of lower or upper triangular Cholesky decompositions of + symmetric or Hermitian positive-definite matrices. + upper (bool, optional): flag that indicates whether :math:`L` is lower triangular + or upper triangular. Default: ``False``. + +Keyword args: + out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`. + +Example:: + + >>> A = torch.randn(3, 3) + >>> A = A @ A.T + torch.eye(3) * 1e-3 # Creates a symmetric positive-definite matrix + >>> L = torch.linalg.cholesky(A) # Extract Cholesky decomposition + >>> B = torch.randn(3, 2) + >>> torch.cholesky_solve(B, L) + tensor([[ -8.1625, 19.6097], + [ -5.8398, 14.2387], + [ -4.3771, 10.4173]]) + >>> A.inverse() @ B + tensor([[ -8.1626, 19.6097], + [ -5.8398, 14.2387], + [ -4.3771, 10.4173]]) + + >>> A = torch.randn(3, 2, 2, dtype=torch.complex64) + >>> A = A @ A.mH + torch.eye(2) * 1e-3 # Batch of Hermitian positive-definite matrices + >>> L = torch.linalg.cholesky(A) + >>> B = torch.randn(2, 1, dtype=torch.complex64) + >>> X = torch.cholesky_solve(B, L) + >>> torch.dist(X, A.inverse() @ B) + tensor(1.6881e-5) +""", +) + +add_docstr( + torch.cholesky_inverse, + r""" +cholesky_inverse(L, upper=False, *, out=None) -> Tensor + +Computes the inverse of a complex Hermitian or real symmetric +positive-definite matrix given its Cholesky decomposition. + +Let :math:`A` be a complex Hermitian or real symmetric positive-definite matrix, +and :math:`L` its Cholesky decomposition such that: + +.. math:: + + A = LL^{\text{H}} + +where :math:`L^{\text{H}}` is the conjugate transpose when :math:`L` is complex, +and the transpose when :math:`L` is real-valued. + +Computes the inverse matrix :math:`A^{-1}`. + +Supports input of float, double, cfloat and cdouble dtypes. +Also supports batches of matrices, and if :math:`A` is a batch of matrices +then the output has the same batch dimensions. + +Args: + L (Tensor): tensor of shape `(*, n, n)` where `*` is zero or more batch dimensions + consisting of lower or upper triangular Cholesky decompositions of + symmetric or Hermitian positive-definite matrices. + upper (bool, optional): flag that indicates whether :math:`L` is lower triangular + or upper triangular. Default: ``False`` + +Keyword args: + out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`. + +Example:: + + >>> A = torch.randn(3, 3) + >>> A = A @ A.T + torch.eye(3) * 1e-3 # Creates a symmetric positive-definite matrix + >>> L = torch.linalg.cholesky(A) # Extract Cholesky decomposition + >>> torch.cholesky_inverse(L) + tensor([[ 1.9314, 1.2251, -0.0889], + [ 1.2251, 2.4439, 0.2122], + [-0.0889, 0.2122, 0.1412]]) + >>> A.inverse() + tensor([[ 1.9314, 1.2251, -0.0889], + [ 1.2251, 2.4439, 0.2122], + [-0.0889, 0.2122, 0.1412]]) + + >>> A = torch.randn(3, 2, 2, dtype=torch.complex64) + >>> A = A @ A.mH + torch.eye(2) * 1e-3 # Batch of Hermitian positive-definite matrices + >>> L = torch.linalg.cholesky(A) + >>> torch.dist(torch.inverse(A), torch.cholesky_inverse(L)) + tensor(5.6358e-7) +""", +) + +add_docstr( + torch.clone, + r""" +clone(input, *, memory_format=torch.preserve_format) -> Tensor + +Returns a copy of :attr:`input`. + +.. note:: + + This function is differentiable, so gradients will flow back from the + result of this operation to :attr:`input`. To create a tensor without an + autograd relationship to :attr:`input` see :meth:`~Tensor.detach`. + +Args: + {input} + +Keyword args: + {memory_format} +""".format( + **common_args + ), +) + +add_docstr( + torch.clamp, + r""" +clamp(input, min=None, max=None, *, out=None) -> Tensor + +Clamps all elements in :attr:`input` into the range `[` :attr:`min`, :attr:`max` `]`. +Letting min_value and max_value be :attr:`min` and :attr:`max`, respectively, this returns: + +.. math:: + y_i = \min(\max(x_i, \text{min\_value}_i), \text{max\_value}_i) + +If :attr:`min` is ``None``, there is no lower bound. +Or, if :attr:`max` is ``None`` there is no upper bound. +""" + + r""" + +.. note:: + If :attr:`min` is greater than :attr:`max` :func:`torch.clamp(..., min, max) ` + sets all elements in :attr:`input` to the value of :attr:`max`. + +Args: + {input} + min (Number or Tensor, optional): lower-bound of the range to be clamped to + max (Number or Tensor, optional): upper-bound of the range to be clamped to + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-1.7120, 0.1734, -0.0478, -0.0922]) + >>> torch.clamp(a, min=-0.5, max=0.5) + tensor([-0.5000, 0.1734, -0.0478, -0.0922]) + + >>> min = torch.linspace(-1, 1, steps=4) + >>> torch.clamp(a, min=min) + tensor([-1.0000, 0.1734, 0.3333, 1.0000]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.clip, + r""" +clip(input, min=None, max=None, *, out=None) -> Tensor + +Alias for :func:`torch.clamp`. +""", +) + +add_docstr( + torch.column_stack, + r""" +column_stack(tensors, *, out=None) -> Tensor + +Creates a new tensor by horizontally stacking the tensors in :attr:`tensors`. + +Equivalent to ``torch.hstack(tensors)``, except each zero or one dimensional tensor ``t`` +in :attr:`tensors` is first reshaped into a ``(t.numel(), 1)`` column before being stacked horizontally. + +Args: + tensors (sequence of Tensors): sequence of tensors to concatenate + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([1, 2, 3]) + >>> b = torch.tensor([4, 5, 6]) + >>> torch.column_stack((a, b)) + tensor([[1, 4], + [2, 5], + [3, 6]]) + >>> a = torch.arange(5) + >>> b = torch.arange(10).reshape(5, 2) + >>> torch.column_stack((a, b, b)) + tensor([[0, 0, 1, 0, 1], + [1, 2, 3, 2, 3], + [2, 4, 5, 4, 5], + [3, 6, 7, 6, 7], + [4, 8, 9, 8, 9]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.complex, + r""" +complex(real, imag, *, out=None) -> Tensor + +Constructs a complex tensor with its real part equal to :attr:`real` and its +imaginary part equal to :attr:`imag`. + +Args: + real (Tensor): The real part of the complex tensor. Must be half, float or double. + imag (Tensor): The imaginary part of the complex tensor. Must be same dtype + as :attr:`real`. + +Keyword args: + out (Tensor): If the inputs are ``torch.float32``, must be + ``torch.complex64``. If the inputs are ``torch.float64``, must be + ``torch.complex128``. + +Example:: + + >>> real = torch.tensor([1, 2], dtype=torch.float32) + >>> imag = torch.tensor([3, 4], dtype=torch.float32) + >>> z = torch.complex(real, imag) + >>> z + tensor([(1.+3.j), (2.+4.j)]) + >>> z.dtype + torch.complex64 + +""", +) + +add_docstr( + torch.polar, + r""" +polar(abs, angle, *, out=None) -> Tensor + +Constructs a complex tensor whose elements are Cartesian coordinates +corresponding to the polar coordinates with absolute value :attr:`abs` and angle +:attr:`angle`. + +.. math:: + \text{out} = \text{abs} \cdot \cos(\text{angle}) + \text{abs} \cdot \sin(\text{angle}) \cdot j + +.. note:: + `torch.polar` is similar to + `std::polar `_ + and does not compute the polar decomposition + of a complex tensor like Python's `cmath.polar` and SciPy's `linalg.polar` do. + The behavior of this function is undefined if `abs` is negative or NaN, or if `angle` is + infinite. + +""" + + r""" +Args: + abs (Tensor): The absolute value the complex tensor. Must be float or double. + angle (Tensor): The angle of the complex tensor. Must be same dtype as + :attr:`abs`. + +Keyword args: + out (Tensor): If the inputs are ``torch.float32``, must be + ``torch.complex64``. If the inputs are ``torch.float64``, must be + ``torch.complex128``. + +Example:: + + >>> import numpy as np + >>> abs = torch.tensor([1, 2], dtype=torch.float64) + >>> angle = torch.tensor([np.pi / 2, 5 * np.pi / 4], dtype=torch.float64) + >>> z = torch.polar(abs, angle) + >>> z + tensor([(0.0000+1.0000j), (-1.4142-1.4142j)], dtype=torch.complex128) +""", +) + +add_docstr( + torch.conj_physical, + r""" +conj_physical(input, *, out=None) -> Tensor + +Computes the element-wise conjugate of the given :attr:`input` tensor. +If :attr:`input` has a non-complex dtype, this function just returns :attr:`input`. + +.. note:: + This performs the conjugate operation regardless of the fact conjugate bit is set or not. + +.. warning:: In the future, :func:`torch.conj_physical` may return a non-writeable view for an :attr:`input` of + non-complex dtype. It's recommended that programs not modify the tensor returned by :func:`torch.conj_physical` + when :attr:`input` is of non-complex dtype to be compatible with this change. + +.. math:: + \text{out}_{i} = conj(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> torch.conj_physical(torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j])) + tensor([-1 - 1j, -2 - 2j, 3 + 3j]) +""".format( + **common_args + ), +) + +add_docstr( + torch.conj, + r""" +conj(input) -> Tensor + +Returns a view of :attr:`input` with a flipped conjugate bit. If :attr:`input` has a non-complex dtype, +this function just returns :attr:`input`. + +.. note:: + :func:`torch.conj` performs a lazy conjugation, but the actual conjugated tensor can be materialized + at any time using :func:`torch.resolve_conj`. + +.. warning:: In the future, :func:`torch.conj` may return a non-writeable view for an :attr:`input` of + non-complex dtype. It's recommended that programs not modify the tensor returned by :func:`torch.conj_physical` + when :attr:`input` is of non-complex dtype to be compatible with this change. + +Args: + {input} + +Example:: + + >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]) + >>> x.is_conj() + False + >>> y = torch.conj(x) + >>> y.is_conj() + True +""".format( + **common_args + ), +) + +add_docstr( + torch.resolve_conj, + r""" +resolve_conj(input) -> Tensor + +Returns a new tensor with materialized conjugation if :attr:`input`'s conjugate bit is set to `True`, +else returns :attr:`input`. The output tensor will always have its conjugate bit set to `False`. + +Args: + {input} + +Example:: + + >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]) + >>> y = x.conj() + >>> y.is_conj() + True + >>> z = y.resolve_conj() + >>> z + tensor([-1 - 1j, -2 - 2j, 3 + 3j]) + >>> z.is_conj() + False +""".format( + **common_args + ), +) + +add_docstr( + torch.resolve_neg, + r""" +resolve_neg(input) -> Tensor + +Returns a new tensor with materialized negation if :attr:`input`'s negative bit is set to `True`, +else returns :attr:`input`. The output tensor will always have its negative bit set to `False`. + +Args: + {input} + +Example:: + + >>> x = torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]) + >>> y = x.conj() + >>> z = y.imag + >>> z.is_neg() + True + >>> out = z.resolve_neg() + >>> out + tensor([-1., -2., 3.]) + >>> out.is_neg() + False +""".format( + **common_args + ), +) + +add_docstr( + torch.copysign, + r""" +copysign(input, other, *, out=None) -> Tensor + +Create a new floating-point tensor with the magnitude of :attr:`input` and the sign of :attr:`other`, elementwise. + +.. math:: + \text{out}_{i} = \begin{cases} + -|\text{input}_{i}| & \text{if } \text{other}_{i} \leq -0.0 \\ + |\text{input}_{i}| & \text{if } \text{other}_{i} \geq 0.0 \\ + \end{cases} +""" + + r""" + +Supports :ref:`broadcasting to a common shape `, +and integer and float inputs. + +Args: + input (Tensor): magnitudes. + other (Tensor or Number): contains value(s) whose signbit(s) are + applied to the magnitudes in :attr:`input`. + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(5) + >>> a + tensor([-1.2557, -0.0026, -0.5387, 0.4740, -0.9244]) + >>> torch.copysign(a, 1) + tensor([1.2557, 0.0026, 0.5387, 0.4740, 0.9244]) + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.7079, 0.2778, -1.0249, 0.5719], + [-0.0059, -0.2600, -0.4475, -1.3948], + [ 0.3667, -0.9567, -2.5757, -0.1751], + [ 0.2046, -0.0742, 0.2998, -0.1054]]) + >>> b = torch.randn(4) + tensor([ 0.2373, 0.3120, 0.3190, -1.1128]) + >>> torch.copysign(a, b) + tensor([[ 0.7079, 0.2778, 1.0249, -0.5719], + [ 0.0059, 0.2600, 0.4475, -1.3948], + [ 0.3667, 0.9567, 2.5757, -0.1751], + [ 0.2046, 0.0742, 0.2998, -0.1054]]) + >>> a = torch.tensor([1.]) + >>> b = torch.tensor([-0.]) + >>> torch.copysign(a, b) + tensor([-1.]) + +.. note:: + copysign handles signed zeros. If the other argument has a negative zero (-0), + the corresponding output value will be negative. + +""".format( + **common_args + ), +) + +add_docstr( + torch.cos, + r""" +cos(input, *, out=None) -> Tensor + +Returns a new tensor with the cosine of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \cos(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 1.4309, 1.2706, -0.8562, 0.9796]) + >>> torch.cos(a) + tensor([ 0.1395, 0.2957, 0.6553, 0.5574]) +""".format( + **common_args + ), +) + +add_docstr( + torch.cosh, + r""" +cosh(input, *, out=None) -> Tensor + +Returns a new tensor with the hyperbolic cosine of the elements of +:attr:`input`. + +.. math:: + \text{out}_{i} = \cosh(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.1632, 1.1835, -0.6979, -0.7325]) + >>> torch.cosh(a) + tensor([ 1.0133, 1.7860, 1.2536, 1.2805]) + +.. note:: + When :attr:`input` is on the CPU, the implementation of torch.cosh may use + the Sleef library, which rounds very large results to infinity or negative + infinity. See `here `_ for details. +""".format( + **common_args + ), +) + +add_docstr( + torch.cross, + r""" +cross(input, other, dim=None, *, out=None) -> Tensor + + +Returns the cross product of vectors in dimension :attr:`dim` of :attr:`input` +and :attr:`other`. + +Supports input of float, double, cfloat and cdouble dtypes. Also supports batches +of vectors, for which it computes the product along the dimension :attr:`dim`. +In this case, the output has the same batch dimensions as the inputs. + +.. warning:: + If :attr:`dim` is not given, it defaults to the first dimension found + with the size 3. Note that this might be unexpected. + + This behavior is deprecated and will be changed to match that of :func:`torch.linalg.cross` + in a future release. + +.. seealso:: + :func:`torch.linalg.cross` which has dim=-1 as default. + + +Args: + {input} + other (Tensor): the second input tensor + dim (int, optional): the dimension to take the cross-product in. + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4, 3) + >>> a + tensor([[-0.3956, 1.1455, 1.6895], + [-0.5849, 1.3672, 0.3599], + [-1.1626, 0.7180, -0.0521], + [-0.1339, 0.9902, -2.0225]]) + >>> b = torch.randn(4, 3) + >>> b + tensor([[-0.0257, -1.4725, -1.2251], + [-1.1479, -0.7005, -1.9757], + [-1.3904, 0.3726, -1.1836], + [-0.9688, -0.7153, 0.2159]]) + >>> torch.cross(a, b, dim=1) + tensor([[ 1.0844, -0.5281, 0.6120], + [-2.4490, -1.5687, 1.9792], + [-0.8304, -1.3037, 0.5650], + [-1.2329, 1.9883, 1.0551]]) + >>> torch.cross(a, b) + tensor([[ 1.0844, -0.5281, 0.6120], + [-2.4490, -1.5687, 1.9792], + [-0.8304, -1.3037, 0.5650], + [-1.2329, 1.9883, 1.0551]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.logcumsumexp, + r""" +logcumsumexp(input, dim, *, out=None) -> Tensor +Returns the logarithm of the cumulative summation of the exponentiation of +elements of :attr:`input` in the dimension :attr:`dim`. + +For summation index :math:`j` given by `dim` and other indices :math:`i`, the result is + + .. math:: + \text{{logcumsumexp}}(x)_{{ij}} = \log \sum\limits_{{j=0}}^{{i}} \exp(x_{{ij}}) + +Args: + {input} + dim (int): the dimension to do the operation over + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(10) + >>> torch.logcumsumexp(a, dim=0) + tensor([-0.42296738, -0.04462666, 0.86278635, 0.94622083, 1.05277811, + 1.39202815, 1.83525007, 1.84492621, 2.06084887, 2.06844475])) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.cummax, + r""" +cummax(input, dim, *, out=None) -> (Tensor, LongTensor) +Returns a namedtuple ``(values, indices)`` where ``values`` is the cumulative maximum of +elements of :attr:`input` in the dimension :attr:`dim`. And ``indices`` is the index +location of each maximum value found in the dimension :attr:`dim`. + +.. math:: + y_i = max(x_1, x_2, x_3, \dots, x_i) + +Args: + {input} + dim (int): the dimension to do the operation over + +Keyword args: + out (tuple, optional): the result tuple of two output tensors (values, indices) + +Example:: + + >>> a = torch.randn(10) + >>> a + tensor([-0.3449, -1.5447, 0.0685, -1.5104, -1.1706, 0.2259, 1.4696, -1.3284, + 1.9946, -0.8209]) + >>> torch.cummax(a, dim=0) + torch.return_types.cummax( + values=tensor([-0.3449, -0.3449, 0.0685, 0.0685, 0.0685, 0.2259, 1.4696, 1.4696, + 1.9946, 1.9946]), + indices=tensor([0, 0, 2, 2, 2, 5, 6, 6, 8, 8])) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.cummin, + r""" +cummin(input, dim, *, out=None) -> (Tensor, LongTensor) +Returns a namedtuple ``(values, indices)`` where ``values`` is the cumulative minimum of +elements of :attr:`input` in the dimension :attr:`dim`. And ``indices`` is the index +location of each maximum value found in the dimension :attr:`dim`. + +.. math:: + y_i = min(x_1, x_2, x_3, \dots, x_i) + +Args: + {input} + dim (int): the dimension to do the operation over + +Keyword args: + out (tuple, optional): the result tuple of two output tensors (values, indices) + +Example:: + + >>> a = torch.randn(10) + >>> a + tensor([-0.2284, -0.6628, 0.0975, 0.2680, -1.3298, -0.4220, -0.3885, 1.1762, + 0.9165, 1.6684]) + >>> torch.cummin(a, dim=0) + torch.return_types.cummin( + values=tensor([-0.2284, -0.6628, -0.6628, -0.6628, -1.3298, -1.3298, -1.3298, -1.3298, + -1.3298, -1.3298]), + indices=tensor([0, 1, 1, 1, 4, 4, 4, 4, 4, 4])) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.cumprod, + r""" +cumprod(input, dim, *, dtype=None, out=None) -> Tensor + +Returns the cumulative product of elements of :attr:`input` in the dimension +:attr:`dim`. + +For example, if :attr:`input` is a vector of size N, the result will also be +a vector of size N, with elements. + +.. math:: + y_i = x_1 \times x_2\times x_3\times \dots \times x_i + +Args: + {input} + dim (int): the dimension to do the operation over + +Keyword args: + {dtype} + {out} + +Example:: + + >>> a = torch.randn(10) + >>> a + tensor([ 0.6001, 0.2069, -0.1919, 0.9792, 0.6727, 1.0062, 0.4126, + -0.2129, -0.4206, 0.1968]) + >>> torch.cumprod(a, dim=0) + tensor([ 0.6001, 0.1241, -0.0238, -0.0233, -0.0157, -0.0158, -0.0065, + 0.0014, -0.0006, -0.0001]) + + >>> a[5] = 0.0 + >>> torch.cumprod(a, dim=0) + tensor([ 0.6001, 0.1241, -0.0238, -0.0233, -0.0157, -0.0000, -0.0000, + 0.0000, -0.0000, -0.0000]) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.cumsum, + r""" +cumsum(input, dim, *, dtype=None, out=None) -> Tensor + +Returns the cumulative sum of elements of :attr:`input` in the dimension +:attr:`dim`. + +For example, if :attr:`input` is a vector of size N, the result will also be +a vector of size N, with elements. + +.. math:: + y_i = x_1 + x_2 + x_3 + \dots + x_i + +Args: + {input} + dim (int): the dimension to do the operation over + +Keyword args: + {dtype} + {out} + +Example:: + + >>> a = torch.randint(1, 20, (10,)) + >>> a + tensor([13, 7, 3, 10, 13, 3, 15, 10, 9, 10]) + >>> torch.cumsum(a, dim=0) + tensor([13, 20, 23, 33, 46, 49, 64, 74, 83, 93]) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.count_nonzero, + r""" +count_nonzero(input, dim=None) -> Tensor + +Counts the number of non-zero values in the tensor :attr:`input` along the given :attr:`dim`. +If no dim is specified then all non-zeros in the tensor are counted. + +Args: + {input} + dim (int or tuple of ints, optional): Dim or tuple of dims along which to count non-zeros. + +Example:: + + >>> x = torch.zeros(3,3) + >>> x[torch.randn(3,3) > 0.5] = 1 + >>> x + tensor([[0., 1., 1.], + [0., 0., 0.], + [0., 0., 1.]]) + >>> torch.count_nonzero(x) + tensor(3) + >>> torch.count_nonzero(x, dim=0) + tensor([0, 1, 2]) +""".format( + **reduceops_common_args + ), +) + +add_docstr( + torch.dequantize, + r""" +dequantize(tensor) -> Tensor + +Returns an fp32 Tensor by dequantizing a quantized Tensor + +Args: + tensor (Tensor): A quantized Tensor + +.. function:: dequantize(tensors) -> sequence of Tensors + :noindex: + +Given a list of quantized Tensors, dequantize them and return a list of fp32 Tensors + +Args: + tensors (sequence of Tensors): A list of quantized Tensors +""", +) + +add_docstr( + torch.diag, + r""" +diag(input, diagonal=0, *, out=None) -> Tensor + +- If :attr:`input` is a vector (1-D tensor), then returns a 2-D square tensor + with the elements of :attr:`input` as the diagonal. +- If :attr:`input` is a matrix (2-D tensor), then returns a 1-D tensor with + the diagonal elements of :attr:`input`. + +The argument :attr:`diagonal` controls which diagonal to consider: + +- If :attr:`diagonal` = 0, it is the main diagonal. +- If :attr:`diagonal` > 0, it is above the main diagonal. +- If :attr:`diagonal` < 0, it is below the main diagonal. + +Args: + {input} + diagonal (int, optional): the diagonal to consider + +Keyword args: + {out} + +.. seealso:: + + :func:`torch.diagonal` always returns the diagonal of its input. + + :func:`torch.diagflat` always constructs a tensor with diagonal elements + specified by the input. + +Examples: + +Get the square matrix where the input vector is the diagonal:: + + >>> a = torch.randn(3) + >>> a + tensor([ 0.5950,-0.0872, 2.3298]) + >>> torch.diag(a) + tensor([[ 0.5950, 0.0000, 0.0000], + [ 0.0000,-0.0872, 0.0000], + [ 0.0000, 0.0000, 2.3298]]) + >>> torch.diag(a, 1) + tensor([[ 0.0000, 0.5950, 0.0000, 0.0000], + [ 0.0000, 0.0000,-0.0872, 0.0000], + [ 0.0000, 0.0000, 0.0000, 2.3298], + [ 0.0000, 0.0000, 0.0000, 0.0000]]) + +Get the k-th diagonal of a given matrix:: + + >>> a = torch.randn(3, 3) + >>> a + tensor([[-0.4264, 0.0255,-0.1064], + [ 0.8795,-0.2429, 0.1374], + [ 0.1029,-0.6482,-1.6300]]) + >>> torch.diag(a, 0) + tensor([-0.4264,-0.2429,-1.6300]) + >>> torch.diag(a, 1) + tensor([ 0.0255, 0.1374]) +""".format( + **common_args + ), +) + +add_docstr( + torch.diag_embed, + r""" +diag_embed(input, offset=0, dim1=-2, dim2=-1) -> Tensor + +Creates a tensor whose diagonals of certain 2D planes (specified by +:attr:`dim1` and :attr:`dim2`) are filled by :attr:`input`. +To facilitate creating batched diagonal matrices, the 2D planes formed by +the last two dimensions of the returned tensor are chosen by default. + +The argument :attr:`offset` controls which diagonal to consider: + +- If :attr:`offset` = 0, it is the main diagonal. +- If :attr:`offset` > 0, it is above the main diagonal. +- If :attr:`offset` < 0, it is below the main diagonal. + +The size of the new matrix will be calculated to make the specified diagonal +of the size of the last input dimension. +Note that for :attr:`offset` other than :math:`0`, the order of :attr:`dim1` +and :attr:`dim2` matters. Exchanging them is equivalent to changing the +sign of :attr:`offset`. + +Applying :meth:`torch.diagonal` to the output of this function with +the same arguments yields a matrix identical to input. However, +:meth:`torch.diagonal` has different default dimensions, so those +need to be explicitly specified. + +Args: + {input} Must be at least 1-dimensional. + offset (int, optional): which diagonal to consider. Default: 0 + (main diagonal). + dim1 (int, optional): first dimension with respect to which to + take diagonal. Default: -2. + dim2 (int, optional): second dimension with respect to which to + take diagonal. Default: -1. + +Example:: + + >>> a = torch.randn(2, 3) + >>> torch.diag_embed(a) + tensor([[[ 1.5410, 0.0000, 0.0000], + [ 0.0000, -0.2934, 0.0000], + [ 0.0000, 0.0000, -2.1788]], + + [[ 0.5684, 0.0000, 0.0000], + [ 0.0000, -1.0845, 0.0000], + [ 0.0000, 0.0000, -1.3986]]]) + + >>> torch.diag_embed(a, offset=1, dim1=0, dim2=2) + tensor([[[ 0.0000, 1.5410, 0.0000, 0.0000], + [ 0.0000, 0.5684, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, -0.2934, 0.0000], + [ 0.0000, 0.0000, -1.0845, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, -2.1788], + [ 0.0000, 0.0000, 0.0000, -1.3986]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000]]]) +""".format( + **common_args + ), +) + + +add_docstr( + torch.diagflat, + r""" +diagflat(input, offset=0) -> Tensor + +- If :attr:`input` is a vector (1-D tensor), then returns a 2-D square tensor + with the elements of :attr:`input` as the diagonal. +- If :attr:`input` is a tensor with more than one dimension, then returns a + 2-D tensor with diagonal elements equal to a flattened :attr:`input`. + +The argument :attr:`offset` controls which diagonal to consider: + +- If :attr:`offset` = 0, it is the main diagonal. +- If :attr:`offset` > 0, it is above the main diagonal. +- If :attr:`offset` < 0, it is below the main diagonal. + +Args: + {input} + offset (int, optional): the diagonal to consider. Default: 0 (main + diagonal). + +Examples:: + + >>> a = torch.randn(3) + >>> a + tensor([-0.2956, -0.9068, 0.1695]) + >>> torch.diagflat(a) + tensor([[-0.2956, 0.0000, 0.0000], + [ 0.0000, -0.9068, 0.0000], + [ 0.0000, 0.0000, 0.1695]]) + >>> torch.diagflat(a, 1) + tensor([[ 0.0000, -0.2956, 0.0000, 0.0000], + [ 0.0000, 0.0000, -0.9068, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.1695], + [ 0.0000, 0.0000, 0.0000, 0.0000]]) + + >>> a = torch.randn(2, 2) + >>> a + tensor([[ 0.2094, -0.3018], + [-0.1516, 1.9342]]) + >>> torch.diagflat(a) + tensor([[ 0.2094, 0.0000, 0.0000, 0.0000], + [ 0.0000, -0.3018, 0.0000, 0.0000], + [ 0.0000, 0.0000, -0.1516, 0.0000], + [ 0.0000, 0.0000, 0.0000, 1.9342]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.diagonal, + r""" +diagonal(input, offset=0, dim1=0, dim2=1) -> Tensor + +Returns a partial view of :attr:`input` with the its diagonal elements +with respect to :attr:`dim1` and :attr:`dim2` appended as a dimension +at the end of the shape. + +The argument :attr:`offset` controls which diagonal to consider: + +- If :attr:`offset` = 0, it is the main diagonal. +- If :attr:`offset` > 0, it is above the main diagonal. +- If :attr:`offset` < 0, it is below the main diagonal. + +Applying :meth:`torch.diag_embed` to the output of this function with +the same arguments yields a diagonal matrix with the diagonal entries +of the input. However, :meth:`torch.diag_embed` has different default +dimensions, so those need to be explicitly specified. + +Args: + {input} Must be at least 2-dimensional. + offset (int, optional): which diagonal to consider. Default: 0 + (main diagonal). + dim1 (int, optional): first dimension with respect to which to + take diagonal. Default: 0. + dim2 (int, optional): second dimension with respect to which to + take diagonal. Default: 1. + +.. note:: To take a batch diagonal, pass in dim1=-2, dim2=-1. + +Examples:: + + >>> a = torch.randn(3, 3) + >>> a + tensor([[-1.0854, 1.1431, -0.1752], + [ 0.8536, -0.0905, 0.0360], + [ 0.6927, -0.3735, -0.4945]]) + + + >>> torch.diagonal(a, 0) + tensor([-1.0854, -0.0905, -0.4945]) + + + >>> torch.diagonal(a, 1) + tensor([ 1.1431, 0.0360]) + + + >>> x = torch.randn(2, 5, 4, 2) + >>> torch.diagonal(x, offset=-1, dim1=1, dim2=2) + tensor([[[-1.2631, 0.3755, -1.5977, -1.8172], + [-1.1065, 1.0401, -0.2235, -0.7938]], + + [[-1.7325, -0.3081, 0.6166, 0.2335], + [ 1.0500, 0.7336, -0.3836, -1.1015]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.diagonal_scatter, + r""" +diagonal_scatter(input, src, offset=0, dim1=0, dim2=1) -> Tensor + +Embeds the values of the :attr:`src` tensor into :attr:`input` along +the diagonal elements of :attr:`input`, with respect to :attr:`dim1` +and :attr:`dim2`. + +This function returns a tensor with fresh storage; it does not +return a view. + +The argument :attr:`offset` controls which diagonal to consider: + +- If :attr:`offset` = 0, it is the main diagonal. +- If :attr:`offset` > 0, it is above the main diagonal. +- If :attr:`offset` < 0, it is below the main diagonal. + +Args: + {input} Must be at least 2-dimensional. + src (Tensor): the tensor to embed into :attr:`input`. + offset (int, optional): which diagonal to consider. Default: 0 + (main diagonal). + dim1 (int, optional): first dimension with respect to which to + take diagonal. Default: 0. + dim2 (int, optional): second dimension with respect to which to + take diagonal. Default: 1. + +.. note:: + + :attr:`src` must be of the proper size in order to be embedded + into :attr:`input`. Specifically, it should have the same shape as + ``torch.diagonal(input, offset, dim1, dim2)`` + +Examples:: + + >>> a = torch.zeros(3, 3) + >>> a + tensor([[0., 0., 0.], + [0., 0., 0.], + [0., 0., 0.]]) + + >>> torch.diagonal_scatter(a, torch.ones(3), 0) + tensor([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + >>> torch.diagonal_scatter(a, torch.ones(2), 1) + tensor([[0., 1., 0.], + [0., 0., 1.], + [0., 0., 0.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.as_strided_scatter, + r""" +as_strided_scatter(input, src, size, stride, storage_offset=None) -> Tensor + +Embeds the values of the :attr:`src` tensor into :attr:`input` along +the elements corresponding to the result of calling +input.as_strided(size, stride, storage_offset). + +This function returns a tensor with fresh storage; it does not +return a view. + +Args: + {input} + size (tuple or ints): the shape of the output tensor + stride (tuple or ints): the stride of the output tensor + storage_offset (int, optional): the offset in the underlying storage of the output tensor + +.. note:: + + :attr:`src` must be of the proper size in order to be embedded + into :attr:`input`. Specifically, it should have the same shape as + `torch.as_strided(input, size, stride, storage_offset)` + +Example:: + + >>> a = torch.arange(4).reshape(2, 2) + 1 + >>> a + tensor([[1, 2], + [3, 4]]) + >>> b = torch.zeros(3, 3) + >>> b + tensor([[0., 0., 0.], + [0., 0., 0.], + [0., 0., 0.]]) + >>> torch.as_strided_scatter(b, a, (2, 2), (1, 2)) + tensor([[1., 3., 2.], + [4., 0., 0.], + [0., 0., 0.]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.diff, + r""" +diff(input, n=1, dim=-1, prepend=None, append=None) -> Tensor + +Computes the n-th forward difference along the given dimension. + +The first-order differences are given by `out[i] = input[i + 1] - input[i]`. Higher-order +differences are calculated by using :func:`torch.diff` recursively. + +Args: + input (Tensor): the tensor to compute the differences on + n (int, optional): the number of times to recursively compute the difference + dim (int, optional): the dimension to compute the difference along. + Default is the last dimension. + prepend, append (Tensor, optional): values to prepend or append to + :attr:`input` along :attr:`dim` before computing the difference. + Their dimensions must be equivalent to that of input, and their shapes + must match input's shape except on :attr:`dim`. + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([1, 3, 2]) + >>> torch.diff(a) + tensor([ 2, -1]) + >>> b = torch.tensor([4, 5]) + >>> torch.diff(a, append=b) + tensor([ 2, -1, 2, 1]) + >>> c = torch.tensor([[1, 2, 3], [3, 4, 5]]) + >>> torch.diff(c, dim=0) + tensor([[2, 2, 2]]) + >>> torch.diff(c, dim=1) + tensor([[1, 1], + [1, 1]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.digamma, + r""" +digamma(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.digamma`. +""", +) + +add_docstr( + torch.dist, + r""" +dist(input, other, p=2) -> Tensor + +Returns the p-norm of (:attr:`input` - :attr:`other`) + +The shapes of :attr:`input` and :attr:`other` must be +:ref:`broadcastable `. + +Args: + {input} + other (Tensor): the Right-hand-side input tensor + p (float, optional): the norm to be computed + +Example:: + + >>> x = torch.randn(4) + >>> x + tensor([-1.5393, -0.8675, 0.5916, 1.6321]) + >>> y = torch.randn(4) + >>> y + tensor([ 0.0967, -1.0511, 0.6295, 0.8360]) + >>> torch.dist(x, y, 3.5) + tensor(1.6727) + >>> torch.dist(x, y, 3) + tensor(1.6973) + >>> torch.dist(x, y, 0) + tensor(4.) + >>> torch.dist(x, y, 1) + tensor(2.6537) +""".format( + **common_args + ), +) + +add_docstr( + torch.div, + r""" +div(input, other, *, rounding_mode=None, out=None) -> Tensor + +Divides each element of the input ``input`` by the corresponding element of +:attr:`other`. + +.. math:: + \text{{out}}_i = \frac{{\text{{input}}_i}}{{\text{{other}}_i}} + +.. note:: + By default, this performs a "true" division like Python 3. + See the :attr:`rounding_mode` argument for floor division. + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer, float, and complex inputs. +Always promotes integer types to the default scalar type. + +Args: + input (Tensor): the dividend + other (Tensor or Number): the divisor + +Keyword args: + rounding_mode (str, optional): Type of rounding applied to the result: + + * None - default behavior. Performs no rounding and, if both :attr:`input` and + :attr:`other` are integer types, promotes the inputs to the default scalar type. + Equivalent to true division in Python (the ``/`` operator) and NumPy's ``np.true_divide``. + * ``"trunc"`` - rounds the results of the division towards zero. + Equivalent to C-style integer division. + * ``"floor"`` - rounds the results of the division down. + Equivalent to floor division in Python (the ``//`` operator) and NumPy's ``np.floor_divide``. + + {out} + +Examples:: + + >>> x = torch.tensor([ 0.3810, 1.2774, -0.2972, -0.3719, 0.4637]) + >>> torch.div(x, 0.5) + tensor([ 0.7620, 2.5548, -0.5944, -0.7438, 0.9274]) + + >>> a = torch.tensor([[-0.3711, -1.9353, -0.4605, -0.2917], + ... [ 0.1815, -1.0111, 0.9805, -1.5923], + ... [ 0.1062, 1.4581, 0.7759, -1.2344], + ... [-0.1830, -0.0313, 1.1908, -1.4757]]) + >>> b = torch.tensor([ 0.8032, 0.2930, -0.8113, -0.2308]) + >>> torch.div(a, b) + tensor([[-0.4620, -6.6051, 0.5676, 1.2639], + [ 0.2260, -3.4509, -1.2086, 6.8990], + [ 0.1322, 4.9764, -0.9564, 5.3484], + [-0.2278, -0.1068, -1.4678, 6.3938]]) + + >>> torch.div(a, b, rounding_mode='trunc') + tensor([[-0., -6., 0., 1.], + [ 0., -3., -1., 6.], + [ 0., 4., -0., 5.], + [-0., -0., -1., 6.]]) + + >>> torch.div(a, b, rounding_mode='floor') + tensor([[-1., -7., 0., 1.], + [ 0., -4., -2., 6.], + [ 0., 4., -1., 5.], + [-1., -1., -2., 6.]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.divide, + r""" +divide(input, other, *, rounding_mode=None, out=None) -> Tensor + +Alias for :func:`torch.div`. +""", +) + +add_docstr( + torch.dot, + r""" +dot(input, other, *, out=None) -> Tensor + +Computes the dot product of two 1D tensors. + +.. note:: + + Unlike NumPy's dot, torch.dot intentionally only supports computing the dot product + of two 1D tensors with the same number of elements. + +Args: + input (Tensor): first tensor in the dot product, must be 1D. + other (Tensor): second tensor in the dot product, must be 1D. + +Keyword args: + {out} + +Example:: + + >>> torch.dot(torch.tensor([2, 3]), torch.tensor([2, 1])) + tensor(7) +""".format( + **common_args + ), +) + +add_docstr( + torch.vdot, + r""" +vdot(input, other, *, out=None) -> Tensor + +Computes the dot product of two 1D vectors along a dimension. + +In symbols, this function computes + +.. math:: + + \sum_{i=1}^n \overline{x_i}y_i. + +where :math:`\overline{x_i}` denotes the conjugate for complex +vectors, and it is the identity for real vectors. + +.. note:: + + Unlike NumPy's vdot, torch.vdot intentionally only supports computing the dot product + of two 1D tensors with the same number of elements. + +.. seealso:: + + :func:`torch.linalg.vecdot` computes the dot product of two batches of vectors along a dimension. + +Args: + input (Tensor): first tensor in the dot product, must be 1D. Its conjugate is used if it's complex. + other (Tensor): second tensor in the dot product, must be 1D. + +Keyword args: +""" + + rf""" +.. note:: {common_args["out"]} +""" + + r""" + +Example:: + + >>> torch.vdot(torch.tensor([2, 3]), torch.tensor([2, 1])) + tensor(7) + >>> a = torch.tensor((1 +2j, 3 - 1j)) + >>> b = torch.tensor((2 +1j, 4 - 0j)) + >>> torch.vdot(a, b) + tensor([16.+1.j]) + >>> torch.vdot(b, a) + tensor([16.-1.j]) +""", +) + +add_docstr( + torch.eq, + r""" +eq(input, other, *, out=None) -> Tensor + +Computes element-wise equality + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or float): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is equal to :attr:`other` and False elsewhere + +Example:: + + >>> torch.eq(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[ True, False], + [False, True]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.equal, + r""" +equal(input, other) -> bool + +``True`` if two tensors have the same size and elements, ``False`` otherwise. + +Example:: + + >>> torch.equal(torch.tensor([1, 2]), torch.tensor([1, 2])) + True +""", +) + +add_docstr( + torch.erf, + r""" +erf(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.erf`. +""", +) + +add_docstr( + torch.erfc, + r""" +erfc(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.erfc`. +""", +) + +add_docstr( + torch.erfinv, + r""" +erfinv(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.erfinv`. +""", +) + +add_docstr( + torch.exp, + r""" +exp(input, *, out=None) -> Tensor + +Returns a new tensor with the exponential of the elements +of the input tensor :attr:`input`. + +.. math:: + y_{i} = e^{x_{i}} +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> torch.exp(torch.tensor([0, math.log(2.)])) + tensor([ 1., 2.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.exp2, + r""" +exp2(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.exp2`. +""", +) + +add_docstr( + torch.expm1, + r""" +expm1(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.expm1`. +""", +) + +add_docstr( + torch.eye, + r""" +eye(n, m=None, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. + +Args: + n (int): the number of rows + m (int, optional): the number of columns with default being :attr:`n` + +Keyword arguments: + {out} + {dtype} + {layout} + {device} + {requires_grad} + +Returns: + Tensor: A 2-D tensor with ones on the diagonal and zeros elsewhere + +Example:: + + >>> torch.eye(3) + tensor([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.floor, + r""" +floor(input, *, out=None) -> Tensor + +Returns a new tensor with the floor of the elements of :attr:`input`, +the largest integer less than or equal to each element. + +For integer inputs, follows the array-api convention of returning a +copy of the input tensor. + +.. math:: + \text{out}_{i} = \left\lfloor \text{input}_{i} \right\rfloor +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.8166, 1.5308, -0.2530, -0.2091]) + >>> torch.floor(a) + tensor([-1., 1., -1., -1.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.floor_divide, + r""" +floor_divide(input, other, *, out=None) -> Tensor + +.. note:: + + Before PyTorch 1.13 :func:`torch.floor_divide` incorrectly performed + truncation division. To restore the previous behavior use + :func:`torch.div` with ``rounding_mode='trunc'``. + +Computes :attr:`input` divided by :attr:`other`, elementwise, and floors +the result. + +.. math:: + \text{{out}}_i = \text{floor} \left( \frac{{\text{{input}}_i}}{{\text{{other}}_i}} \right) + +""" + + r""" + +Supports broadcasting to a common shape, type promotion, and integer and float inputs. + +Args: + input (Tensor or Number): the dividend + other (Tensor or Number): the divisor + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([4.0, 3.0]) + >>> b = torch.tensor([2.0, 2.0]) + >>> torch.floor_divide(a, b) + tensor([2.0, 1.0]) + >>> torch.floor_divide(a, 1.4) + tensor([2.0, 2.0]) +""".format( + **common_args + ), +) + +add_docstr( + torch.fmod, + r""" +fmod(input, other, *, out=None) -> Tensor + +Applies C++'s `std::fmod `_ entrywise. +The result has the same sign as the dividend :attr:`input` and its absolute value +is less than that of :attr:`other`. + +This function may be defined in terms of :func:`torch.div` as + +.. code:: python + + torch.fmod(a, b) == a - a.div(b, rounding_mode="trunc") * b + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer and float inputs. + +.. note:: + + When the divisor is zero, returns ``NaN`` for floating point dtypes + on both CPU and GPU; raises ``RuntimeError`` for integer division by + zero on CPU; Integer division by zero on GPU may return any value. + +.. note:: + + Complex inputs are not supported. In some cases, it is not mathematically + possible to satisfy the definition of a modulo operation with complex numbers. + +.. seealso:: + + :func:`torch.remainder` which implements Python's modulus operator. + This one is defined using division rounding down the result. + +Args: + input (Tensor): the dividend + other (Tensor or Scalar): the divisor + +Keyword args: + {out} + +Example:: + + >>> torch.fmod(torch.tensor([-3., -2, -1, 1, 2, 3]), 2) + tensor([-1., -0., -1., 1., 0., 1.]) + >>> torch.fmod(torch.tensor([1, 2, 3, 4, 5]), -1.5) + tensor([1.0000, 0.5000, 0.0000, 1.0000, 0.5000]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.frac, + r""" +frac(input, *, out=None) -> Tensor + +Computes the fractional portion of each element in :attr:`input`. + +.. math:: + \text{out}_{i} = \text{input}_{i} - \left\lfloor |\text{input}_{i}| \right\rfloor * \operatorname{sgn}(\text{input}_{i}) + +Example:: + + >>> torch.frac(torch.tensor([1, 2.5, -3.2])) + tensor([ 0.0000, 0.5000, -0.2000]) +""", +) + +add_docstr( + torch.frexp, + r""" +frexp(input, *, out=None) -> (Tensor mantissa, Tensor exponent) + +Decomposes :attr:`input` into mantissa and exponent tensors +such that :math:`\text{input} = \text{mantissa} \times 2^{\text{exponent}}`. + +The range of mantissa is the open interval (-1, 1). + +Supports float inputs. + +Args: + input (Tensor): the input tensor + + +Keyword args: + out (tuple, optional): the output tensors + +Example:: + + >>> x = torch.arange(9.) + >>> mantissa, exponent = torch.frexp(x) + >>> mantissa + tensor([0.0000, 0.5000, 0.5000, 0.7500, 0.5000, 0.6250, 0.7500, 0.8750, 0.5000]) + >>> exponent + tensor([0, 1, 2, 2, 3, 3, 3, 3, 4], dtype=torch.int32) + >>> torch.ldexp(mantissa, exponent) + tensor([0., 1., 2., 3., 4., 5., 6., 7., 8.]) +""", +) + +add_docstr( + torch.from_numpy, + r""" +from_numpy(ndarray) -> Tensor + +Creates a :class:`Tensor` from a :class:`numpy.ndarray`. + +The returned tensor and :attr:`ndarray` share the same memory. Modifications to +the tensor will be reflected in the :attr:`ndarray` and vice versa. The returned +tensor is not resizable. + +It currently accepts :attr:`ndarray` with dtypes of ``numpy.float64``, +``numpy.float32``, ``numpy.float16``, ``numpy.complex64``, ``numpy.complex128``, +``numpy.int64``, ``numpy.int32``, ``numpy.int16``, ``numpy.int8``, ``numpy.uint8``, +and ``bool``. + +.. warning:: + Writing to a tensor created from a read-only NumPy array is not supported and will result in undefined behavior. + +Example:: + + >>> a = numpy.array([1, 2, 3]) + >>> t = torch.from_numpy(a) + >>> t + tensor([ 1, 2, 3]) + >>> t[0] = -1 + >>> a + array([-1, 2, 3]) +""", +) + +add_docstr( + torch.frombuffer, + r""" +frombuffer(buffer, *, dtype, count=-1, offset=0, requires_grad=False) -> Tensor + +Creates a 1-dimensional :class:`Tensor` from an object that implements +the Python buffer protocol. + +Skips the first :attr:`offset` bytes in the buffer, and interprets the rest of +the raw bytes as a 1-dimensional tensor of type :attr:`dtype` with :attr:`count` +elements. + +Note that either of the following must be true: + +1. :attr:`count` is a positive non-zero number, and the total number of bytes +in the buffer is more than :attr:`offset` plus :attr:`count` times the size +(in bytes) of :attr:`dtype`. + +2. :attr:`count` is negative, and the length (number of bytes) of the buffer +subtracted by the :attr:`offset` is a multiple of the size (in bytes) of +:attr:`dtype`. + +The returned tensor and buffer share the same memory. Modifications to +the tensor will be reflected in the buffer and vice versa. The returned +tensor is not resizable. + +.. note:: + This function increments the reference count for the object that + owns the shared memory. Therefore, such memory will not be deallocated + before the returned tensor goes out of scope. + +.. warning:: + This function's behavior is undefined when passed an object implementing + the buffer protocol whose data is not on the CPU. Doing so is likely to + cause a segmentation fault. + +.. warning:: + This function does not try to infer the :attr:`dtype` (hence, it is not + optional). Passing a different :attr:`dtype` than its source may result + in unexpected behavior. + +Args: + buffer (object): a Python object that exposes the buffer interface. + +Keyword args: + dtype (:class:`torch.dtype`): the desired data type of returned tensor. + count (int, optional): the number of desired elements to be read. + If negative, all the elements (until the end of the buffer) will be + read. Default: -1. + offset (int, optional): the number of bytes to skip at the start of + the buffer. Default: 0. + {requires_grad} + +Example:: + + >>> import array + >>> a = array.array('i', [1, 2, 3]) + >>> t = torch.frombuffer(a, dtype=torch.int32) + >>> t + tensor([ 1, 2, 3]) + >>> t[0] = -1 + >>> a + array([-1, 2, 3]) + + >>> # Interprets the signed char bytes as 32-bit integers. + >>> # Each 4 signed char elements will be interpreted as + >>> # 1 signed 32-bit integer. + >>> import array + >>> a = array.array('b', [-1, 0, 0, 0]) + >>> torch.frombuffer(a, dtype=torch.int32) + tensor([255], dtype=torch.int32) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.from_file, + r""" +from_file(filename, shared=None, size=0, *, dtype=None, layout=None, device=None, pin_memory=False) + +Creates a CPU tensor with a storage backed by a memory-mapped file. + +If ``shared`` is True, then memory is shared between processes. All changes are written to the file. +If ``shared`` is False, then changes to the tensor do not affect the file. + +``size`` is the number of elements in the Tensor. If ``shared`` is ``False``, then the file must contain +at least ``size * sizeof(dtype)`` bytes. If ``shared`` is ``True`` the file will be created if needed. + +.. note:: + Only CPU tensors can be mapped to files. + +.. note:: + For now, tensors with storages backed by a memory-mapped file cannot be created in pinned memory. + + +Args: + filename (str): file name to map + shared (bool): whether to share memory (whether ``MAP_SHARED`` or ``MAP_PRIVATE`` is passed to the + underlying `mmap(2) call `_) + size (int): number of elements in the tensor + +Keyword args: + {dtype} + {layout} + {device} + {pin_memory} + +Example:: + >>> t = torch.randn(2, 5, dtype=torch.float64) + >>> t.numpy().tofile('storage.pt') + >>> t_mapped = torch.from_file('storage.pt', shared=False, size=10, dtype=torch.float64) + """.format( + **factory_common_args + ), +) + +add_docstr( + torch.flatten, + r""" +flatten(input, start_dim=0, end_dim=-1) -> Tensor + +Flattens :attr:`input` by reshaping it into a one-dimensional tensor. If :attr:`start_dim` or :attr:`end_dim` +are passed, only dimensions starting with :attr:`start_dim` and ending with :attr:`end_dim` are flattened. +The order of elements in :attr:`input` is unchanged. + +Unlike NumPy's flatten, which always copies input's data, this function may return the original object, a view, +or copy. If no dimensions are flattened, then the original object :attr:`input` is returned. Otherwise, if input can +be viewed as the flattened shape, then that view is returned. Finally, only if the input cannot be viewed as the +flattened shape is input's data copied. See :meth:`torch.Tensor.view` for details on when a view will be returned. + +.. note:: + Flattening a zero-dimensional tensor will return a one-dimensional view. + +Args: + {input} + start_dim (int): the first dim to flatten + end_dim (int): the last dim to flatten + +Example:: + + >>> t = torch.tensor([[[1, 2], + ... [3, 4]], + ... [[5, 6], + ... [7, 8]]]) + >>> torch.flatten(t) + tensor([1, 2, 3, 4, 5, 6, 7, 8]) + >>> torch.flatten(t, start_dim=1) + tensor([[1, 2, 3, 4], + [5, 6, 7, 8]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.unflatten, + r""" +unflatten(input, dim, sizes) -> Tensor + +Expands a dimension of the input tensor over multiple dimensions. + +.. seealso:: + + :func:`torch.flatten` the inverse of this function. It coalesces several dimensions into one. + +Args: + {input} + dim (int): Dimension to be unflattened, specified as an index into + ``input.shape``. + sizes (Tuple[int]): New shape of the unflattened dimension. + One of its elements can be `-1` in which case the corresponding output + dimension is inferred. Otherwise, the product of ``sizes`` *must* + equal ``input.shape[dim]``. + +Returns: + A View of input with the specified dimension unflattened. + +Examples:: + >>> torch.unflatten(torch.randn(3, 4, 1), 1, (2, 2)).shape + torch.Size([3, 2, 2, 1]) + >>> torch.unflatten(torch.randn(3, 4, 1), 1, (-1, 2)).shape + torch.Size([3, 2, 2, 1]) + >>> torch.unflatten(torch.randn(5, 12, 3), -2, (2, 2, 3, 1, 1)).shape + torch.Size([5, 2, 2, 3, 1, 1, 3]) +""".format( + **common_args + ), +) + +add_docstr( + torch.gather, + r""" +gather(input, dim, index, *, sparse_grad=False, out=None) -> Tensor + +Gathers values along an axis specified by `dim`. + +For a 3-D tensor the output is specified by:: + + out[i][j][k] = input[index[i][j][k]][j][k] # if dim == 0 + out[i][j][k] = input[i][index[i][j][k]][k] # if dim == 1 + out[i][j][k] = input[i][j][index[i][j][k]] # if dim == 2 + +:attr:`input` and :attr:`index` must have the same number of dimensions. +It is also required that ``index.size(d) <= input.size(d)`` for all +dimensions ``d != dim``. :attr:`out` will have the same shape as :attr:`index`. +Note that ``input`` and ``index`` do not broadcast against each other. + +Args: + input (Tensor): the source tensor + dim (int): the axis along which to index + index (LongTensor): the indices of elements to gather + +Keyword arguments: + sparse_grad (bool, optional): If ``True``, gradient w.r.t. :attr:`input` will be a sparse tensor. + out (Tensor, optional): the destination tensor + +Example:: + + >>> t = torch.tensor([[1, 2], [3, 4]]) + >>> torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]])) + tensor([[ 1, 1], + [ 4, 3]]) +""", +) + + +add_docstr( + torch.gcd, + r""" +gcd(input, other, *, out=None) -> Tensor + +Computes the element-wise greatest common divisor (GCD) of :attr:`input` and :attr:`other`. + +Both :attr:`input` and :attr:`other` must have integer types. + +.. note:: + This defines :math:`gcd(0, 0) = 0`. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.tensor([5, 10, 15]) + >>> b = torch.tensor([3, 4, 5]) + >>> torch.gcd(a, b) + tensor([1, 2, 5]) + >>> c = torch.tensor([3]) + >>> torch.gcd(a, c) + tensor([1, 1, 3]) +""".format( + **common_args + ), +) + +add_docstr( + torch.ge, + r""" +ge(input, other, *, out=None) -> Tensor + +Computes :math:`\text{input} \geq \text{other}` element-wise. +""" + + r""" + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or float): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is greater than or equal to :attr:`other` and False elsewhere + +Example:: + + >>> torch.ge(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[True, True], [False, True]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.greater_equal, + r""" +greater_equal(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.ge`. +""", +) + +add_docstr( + torch.gradient, + r""" +gradient(input, *, spacing=1, dim=None, edge_order=1) -> List of Tensors + +Estimates the gradient of a function :math:`g : \mathbb{R}^n \rightarrow \mathbb{R}` in +one or more dimensions using the `second-order accurate central differences method +`_ and +either first or second order estimates at the boundaries. + +The gradient of :math:`g` is estimated using samples. By default, when :attr:`spacing` is not +specified, the samples are entirely described by :attr:`input`, and the mapping of input coordinates +to an output is the same as the tensor's mapping of indices to values. For example, for a three-dimensional +:attr:`input` the function described is :math:`g : \mathbb{R}^3 \rightarrow \mathbb{R}`, and +:math:`g(1, 2, 3)\ == input[1, 2, 3]`. + +When :attr:`spacing` is specified, it modifies the relationship between :attr:`input` and input coordinates. +This is detailed in the "Keyword Arguments" section below. + +The gradient is estimated by estimating each partial derivative of :math:`g` independently. This estimation is +accurate if :math:`g` is in :math:`C^3` (it has at least 3 continuous derivatives), and the estimation can be +improved by providing closer samples. Mathematically, the value at each interior point of a partial derivative +is estimated using `Taylor’s theorem with remainder `_. +Letting :math:`x` be an interior point with :math:`x-h_l` and :math:`x+h_r` be points neighboring +it to the left and right respectively, :math:`f(x+h_r)` and :math:`f(x-h_l)` can be estimated using: + +.. math:: + \begin{aligned} + f(x+h_r) = f(x) + h_r f'(x) + {h_r}^2 \frac{f''(x)}{2} + {h_r}^3 \frac{f'''(\xi_1)}{6}, \xi_1 \in (x, x+h_r) \\ + f(x-h_l) = f(x) - h_l f'(x) + {h_l}^2 \frac{f''(x)}{2} - {h_l}^3 \frac{f'''(\xi_2)}{6}, \xi_2 \in (x, x-h_l) \\ + \end{aligned} + +Using the fact that :math:`f \in C^3` and solving the linear system, we derive: + +.. math:: + f'(x) \approx \frac{ {h_l}^2 f(x+h_r) - {h_r}^2 f(x-h_l) + + ({h_r}^2-{h_l}^2 ) f(x) }{ {h_r} {h_l}^2 + {h_r}^2 {h_l} } + +.. note:: + We estimate the gradient of functions in complex domain + :math:`g : \mathbb{C}^n \rightarrow \mathbb{C}` in the same way. + +The value of each partial derivative at the boundary points is computed differently. See edge_order below. + +Args: + input (``Tensor``): the tensor that represents the values of the function + +Keyword args: + spacing (``scalar``, ``list of scalar``, ``list of Tensor``, optional): :attr:`spacing` can be used to modify + how the :attr:`input` tensor's indices relate to sample coordinates. If :attr:`spacing` is a scalar then + the indices are multiplied by the scalar to produce the coordinates. For example, if :attr:`spacing=2` the + indices (1, 2, 3) become coordinates (2, 4, 6). If :attr:`spacing` is a list of scalars then the corresponding + indices are multiplied. For example, if :attr:`spacing=(2, -1, 3)` the indices (1, 2, 3) become coordinates (2, -2, 9). + Finally, if :attr:`spacing` is a list of one-dimensional tensors then each tensor specifies the coordinates for + the corresponding dimension. For example, if the indices are (1, 2, 3) and the tensors are (t0, t1, t2), then + the coordinates are (t0[1], t1[2], t2[3]) + + dim (``int``, ``list of int``, optional): the dimension or dimensions to approximate the gradient over. By default + the partial gradient in every dimension is computed. Note that when :attr:`dim` is specified the elements of + the :attr:`spacing` argument must correspond with the specified dims." + + edge_order (``int``, optional): 1 or 2, for `first-order + `_ or + `second-order `_ + estimation of the boundary ("edge") values, respectively. + +Examples:: + + >>> # Estimates the gradient of f(x)=x^2 at points [-2, -1, 2, 4] + >>> coordinates = (torch.tensor([-2., -1., 1., 4.]),) + >>> values = torch.tensor([4., 1., 1., 16.], ) + >>> torch.gradient(values, spacing = coordinates) + (tensor([-3., -2., 2., 5.]),) + + >>> # Estimates the gradient of the R^2 -> R function whose samples are + >>> # described by the tensor t. Implicit coordinates are [0, 1] for the outermost + >>> # dimension and [0, 1, 2, 3] for the innermost dimension, and function estimates + >>> # partial derivative for both dimensions. + >>> t = torch.tensor([[1, 2, 4, 8], [10, 20, 40, 80]]) + >>> torch.gradient(t) + (tensor([[ 9., 18., 36., 72.], + [ 9., 18., 36., 72.]]), + tensor([[ 1.0000, 1.5000, 3.0000, 4.0000], + [10.0000, 15.0000, 30.0000, 40.0000]])) + + >>> # A scalar value for spacing modifies the relationship between tensor indices + >>> # and input coordinates by multiplying the indices to find the + >>> # coordinates. For example, below the indices of the innermost + >>> # 0, 1, 2, 3 translate to coordinates of [0, 2, 4, 6], and the indices of + >>> # the outermost dimension 0, 1 translate to coordinates of [0, 2]. + >>> torch.gradient(t, spacing = 2.0) # dim = None (implicitly [0, 1]) + (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000], + [ 4.5000, 9.0000, 18.0000, 36.0000]]), + tensor([[ 0.5000, 0.7500, 1.5000, 2.0000], + [ 5.0000, 7.5000, 15.0000, 20.0000]])) + >>> # doubling the spacing between samples halves the estimated partial gradients. + + >>> + >>> # Estimates only the partial derivative for dimension 1 + >>> torch.gradient(t, dim = 1) # spacing = None (implicitly 1.) + (tensor([[ 1.0000, 1.5000, 3.0000, 4.0000], + [10.0000, 15.0000, 30.0000, 40.0000]]),) + + >>> # When spacing is a list of scalars, the relationship between the tensor + >>> # indices and input coordinates changes based on dimension. + >>> # For example, below, the indices of the innermost dimension 0, 1, 2, 3 translate + >>> # to coordinates of [0, 3, 6, 9], and the indices of the outermost dimension + >>> # 0, 1 translate to coordinates of [0, 2]. + >>> torch.gradient(t, spacing = [3., 2.]) + (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000], + [ 4.5000, 9.0000, 18.0000, 36.0000]]), + tensor([[ 0.3333, 0.5000, 1.0000, 1.3333], + [ 3.3333, 5.0000, 10.0000, 13.3333]])) + + >>> # The following example is a replication of the previous one with explicit + >>> # coordinates. + >>> coords = (torch.tensor([0, 2]), torch.tensor([0, 3, 6, 9])) + >>> torch.gradient(t, spacing = coords) + (tensor([[ 4.5000, 9.0000, 18.0000, 36.0000], + [ 4.5000, 9.0000, 18.0000, 36.0000]]), + tensor([[ 0.3333, 0.5000, 1.0000, 1.3333], + [ 3.3333, 5.0000, 10.0000, 13.3333]])) + +""", +) + +add_docstr( + torch.geqrf, + r""" +geqrf(input, *, out=None) -> (Tensor, Tensor) + +This is a low-level function for calling LAPACK's geqrf directly. This function +returns a namedtuple (a, tau) as defined in `LAPACK documentation for geqrf`_ . + +Computes a QR decomposition of :attr:`input`. +Both `Q` and `R` matrices are stored in the same output tensor `a`. +The elements of `R` are stored on and above the diagonal. +Elementary reflectors (or Householder vectors) implicitly defining matrix `Q` +are stored below the diagonal. +The results of this function can be used together with :func:`torch.linalg.householder_product` +to obtain the `Q` matrix or +with :func:`torch.ormqr`, which uses an implicit representation of the `Q` matrix, +for an efficient matrix-matrix multiplication. + +See `LAPACK documentation for geqrf`_ for further details. + +.. note:: + See also :func:`torch.linalg.qr`, which computes Q and R matrices, and :func:`torch.linalg.lstsq` + with the ``driver="gels"`` option for a function that can solve matrix equations using a QR decomposition. + +Args: + input (Tensor): the input matrix + +Keyword args: + out (tuple, optional): the output tuple of (Tensor, Tensor). Ignored if `None`. Default: `None`. + +.. _LAPACK documentation for geqrf: + http://www.netlib.org/lapack/explore-html/df/dc5/group__variants_g_ecomputational_ga3766ea903391b5cf9008132f7440ec7b.html + +""", +) + +add_docstr( + torch.inner, + r""" +inner(input, other, *, out=None) -> Tensor + +Computes the dot product for 1D tensors. For higher dimensions, sums the product +of elements from :attr:`input` and :attr:`other` along their last dimension. + +.. note:: + + If either :attr:`input` or :attr:`other` is a scalar, the result is equivalent + to `torch.mul(input, other)`. + + If both :attr:`input` and :attr:`other` are non-scalars, the size of their last + dimension must match and the result is equivalent to `torch.tensordot(input, + other, dims=([-1], [-1]))` + +Args: + input (Tensor): First input tensor + other (Tensor): Second input tensor + +Keyword args: + out (Tensor, optional): Optional output tensor to write result into. The output + shape is `input.shape[:-1] + other.shape[:-1]`. + +Example:: + + # Dot product + >>> torch.inner(torch.tensor([1, 2, 3]), torch.tensor([0, 2, 1])) + tensor(7) + + # Multidimensional input tensors + >>> a = torch.randn(2, 3) + >>> a + tensor([[0.8173, 1.0874, 1.1784], + [0.3279, 0.1234, 2.7894]]) + >>> b = torch.randn(2, 4, 3) + >>> b + tensor([[[-0.4682, -0.7159, 0.1506], + [ 0.4034, -0.3657, 1.0387], + [ 0.9892, -0.6684, 0.1774], + [ 0.9482, 1.3261, 0.3917]], + + [[ 0.4537, 0.7493, 1.1724], + [ 0.2291, 0.5749, -0.2267], + [-0.7920, 0.3607, -0.3701], + [ 1.3666, -0.5850, -1.7242]]]) + >>> torch.inner(a, b) + tensor([[[-0.9837, 1.1560, 0.2907, 2.6785], + [ 2.5671, 0.5452, -0.6912, -1.5509]], + + [[ 0.1782, 2.9843, 0.7366, 1.5672], + [ 3.5115, -0.4864, -1.2476, -4.4337]]]) + + # Scalar input + >>> torch.inner(a, torch.tensor(2)) + tensor([[1.6347, 2.1748, 2.3567], + [0.6558, 0.2469, 5.5787]]) +""", +) + +add_docstr( + torch.outer, + r""" +outer(input, vec2, *, out=None) -> Tensor + +Outer product of :attr:`input` and :attr:`vec2`. +If :attr:`input` is a vector of size :math:`n` and :attr:`vec2` is a vector of +size :math:`m`, then :attr:`out` must be a matrix of size :math:`(n \times m)`. + +.. note:: This function does not :ref:`broadcast `. + +Args: + input (Tensor): 1-D input vector + vec2 (Tensor): 1-D input vector + +Keyword args: + out (Tensor, optional): optional output matrix + +Example:: + + >>> v1 = torch.arange(1., 5.) + >>> v2 = torch.arange(1., 4.) + >>> torch.outer(v1, v2) + tensor([[ 1., 2., 3.], + [ 2., 4., 6.], + [ 3., 6., 9.], + [ 4., 8., 12.]]) +""", +) + +add_docstr( + torch.ger, + r""" +ger(input, vec2, *, out=None) -> Tensor + +Alias of :func:`torch.outer`. + +.. warning:: + This function is deprecated and will be removed in a future PyTorch release. + Use :func:`torch.outer` instead. +""", +) + +add_docstr( + torch.get_default_dtype, + r""" +get_default_dtype() -> torch.dtype + +Get the current default floating point :class:`torch.dtype`. + +Example:: + + >>> torch.get_default_dtype() # initial default for floating point is torch.float32 + torch.float32 + >>> torch.set_default_dtype(torch.float64) + >>> torch.get_default_dtype() # default is now changed to torch.float64 + torch.float64 + +""", +) + +add_docstr( + torch.get_num_threads, + r""" +get_num_threads() -> int + +Returns the number of threads used for parallelizing CPU operations +""", +) + +add_docstr( + torch.get_num_interop_threads, + r""" +get_num_interop_threads() -> int + +Returns the number of threads used for inter-op parallelism on CPU +(e.g. in JIT interpreter) +""", +) + +add_docstr( + torch.gt, + r""" +gt(input, other, *, out=None) -> Tensor + +Computes :math:`\text{input} > \text{other}` element-wise. +""" + + r""" + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or float): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is greater than :attr:`other` and False elsewhere + +Example:: + + >>> torch.gt(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[False, True], [False, False]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.greater, + r""" +greater(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.gt`. +""", +) + +add_docstr( + torch.histc, + r""" +histc(input, bins=100, min=0, max=0, *, out=None) -> Tensor + +Computes the histogram of a tensor. + +The elements are sorted into equal width bins between :attr:`min` and +:attr:`max`. If :attr:`min` and :attr:`max` are both zero, the minimum and +maximum values of the data are used. + +Elements lower than min and higher than max and ``NaN`` elements are ignored. + +Args: + {input} + bins (int): number of histogram bins + min (Scalar): lower end of the range (inclusive) + max (Scalar): upper end of the range (inclusive) + +Keyword args: + {out} + +Returns: + Tensor: Histogram represented as a tensor + +Example:: + + >>> torch.histc(torch.tensor([1., 2, 1]), bins=4, min=0, max=3) + tensor([ 0., 2., 1., 0.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.histogram, + r""" +histogram(input, bins, *, range=None, weight=None, density=False, out=None) -> (Tensor, Tensor) + +Computes a histogram of the values in a tensor. + +:attr:`bins` can be an integer or a 1D tensor. + +If :attr:`bins` is an int, it specifies the number of equal-width bins. +By default, the lower and upper range of the bins is determined by the +minimum and maximum elements of the input tensor. The :attr:`range` +argument can be provided to specify a range for the bins. + +If :attr:`bins` is a 1D tensor, it specifies the sequence of bin edges +including the rightmost edge. It should contain at least 2 elements +and its elements should be increasing. + +Args: + {input} + bins: int or 1D Tensor. If int, defines the number of equal-width bins. If tensor, + defines the sequence of bin edges including the rightmost edge. + +Keyword args: + range (tuple of float): Defines the range of the bins. + weight (Tensor): If provided, weight should have the same shape as input. Each value in + input contributes its associated weight towards its bin's result. + density (bool): If False, the result will contain the count (or total weight) in each bin. + If True, the result is the value of the probability density function over the bins, + normalized such that the integral over the range of the bins is 1. + {out} (tuple, optional): The result tuple of two output tensors (hist, bin_edges). + +Returns: + hist (Tensor): 1D Tensor containing the values of the histogram. + bin_edges(Tensor): 1D Tensor containing the edges of the histogram bins. + +Example:: + + >>> torch.histogram(torch.tensor([1., 2, 1]), bins=4, range=(0., 3.), weight=torch.tensor([1., 2., 4.])) + (tensor([ 0., 5., 2., 0.]), tensor([0., 0.75, 1.5, 2.25, 3.])) + >>> torch.histogram(torch.tensor([1., 2, 1]), bins=4, range=(0., 3.), weight=torch.tensor([1., 2., 4.]), density=True) + (tensor([ 0., 0.9524, 0.3810, 0.]), tensor([0., 0.75, 1.5, 2.25, 3.])) +""".format( + **common_args + ), +) + +add_docstr( + torch.histogramdd, + r""" +histogramdd(input, bins, *, range=None, weight=None, density=False, out=None) -> (Tensor, Tensor[]) + +Computes a multi-dimensional histogram of the values in a tensor. + +Interprets the elements of an input tensor whose innermost dimension has size N +as a collection of N-dimensional points. Maps each of the points into a set of +N-dimensional bins and returns the number of points (or total weight) in each bin. + +:attr:`input` must be a tensor with at least 2 dimensions. +If input has shape (M, N), each of its M rows defines a point in N-dimensional space. +If input has three or more dimensions, all but the last dimension are flattened. + +Each dimension is independently associated with its own strictly increasing sequence +of bin edges. Bin edges may be specified explicitly by passing a sequence of 1D +tensors. Alternatively, bin edges may be constructed automatically by passing a +sequence of integers specifying the number of equal-width bins in each dimension. + +For each N-dimensional point in input: + - Each of its coordinates is binned independently among the bin edges + corresponding to its dimension + - Binning results are combined to identify the N-dimensional bin (if any) + into which the point falls + - If the point falls into a bin, the bin's count (or total weight) is incremented + - Points which do not fall into any bin do not contribute to the output + +:attr:`bins` can be a sequence of N 1D tensors, a sequence of N ints, or a single int. + +If :attr:`bins` is a sequence of N 1D tensors, it explicitly specifies the N sequences +of bin edges. Each 1D tensor should contain a strictly increasing sequence with at +least one element. A sequence of K bin edges defines K-1 bins, explicitly specifying +the left and right edges of all bins. Every bin is exclusive of its left edge. Only +the rightmost bin is inclusive of its right edge. + +If :attr:`bins` is a sequence of N ints, it specifies the number of equal-width bins +in each dimension. By default, the leftmost and rightmost bin edges in each dimension +are determined by the minimum and maximum elements of the input tensor in the +corresponding dimension. The :attr:`range` argument can be provided to manually +specify the leftmost and rightmost bin edges in each dimension. + +If :attr:`bins` is an int, it specifies the number of equal-width bins for all dimensions. + +.. note:: + See also :func:`torch.histogram`, which specifically computes 1D histograms. + While :func:`torch.histogramdd` infers the dimensionality of its bins and + binned values from the shape of :attr:`input`, :func:`torch.histogram` + accepts and flattens :attr:`input` of any shape. + +Args: + {input} + bins: Tensor[], int[], or int. + If Tensor[], defines the sequences of bin edges. + If int[], defines the number of equal-width bins in each dimension. + If int, defines the number of equal-width bins for all dimensions. +Keyword args: + range (sequence of float): Defines the leftmost and rightmost bin edges + in each dimension. + weight (Tensor): By default, each value in the input has weight 1. If a weight + tensor is passed, each N-dimensional coordinate in input + contributes its associated weight towards its bin's result. + The weight tensor should have the same shape as the :attr:`input` + tensor excluding its innermost dimension N. + density (bool): If False (default), the result will contain the count (or total weight) + in each bin. If True, each count (weight) is divided by the total count + (total weight), then divided by the volume of its associated bin. +Returns: + hist (Tensor): N-dimensional Tensor containing the values of the histogram. + bin_edges(Tensor[]): sequence of N 1D Tensors containing the bin edges. + +Example:: + >>> torch.histogramdd(torch.tensor([[0., 1.], [1., 0.], [2., 0.], [2., 2.]]), bins=[3, 3], + ... weight=torch.tensor([1., 2., 4., 8.])) + torch.return_types.histogramdd( + hist=tensor([[0., 1., 0.], + [2., 0., 0.], + [4., 0., 8.]]), + bin_edges=(tensor([0.0000, 0.6667, 1.3333, 2.0000]), + tensor([0.0000, 0.6667, 1.3333, 2.0000]))) + + >>> torch.histogramdd(torch.tensor([[0., 0.], [1., 1.], [2., 2.]]), bins=[2, 2], + ... range=[0., 1., 0., 1.], density=True) + torch.return_types.histogramdd( + hist=tensor([[2., 0.], + [0., 2.]]), + bin_edges=(tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]))) + +""".format( + **common_args + ), +) +# TODO: Fix via https://github.com/pytorch/pytorch/issues/75798 +torch.histogramdd.__module__ = "torch" + +add_docstr( + torch.hypot, + r""" +hypot(input, other, *, out=None) -> Tensor + +Given the legs of a right triangle, return its hypotenuse. + +.. math:: + \text{out}_{i} = \sqrt{\text{input}_{i}^{2} + \text{other}_{i}^{2}} + +The shapes of ``input`` and ``other`` must be +:ref:`broadcastable `. +""" + + r""" +Args: + input (Tensor): the first input tensor + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])) + tensor([5.0000, 5.6569, 6.4031]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.i0, + r""" +i0(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.i0`. +""", +) + +add_docstr( + torch.igamma, + r""" +igamma(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.special.gammainc`. +""", +) + +add_docstr( + torch.igammac, + r""" +igammac(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.special.gammaincc`. +""", +) + +add_docstr( + torch.index_select, + r""" +index_select(input, dim, index, *, out=None) -> Tensor + +Returns a new tensor which indexes the :attr:`input` tensor along dimension +:attr:`dim` using the entries in :attr:`index` which is a `LongTensor`. + +The returned tensor has the same number of dimensions as the original tensor +(:attr:`input`). The :attr:`dim`\ th dimension has the same size as the length +of :attr:`index`; other dimensions have the same size as in the original tensor. + +.. note:: The returned tensor does **not** use the same storage as the original + tensor. If :attr:`out` has a different shape than expected, we + silently change it to the correct shape, reallocating the underlying + storage if necessary. + +Args: + {input} + dim (int): the dimension in which we index + index (IntTensor or LongTensor): the 1-D tensor containing the indices to index + +Keyword args: + {out} + +Example:: + + >>> x = torch.randn(3, 4) + >>> x + tensor([[ 0.1427, 0.0231, -0.5414, -1.0009], + [-0.4664, 0.2647, -0.1228, -1.1068], + [-1.1734, -0.6571, 0.7230, -0.6004]]) + >>> indices = torch.tensor([0, 2]) + >>> torch.index_select(x, 0, indices) + tensor([[ 0.1427, 0.0231, -0.5414, -1.0009], + [-1.1734, -0.6571, 0.7230, -0.6004]]) + >>> torch.index_select(x, 1, indices) + tensor([[ 0.1427, -0.5414], + [-0.4664, -0.1228], + [-1.1734, 0.7230]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.inverse, + r""" +inverse(input, *, out=None) -> Tensor + +Alias for :func:`torch.linalg.inv` +""", +) + +add_docstr( + torch.isin, + r""" +isin(elements, test_elements, *, assume_unique=False, invert=False) -> Tensor + +Tests if each element of :attr:`elements` is in :attr:`test_elements`. Returns +a boolean tensor of the same shape as :attr:`elements` that is True for elements +in :attr:`test_elements` and False otherwise. + +.. note:: + One of :attr:`elements` or :attr:`test_elements` can be a scalar, but not both. + +Args: + elements (Tensor or Scalar): Input elements + test_elements (Tensor or Scalar): Values against which to test for each input element + assume_unique (bool, optional): If True, assumes both :attr:`elements` and + :attr:`test_elements` contain unique elements, which can speed up the + calculation. Default: False + invert (bool, optional): If True, inverts the boolean return tensor, resulting in True + values for elements *not* in :attr:`test_elements`. Default: False + +Returns: + A boolean tensor of the same shape as :attr:`elements` that is True for elements in + :attr:`test_elements` and False otherwise + +Example: + >>> torch.isin(torch.tensor([[1, 2], [3, 4]]), torch.tensor([2, 3])) + tensor([[False, True], + [ True, False]]) +""", +) + +add_docstr( + torch.isinf, + r""" +isinf(input) -> Tensor + +Tests if each element of :attr:`input` is infinite +(positive or negative infinity) or not. + +.. note:: + Complex values are infinite when their real or imaginary part is + infinite. + +Args: + {input} + +Returns: + A boolean tensor that is True where :attr:`input` is infinite and False elsewhere + +Example:: + + >>> torch.isinf(torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')])) + tensor([False, True, False, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.isposinf, + r""" +isposinf(input, *, out=None) -> Tensor +Tests if each element of :attr:`input` is positive infinity or not. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([-float('inf'), float('inf'), 1.2]) + >>> torch.isposinf(a) + tensor([False, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.isneginf, + r""" +isneginf(input, *, out=None) -> Tensor +Tests if each element of :attr:`input` is negative infinity or not. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([-float('inf'), float('inf'), 1.2]) + >>> torch.isneginf(a) + tensor([ True, False, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.isclose, + r""" +isclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False) -> Tensor + +Returns a new tensor with boolean elements representing if each element of +:attr:`input` is "close" to the corresponding element of :attr:`other`. +Closeness is defined as: + +.. math:: + \lvert \text{input} - \text{other} \rvert \leq \texttt{atol} + \texttt{rtol} \times \lvert \text{other} \rvert +""" + + r""" + +where :attr:`input` and :attr:`other` are finite. Where :attr:`input` +and/or :attr:`other` are nonfinite they are close if and only if +they are equal, with NaNs being considered equal to each other when +:attr:`equal_nan` is True. + +Args: + input (Tensor): first tensor to compare + other (Tensor): second tensor to compare + atol (float, optional): absolute tolerance. Default: 1e-08 + rtol (float, optional): relative tolerance. Default: 1e-05 + equal_nan (bool, optional): if ``True``, then two ``NaN`` s will be considered equal. Default: ``False`` + +Examples:: + + >>> torch.isclose(torch.tensor((1., 2, 3)), torch.tensor((1 + 1e-10, 3, 4))) + tensor([ True, False, False]) + >>> torch.isclose(torch.tensor((float('inf'), 4)), torch.tensor((float('inf'), 6)), rtol=.5) + tensor([True, True]) +""", +) + +add_docstr( + torch.isfinite, + r""" +isfinite(input) -> Tensor + +Returns a new tensor with boolean elements representing if each element is `finite` or not. + +Real values are finite when they are not NaN, negative infinity, or infinity. +Complex values are finite when both their real and imaginary parts are finite. + +Args: + {input} + +Returns: + A boolean tensor that is True where :attr:`input` is finite and False elsewhere + +Example:: + + >>> torch.isfinite(torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')])) + tensor([True, False, True, False, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.isnan, + r""" +isnan(input) -> Tensor + +Returns a new tensor with boolean elements representing if each element of :attr:`input` +is NaN or not. Complex values are considered NaN when either their real +and/or imaginary part is NaN. + +Arguments: + {input} + +Returns: + A boolean tensor that is True where :attr:`input` is NaN and False elsewhere + +Example:: + + >>> torch.isnan(torch.tensor([1, float('nan'), 2])) + tensor([False, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.isreal, + r""" +isreal(input) -> Tensor + +Returns a new tensor with boolean elements representing if each element of :attr:`input` is real-valued or not. +All real-valued types are considered real. Complex values are considered real when their imaginary part is 0. + +Arguments: + {input} + +Returns: + A boolean tensor that is True where :attr:`input` is real and False elsewhere + +Example:: + + >>> torch.isreal(torch.tensor([1, 1+1j, 2+0j])) + tensor([True, False, True]) +""".format( + **common_args + ), +) + +add_docstr( + torch.is_floating_point, + r""" +is_floating_point(input) -> (bool) + +Returns True if the data type of :attr:`input` is a floating point data type i.e., +one of ``torch.float64``, ``torch.float32``, ``torch.float16``, and ``torch.bfloat16``. + +Args: + {input} +""".format( + **common_args + ), +) + +add_docstr( + torch.is_complex, + r""" +is_complex(input) -> (bool) + +Returns True if the data type of :attr:`input` is a complex data type i.e., +one of ``torch.complex64``, and ``torch.complex128``. + +Args: + {input} +""".format( + **common_args + ), +) + +add_docstr( + torch.is_grad_enabled, + r""" +is_grad_enabled() -> (bool) + +Returns True if grad mode is currently enabled. +""".format( + **common_args + ), +) + +add_docstr( + torch.is_inference_mode_enabled, + r""" +is_inference_mode_enabled() -> (bool) + +Returns True if inference mode is currently enabled. +""".format( + **common_args + ), +) + +add_docstr( + torch.is_inference, + r""" +is_inference(input) -> (bool) + +Returns True if :attr:`input` is an inference tensor. + +A non-view tensor is an inference tensor if and only if it was +allocated during inference mode. A view tensor is an inference +tensor if and only if the tensor it is a view of is an inference tensor. + +For details on inference mode please see +`Inference Mode `_. + +Args: + {input} +""".format( + **common_args + ), +) + +add_docstr( + torch.is_conj, + r""" +is_conj(input) -> (bool) + +Returns True if the :attr:`input` is a conjugated tensor, i.e. its conjugate bit is set to `True`. + +Args: + {input} +""".format( + **common_args + ), +) + +add_docstr( + torch.is_nonzero, + r""" +is_nonzero(input) -> (bool) + +Returns True if the :attr:`input` is a single element tensor which is not equal to zero +after type conversions. +i.e. not equal to ``torch.tensor([0.])`` or ``torch.tensor([0])`` or +``torch.tensor([False])``. +Throws a ``RuntimeError`` if ``torch.numel() != 1`` (even in case +of sparse tensors). + +Args: + {input} + +Examples:: + + >>> torch.is_nonzero(torch.tensor([0.])) + False + >>> torch.is_nonzero(torch.tensor([1.5])) + True + >>> torch.is_nonzero(torch.tensor([False])) + False + >>> torch.is_nonzero(torch.tensor([3])) + True + >>> torch.is_nonzero(torch.tensor([1, 3, 5])) + Traceback (most recent call last): + ... + RuntimeError: bool value of Tensor with more than one value is ambiguous + >>> torch.is_nonzero(torch.tensor([])) + Traceback (most recent call last): + ... + RuntimeError: bool value of Tensor with no values is ambiguous +""".format( + **common_args + ), +) + +add_docstr( + torch.kron, + r""" +kron(input, other, *, out=None) -> Tensor + +Computes the Kronecker product, denoted by :math:`\otimes`, of :attr:`input` and :attr:`other`. + +If :attr:`input` is a :math:`(a_0 \times a_1 \times \dots \times a_n)` tensor and :attr:`other` is a +:math:`(b_0 \times b_1 \times \dots \times b_n)` tensor, the result will be a +:math:`(a_0*b_0 \times a_1*b_1 \times \dots \times a_n*b_n)` tensor with the following entries: + +.. math:: + (\text{input} \otimes \text{other})_{k_0, k_1, \dots, k_n} = + \text{input}_{i_0, i_1, \dots, i_n} * \text{other}_{j_0, j_1, \dots, j_n}, + +where :math:`k_t = i_t * b_t + j_t` for :math:`0 \leq t \leq n`. +If one tensor has fewer dimensions than the other it is unsqueezed until it has the same number of dimensions. + +Supports real-valued and complex-valued inputs. + +.. note:: + This function generalizes the typical definition of the Kronecker product for two matrices to two tensors, + as described above. When :attr:`input` is a :math:`(m \times n)` matrix and :attr:`other` is a + :math:`(p \times q)` matrix, the result will be a :math:`(p*m \times q*n)` block matrix: + + .. math:: + \mathbf{A} \otimes \mathbf{B}=\begin{bmatrix} + a_{11} \mathbf{B} & \cdots & a_{1 n} \mathbf{B} \\ + \vdots & \ddots & \vdots \\ + a_{m 1} \mathbf{B} & \cdots & a_{m n} \mathbf{B} \end{bmatrix} + + where :attr:`input` is :math:`\mathbf{A}` and :attr:`other` is :math:`\mathbf{B}`. + +Arguments: + input (Tensor) + other (Tensor) + +Keyword args: + out (Tensor, optional): The output tensor. Ignored if ``None``. Default: ``None`` + +Examples:: + + >>> mat1 = torch.eye(2) + >>> mat2 = torch.ones(2, 2) + >>> torch.kron(mat1, mat2) + tensor([[1., 1., 0., 0.], + [1., 1., 0., 0.], + [0., 0., 1., 1.], + [0., 0., 1., 1.]]) + + >>> mat1 = torch.eye(2) + >>> mat2 = torch.arange(1, 5).reshape(2, 2) + >>> torch.kron(mat1, mat2) + tensor([[1., 2., 0., 0.], + [3., 4., 0., 0.], + [0., 0., 1., 2.], + [0., 0., 3., 4.]]) +""", +) + +add_docstr( + torch.kthvalue, + r""" +kthvalue(input, k, dim=None, keepdim=False, *, out=None) -> (Tensor, LongTensor) + +Returns a namedtuple ``(values, indices)`` where ``values`` is the :attr:`k` th +smallest element of each row of the :attr:`input` tensor in the given dimension +:attr:`dim`. And ``indices`` is the index location of each element found. + +If :attr:`dim` is not given, the last dimension of the `input` is chosen. + +If :attr:`keepdim` is ``True``, both the :attr:`values` and :attr:`indices` tensors +are the same size as :attr:`input`, except in the dimension :attr:`dim` where +they are of size 1. Otherwise, :attr:`dim` is squeezed +(see :func:`torch.squeeze`), resulting in both the :attr:`values` and +:attr:`indices` tensors having 1 fewer dimension than the :attr:`input` tensor. + +.. note:: + When :attr:`input` is a CUDA tensor and there are multiple valid + :attr:`k` th values, this function may nondeterministically return + :attr:`indices` for any of them. + +Args: + {input} + k (int): k for the k-th smallest element + dim (int, optional): the dimension to find the kth value along + {keepdim} + +Keyword args: + out (tuple, optional): the output tuple of (Tensor, LongTensor) + can be optionally given to be used as output buffers + +Example:: + + >>> x = torch.arange(1., 6.) + >>> x + tensor([ 1., 2., 3., 4., 5.]) + >>> torch.kthvalue(x, 4) + torch.return_types.kthvalue(values=tensor(4.), indices=tensor(3)) + + >>> x=torch.arange(1.,7.).resize_(2,3) + >>> x + tensor([[ 1., 2., 3.], + [ 4., 5., 6.]]) + >>> torch.kthvalue(x, 2, 0, True) + torch.return_types.kthvalue(values=tensor([[4., 5., 6.]]), indices=tensor([[1, 1, 1]])) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.lcm, + r""" +lcm(input, other, *, out=None) -> Tensor + +Computes the element-wise least common multiple (LCM) of :attr:`input` and :attr:`other`. + +Both :attr:`input` and :attr:`other` must have integer types. + +.. note:: + This defines :math:`lcm(0, 0) = 0` and :math:`lcm(0, a) = 0`. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.tensor([5, 10, 15]) + >>> b = torch.tensor([3, 4, 5]) + >>> torch.lcm(a, b) + tensor([15, 20, 15]) + >>> c = torch.tensor([3]) + >>> torch.lcm(a, c) + tensor([15, 30, 15]) +""".format( + **common_args + ), +) + +add_docstr( + torch.ldexp, + r""" +ldexp(input, other, *, out=None) -> Tensor + +Multiplies :attr:`input` by 2 ** :attr:`other`. + +.. math:: + \text{{out}}_i = \text{{input}}_i * 2^\text{{other}}_i +""" + + r""" + +Typically this function is used to construct floating point numbers by multiplying +mantissas in :attr:`input` with integral powers of two created from the exponents +in :attr:`other`. + +Args: + {input} + other (Tensor): a tensor of exponents, typically integers. + +Keyword args: + {out} + +Example:: + + >>> torch.ldexp(torch.tensor([1.]), torch.tensor([1])) + tensor([2.]) + >>> torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])) + tensor([ 2., 4., 8., 16.]) + + +""".format( + **common_args + ), +) + +add_docstr( + torch.le, + r""" +le(input, other, *, out=None) -> Tensor + +Computes :math:`\text{input} \leq \text{other}` element-wise. +""" + + r""" + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or Scalar): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is less than or equal to + :attr:`other` and False elsewhere + +Example:: + + >>> torch.le(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[True, False], [True, True]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.less_equal, + r""" +less_equal(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.le`. +""", +) + +add_docstr( + torch.lerp, + r""" +lerp(input, end, weight, *, out=None) + +Does a linear interpolation of two tensors :attr:`start` (given by :attr:`input`) and :attr:`end` based +on a scalar or tensor :attr:`weight` and returns the resulting :attr:`out` tensor. + +.. math:: + \text{out}_i = \text{start}_i + \text{weight}_i \times (\text{end}_i - \text{start}_i) +""" + + r""" +The shapes of :attr:`start` and :attr:`end` must be +:ref:`broadcastable `. If :attr:`weight` is a tensor, then +the shapes of :attr:`weight`, :attr:`start`, and :attr:`end` must be :ref:`broadcastable `. + +Args: + input (Tensor): the tensor with the starting points + end (Tensor): the tensor with the ending points + weight (float or tensor): the weight for the interpolation formula + +Keyword args: + {out} + +Example:: + + >>> start = torch.arange(1., 5.) + >>> end = torch.empty(4).fill_(10) + >>> start + tensor([ 1., 2., 3., 4.]) + >>> end + tensor([ 10., 10., 10., 10.]) + >>> torch.lerp(start, end, 0.5) + tensor([ 5.5000, 6.0000, 6.5000, 7.0000]) + >>> torch.lerp(start, end, torch.full_like(start, 0.5)) + tensor([ 5.5000, 6.0000, 6.5000, 7.0000]) +""".format( + **common_args + ), +) + +add_docstr( + torch.lgamma, + r""" +lgamma(input, *, out=None) -> Tensor + +Computes the natural logarithm of the absolute value of the gamma function on :attr:`input`. + +.. math:: + \text{out}_{i} = \ln |\Gamma(\text{input}_{i})| +""" + + """ +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.arange(0.5, 2, 0.5) + >>> torch.lgamma(a) + tensor([ 0.5724, 0.0000, -0.1208]) +""".format( + **common_args + ), +) + +add_docstr( + torch.linspace, + r""" +linspace(start, end, steps, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Creates a one-dimensional tensor of size :attr:`steps` whose values are evenly +spaced from :attr:`start` to :attr:`end`, inclusive. That is, the value are: + +.. math:: + (\text{start}, + \text{start} + \frac{\text{end} - \text{start}}{\text{steps} - 1}, + \ldots, + \text{start} + (\text{steps} - 2) * \frac{\text{end} - \text{start}}{\text{steps} - 1}, + \text{end}) +""" + + """ + +From PyTorch 1.11 linspace requires the steps argument. Use steps=100 to restore the previous behavior. + +Args: + start (float or Tensor): the starting value for the set of points. If `Tensor`, it must be 0-dimensional + end (float or Tensor): the ending value for the set of points. If `Tensor`, it must be 0-dimensional + steps (int): size of the constructed tensor + +Keyword arguments: + {out} + dtype (torch.dtype, optional): the data type to perform the computation in. + Default: if None, uses the global default dtype (see torch.get_default_dtype()) + when both :attr:`start` and :attr:`end` are real, + and corresponding complex dtype when either is complex. + {layout} + {device} + {requires_grad} + + +Example:: + + >>> torch.linspace(3, 10, steps=5) + tensor([ 3.0000, 4.7500, 6.5000, 8.2500, 10.0000]) + >>> torch.linspace(-10, 10, steps=5) + tensor([-10., -5., 0., 5., 10.]) + >>> torch.linspace(start=-10, end=10, steps=5) + tensor([-10., -5., 0., 5., 10.]) + >>> torch.linspace(start=-10, end=10, steps=1) + tensor([-10.]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.log, + r""" +log(input, *, out=None) -> Tensor + +Returns a new tensor with the natural logarithm of the elements +of :attr:`input`. + +.. math:: + y_{i} = \log_{e} (x_{i}) +""" + + r""" + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.rand(5) * 5 + >>> a + tensor([4.7767, 4.3234, 1.2156, 0.2411, 4.5739]) + >>> torch.log(a) + tensor([ 1.5637, 1.4640, 0.1952, -1.4226, 1.5204]) +""".format( + **common_args + ), +) + +add_docstr( + torch.log10, + r""" +log10(input, *, out=None) -> Tensor + +Returns a new tensor with the logarithm to the base 10 of the elements +of :attr:`input`. + +.. math:: + y_{i} = \log_{10} (x_{i}) +""" + + r""" + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.rand(5) + >>> a + tensor([ 0.5224, 0.9354, 0.7257, 0.1301, 0.2251]) + + + >>> torch.log10(a) + tensor([-0.2820, -0.0290, -0.1392, -0.8857, -0.6476]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.log1p, + r""" +log1p(input, *, out=None) -> Tensor + +Returns a new tensor with the natural logarithm of (1 + :attr:`input`). + +.. math:: + y_i = \log_{e} (x_i + 1) +""" + + r""" +.. note:: This function is more accurate than :func:`torch.log` for small + values of :attr:`input` + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(5) + >>> a + tensor([-1.0090, -0.9923, 1.0249, -0.5372, 0.2492]) + >>> torch.log1p(a) + tensor([ nan, -4.8653, 0.7055, -0.7705, 0.2225]) +""".format( + **common_args + ), +) + +add_docstr( + torch.log2, + r""" +log2(input, *, out=None) -> Tensor + +Returns a new tensor with the logarithm to the base 2 of the elements +of :attr:`input`. + +.. math:: + y_{i} = \log_{2} (x_{i}) +""" + + r""" + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.rand(5) + >>> a + tensor([ 0.8419, 0.8003, 0.9971, 0.5287, 0.0490]) + + + >>> torch.log2(a) + tensor([-0.2483, -0.3213, -0.0042, -0.9196, -4.3504]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.logaddexp, + r""" +logaddexp(input, other, *, out=None) -> Tensor + +Logarithm of the sum of exponentiations of the inputs. + +Calculates pointwise :math:`\log\left(e^x + e^y\right)`. This function is useful +in statistics where the calculated probabilities of events may be so small as to +exceed the range of normal floating point numbers. In such cases the logarithm +of the calculated probability is stored. This function allows adding +probabilities stored in such a fashion. + +This op should be disambiguated with :func:`torch.logsumexp` which performs a +reduction on a single tensor. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword arguments: + {out} + +Example:: + + >>> torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1.0, -2, -3])) + tensor([-0.3069, -0.6867, -0.8731]) + >>> torch.logaddexp(torch.tensor([-100.0, -200, -300]), torch.tensor([-1.0, -2, -3])) + tensor([-1., -2., -3.]) + >>> torch.logaddexp(torch.tensor([1.0, 2000, 30000]), torch.tensor([-1.0, -2, -3])) + tensor([1.1269e+00, 2.0000e+03, 3.0000e+04]) +""".format( + **common_args + ), +) + +add_docstr( + torch.logaddexp2, + r""" +logaddexp2(input, other, *, out=None) -> Tensor + +Logarithm of the sum of exponentiations of the inputs in base-2. + +Calculates pointwise :math:`\log_2\left(2^x + 2^y\right)`. See +:func:`torch.logaddexp` for more details. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword arguments: + {out} +""".format( + **common_args + ), +) + +add_docstr( + torch.xlogy, + r""" +xlogy(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.special.xlogy`. +""", +) + +add_docstr( + torch.logical_and, + r""" +logical_and(input, other, *, out=None) -> Tensor + +Computes the element-wise logical AND of the given input tensors. Zeros are treated as ``False`` and nonzeros are +treated as ``True``. + +Args: + {input} + other (Tensor): the tensor to compute AND with + +Keyword args: + {out} + +Example:: + + >>> torch.logical_and(torch.tensor([True, False, True]), torch.tensor([True, False, False])) + tensor([ True, False, False]) + >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8) + >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8) + >>> torch.logical_and(a, b) + tensor([False, False, True, False]) + >>> torch.logical_and(a.double(), b.double()) + tensor([False, False, True, False]) + >>> torch.logical_and(a.double(), b) + tensor([False, False, True, False]) + >>> torch.logical_and(a, b, out=torch.empty(4, dtype=torch.bool)) + tensor([False, False, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.logical_not, + r""" +logical_not(input, *, out=None) -> Tensor + +Computes the element-wise logical NOT of the given input tensor. If not specified, the output tensor will have the bool +dtype. If the input tensor is not a bool tensor, zeros are treated as ``False`` and non-zeros are treated as ``True``. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> torch.logical_not(torch.tensor([True, False])) + tensor([False, True]) + >>> torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)) + tensor([ True, False, False]) + >>> torch.logical_not(torch.tensor([0., 1.5, -10.], dtype=torch.double)) + tensor([ True, False, False]) + >>> torch.logical_not(torch.tensor([0., 1., -10.], dtype=torch.double), out=torch.empty(3, dtype=torch.int16)) + tensor([1, 0, 0], dtype=torch.int16) +""".format( + **common_args + ), +) + +add_docstr( + torch.logical_or, + r""" +logical_or(input, other, *, out=None) -> Tensor + +Computes the element-wise logical OR of the given input tensors. Zeros are treated as ``False`` and nonzeros are +treated as ``True``. + +Args: + {input} + other (Tensor): the tensor to compute OR with + +Keyword args: + {out} + +Example:: + + >>> torch.logical_or(torch.tensor([True, False, True]), torch.tensor([True, False, False])) + tensor([ True, False, True]) + >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8) + >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8) + >>> torch.logical_or(a, b) + tensor([ True, True, True, False]) + >>> torch.logical_or(a.double(), b.double()) + tensor([ True, True, True, False]) + >>> torch.logical_or(a.double(), b) + tensor([ True, True, True, False]) + >>> torch.logical_or(a, b, out=torch.empty(4, dtype=torch.bool)) + tensor([ True, True, True, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.logical_xor, + r""" +logical_xor(input, other, *, out=None) -> Tensor + +Computes the element-wise logical XOR of the given input tensors. Zeros are treated as ``False`` and nonzeros are +treated as ``True``. + +Args: + {input} + other (Tensor): the tensor to compute XOR with + +Keyword args: + {out} + +Example:: + + >>> torch.logical_xor(torch.tensor([True, False, True]), torch.tensor([True, False, False])) + tensor([False, False, True]) + >>> a = torch.tensor([0, 1, 10, 0], dtype=torch.int8) + >>> b = torch.tensor([4, 0, 1, 0], dtype=torch.int8) + >>> torch.logical_xor(a, b) + tensor([ True, True, False, False]) + >>> torch.logical_xor(a.double(), b.double()) + tensor([ True, True, False, False]) + >>> torch.logical_xor(a.double(), b) + tensor([ True, True, False, False]) + >>> torch.logical_xor(a, b, out=torch.empty(4, dtype=torch.bool)) + tensor([ True, True, False, False]) +""".format( + **common_args + ), +) + +add_docstr( + torch.logspace, + """ +logspace(start, end, steps, base=10.0, *, \ + out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" + +Creates a one-dimensional tensor of size :attr:`steps` whose values are evenly +spaced from :math:`{{\text{{base}}}}^{{\text{{start}}}}` to +:math:`{{\text{{base}}}}^{{\text{{end}}}}`, inclusive, on a logarithmic scale +with base :attr:`base`. That is, the values are: + +.. math:: + (\text{base}^{\text{start}}, + \text{base}^{(\text{start} + \frac{\text{end} - \text{start}}{ \text{steps} - 1})}, + \ldots, + \text{base}^{(\text{start} + (\text{steps} - 2) * \frac{\text{end} - \text{start}}{ \text{steps} - 1})}, + \text{base}^{\text{end}}) +""" + + """ + + +From PyTorch 1.11 logspace requires the steps argument. Use steps=100 to restore the previous behavior. + +Args: + start (float or Tensor): the starting value for the set of points. If `Tensor`, it must be 0-dimensional + end (float or Tensor): the ending value for the set of points. If `Tensor`, it must be 0-dimensional + steps (int): size of the constructed tensor + base (float, optional): base of the logarithm function. Default: ``10.0``. + +Keyword arguments: + {out} + dtype (torch.dtype, optional): the data type to perform the computation in. + Default: if None, uses the global default dtype (see torch.get_default_dtype()) + when both :attr:`start` and :attr:`end` are real, + and corresponding complex dtype when either is complex. + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.logspace(start=-10, end=10, steps=5) + tensor([ 1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10]) + >>> torch.logspace(start=0.1, end=1.0, steps=5) + tensor([ 1.2589, 2.1135, 3.5481, 5.9566, 10.0000]) + >>> torch.logspace(start=0.1, end=1.0, steps=1) + tensor([1.2589]) + >>> torch.logspace(start=2, end=2, steps=1, base=2) + tensor([4.0]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.logsumexp, + r""" +logsumexp(input, dim, keepdim=False, *, out=None) + +Returns the log of summed exponentials of each row of the :attr:`input` +tensor in the given dimension :attr:`dim`. The computation is numerically +stabilized. + +For summation index :math:`j` given by `dim` and other indices :math:`i`, the result is + + .. math:: + \text{{logsumexp}}(x)_{{i}} = \log \sum_j \exp(x_{{ij}}) + +{keepdim_details} + +Args: + {input} + {opt_dim} + {keepdim} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(3, 3) + >>> torch.logsumexp(a, 1) + tensor([1.4907, 1.0593, 1.5696]) + >>> torch.dist(torch.logsumexp(a, 1), torch.log(torch.sum(torch.exp(a), 1))) + tensor(1.6859e-07) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.lt, + r""" +lt(input, other, *, out=None) -> Tensor + +Computes :math:`\text{input} < \text{other}` element-wise. +""" + + r""" + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or float): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is less than :attr:`other` and False elsewhere + +Example:: + + >>> torch.lt(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[False, False], [True, False]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.lu_unpack, + r""" +lu_unpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True, *, out=None) -> (Tensor, Tensor, Tensor) + +Unpacks the LU decomposition returned by :func:`~linalg.lu_factor` into the `P, L, U` matrices. + +.. seealso:: + + :func:`~linalg.lu` returns the matrices from the LU decomposition. Its gradient formula is more efficient + than that of doing :func:`~linalg.lu_factor` followed by :func:`~linalg.lu_unpack`. + +Args: + LU_data (Tensor): the packed LU factorization data + LU_pivots (Tensor): the packed LU factorization pivots + unpack_data (bool): flag indicating if the data should be unpacked. + If ``False``, then the returned ``L`` and ``U`` are empty tensors. + Default: ``True`` + unpack_pivots (bool): flag indicating if the pivots should be unpacked into a permutation matrix ``P``. + If ``False``, then the returned ``P`` is an empty tensor. + Default: ``True`` + +Keyword args: + out (tuple, optional): output tuple of three tensors. Ignored if `None`. + +Returns: + A namedtuple ``(P, L, U)`` + +Examples:: + + >>> A = torch.randn(2, 3, 3) + >>> LU, pivots = torch.linalg.lu_factor(A) + >>> P, L, U = torch.lu_unpack(LU, pivots) + >>> # We can recover A from the factorization + >>> A_ = P @ L @ U + >>> torch.allclose(A, A_) + True + + >>> # LU factorization of a rectangular matrix: + >>> A = torch.randn(2, 3, 2) + >>> LU, pivots = torch.linalg.lu_factor(A) + >>> P, L, U = torch.lu_unpack(LU, pivots) + >>> # P, L, U are the same as returned by linalg.lu + >>> P_, L_, U_ = torch.linalg.lu(A) + >>> torch.allclose(P, P_) and torch.allclose(L, L_) and torch.allclose(U, U_) + True + +""".format( + **common_args + ), +) + +add_docstr( + torch.less, + r""" +less(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.lt`. +""", +) + +add_docstr( + torch.lu_solve, + r""" +lu_solve(b, LU_data, LU_pivots, *, out=None) -> Tensor + +Returns the LU solve of the linear system :math:`Ax = b` using the partially pivoted +LU factorization of A from :func:`~linalg.lu_factor`. + +This function supports ``float``, ``double``, ``cfloat`` and ``cdouble`` dtypes for :attr:`input`. + +.. warning:: + + :func:`torch.lu_solve` is deprecated in favor of :func:`torch.linalg.lu_solve`. + :func:`torch.lu_solve` will be removed in a future PyTorch release. + ``X = torch.lu_solve(B, LU, pivots)`` should be replaced with + + .. code:: python + + X = linalg.lu_solve(LU, pivots, B) + +Arguments: + b (Tensor): the RHS tensor of size :math:`(*, m, k)`, where :math:`*` + is zero or more batch dimensions. + LU_data (Tensor): the pivoted LU factorization of A from :meth:`~linalg.lu_factor` of size :math:`(*, m, m)`, + where :math:`*` is zero or more batch dimensions. + LU_pivots (IntTensor): the pivots of the LU factorization from :meth:`~linalg.lu_factor` of size :math:`(*, m)`, + where :math:`*` is zero or more batch dimensions. + The batch dimensions of :attr:`LU_pivots` must be equal to the batch dimensions of + :attr:`LU_data`. + +Keyword args: + {out} + +Example:: + + >>> A = torch.randn(2, 3, 3) + >>> b = torch.randn(2, 3, 1) + >>> LU, pivots = torch.linalg.lu_factor(A) + >>> x = torch.lu_solve(b, LU, pivots) + >>> torch.dist(A @ x, b) + tensor(1.00000e-07 * + 2.8312) +""".format( + **common_args + ), +) + +add_docstr( + torch.masked_select, + r""" +masked_select(input, mask, *, out=None) -> Tensor + +Returns a new 1-D tensor which indexes the :attr:`input` tensor according to +the boolean mask :attr:`mask` which is a `BoolTensor`. + +The shapes of the :attr:`mask` tensor and the :attr:`input` tensor don't need +to match, but they must be :ref:`broadcastable `. + +.. note:: The returned tensor does **not** use the same storage + as the original tensor + +Args: + {input} + mask (BoolTensor): the tensor containing the binary mask to index with + +Keyword args: + {out} + +Example:: + + >>> x = torch.randn(3, 4) + >>> x + tensor([[ 0.3552, -2.3825, -0.8297, 0.3477], + [-1.2035, 1.2252, 0.5002, 0.6248], + [ 0.1307, -2.0608, 0.1244, 2.0139]]) + >>> mask = x.ge(0.5) + >>> mask + tensor([[False, False, False, False], + [False, True, True, True], + [False, False, False, True]]) + >>> torch.masked_select(x, mask) + tensor([ 1.2252, 0.5002, 0.6248, 2.0139]) +""".format( + **common_args + ), +) + +add_docstr( + torch.matrix_power, + r""" +matrix_power(input, n, *, out=None) -> Tensor + +Alias for :func:`torch.linalg.matrix_power` +""", +) + +add_docstr( + torch.matrix_exp, + r""" +matrix_exp(A) -> Tensor + +Alias for :func:`torch.linalg.matrix_exp`. +""", +) + +add_docstr( + torch.max, + r""" +max(input) -> Tensor + +Returns the maximum value of all elements in the ``input`` tensor. + +.. warning:: + This function produces deterministic (sub)gradients unlike ``max(dim=0)`` + +Args: + {input} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[ 0.6763, 0.7445, -2.2369]]) + >>> torch.max(a) + tensor(0.7445) + +.. function:: max(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor) + :noindex: + +Returns a namedtuple ``(values, indices)`` where ``values`` is the maximum +value of each row of the :attr:`input` tensor in the given dimension +:attr:`dim`. And ``indices`` is the index location of each maximum value found +(argmax). + +If ``keepdim`` is ``True``, the output tensors are of the same size +as ``input`` except in the dimension ``dim`` where they are of size 1. +Otherwise, ``dim`` is squeezed (see :func:`torch.squeeze`), resulting +in the output tensors having 1 fewer dimension than ``input``. + +.. note:: If there are multiple maximal values in a reduced row then + the indices of the first maximal value are returned. + +Args: + {input} + {dim} + {keepdim} Default: ``False``. + +Keyword args: + out (tuple, optional): the result tuple of two output tensors (max, max_indices) + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[-1.2360, -0.2942, -0.1222, 0.8475], + [ 1.1949, -1.1127, -2.2379, -0.6702], + [ 1.5717, -0.9207, 0.1297, -1.8768], + [-0.6172, 1.0036, -0.6060, -0.2432]]) + >>> torch.max(a, 1) + torch.return_types.max(values=tensor([0.8475, 1.1949, 1.5717, 1.0036]), indices=tensor([3, 0, 0, 1])) + +.. function:: max(input, other, *, out=None) -> Tensor + :noindex: + +See :func:`torch.maximum`. + +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.maximum, + r""" +maximum(input, other, *, out=None) -> Tensor + +Computes the element-wise maximum of :attr:`input` and :attr:`other`. + +.. note:: + If one of the elements being compared is a NaN, then that element is returned. + :func:`maximum` is not supported for tensors with complex dtypes. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor((1, 2, -1)) + >>> b = torch.tensor((3, 0, 4)) + >>> torch.maximum(a, b) + tensor([3, 2, 4]) +""".format( + **common_args + ), +) + +add_docstr( + torch.fmax, + r""" +fmax(input, other, *, out=None) -> Tensor + +Computes the element-wise maximum of :attr:`input` and :attr:`other`. + +This is like :func:`torch.maximum` except it handles NaNs differently: +if exactly one of the two elements being compared is a NaN then the non-NaN element is taken as the maximum. +Only if both elements are NaN is NaN propagated. + +This function is a wrapper around C++'s ``std::fmax`` and is similar to NumPy's ``fmax`` function. + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer and floating-point inputs. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([9.7, float('nan'), 3.1, float('nan')]) + >>> b = torch.tensor([-2.2, 0.5, float('nan'), float('nan')]) + >>> torch.fmax(a, b) + tensor([9.7000, 0.5000, 3.1000, nan]) +""".format( + **common_args + ), +) + +add_docstr( + torch.amax, + r""" +amax(input, dim, keepdim=False, *, out=None) -> Tensor + +Returns the maximum value of each slice of the :attr:`input` tensor in the given +dimension(s) :attr:`dim`. + +.. note:: + The difference between ``max``/``min`` and ``amax``/``amin`` is: + - ``amax``/``amin`` supports reducing on multiple dimensions, + - ``amax``/``amin`` does not return indices, + - ``amax``/``amin`` evenly distributes gradient between equal values, + while ``max(dim)``/``min(dim)`` propagates gradient only to a single + index in the source tensor. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.8177, 1.4878, -0.2491, 0.9130], + [-0.7158, 1.1775, 2.0992, 0.4817], + [-0.0053, 0.0164, -1.3738, -0.0507], + [ 1.9700, 1.1106, -1.0318, -1.0816]]) + >>> torch.amax(a, 1) + tensor([1.4878, 2.0992, 0.0164, 1.9700]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.argmax, + r""" +argmax(input) -> LongTensor + +Returns the indices of the maximum value of all elements in the :attr:`input` tensor. + +This is the second value returned by :meth:`torch.max`. See its +documentation for the exact semantics of this method. + +.. note:: If there are multiple maximal values then the indices of the first maximal value are returned. + +Args: + {input} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 1.3398, 0.2663, -0.2686, 0.2450], + [-0.7401, -0.8805, -0.3402, -1.1936], + [ 0.4907, -1.3948, -1.0691, -0.3132], + [-1.6092, 0.5419, -0.2993, 0.3195]]) + >>> torch.argmax(a) + tensor(0) + +.. function:: argmax(input, dim, keepdim=False) -> LongTensor + :noindex: + +Returns the indices of the maximum values of a tensor across a dimension. + +This is the second value returned by :meth:`torch.max`. See its +documentation for the exact semantics of this method. + +Args: + {input} + {dim} If ``None``, the argmax of the flattened input is returned. + {keepdim} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 1.3398, 0.2663, -0.2686, 0.2450], + [-0.7401, -0.8805, -0.3402, -1.1936], + [ 0.4907, -1.3948, -1.0691, -0.3132], + [-1.6092, 0.5419, -0.2993, 0.3195]]) + >>> torch.argmax(a, dim=1) + tensor([ 0, 2, 0, 1]) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.argwhere, + r""" +argwhere(input) -> Tensor + +Returns a tensor containing the indices of all non-zero elements of +:attr:`input`. Each row in the result contains the indices of a non-zero +element in :attr:`input`. The result is sorted lexicographically, with +the last index changing the fastest (C-style). + +If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor +:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of +non-zero elements in the :attr:`input` tensor. + +.. note:: + This function is similar to NumPy's `argwhere`. + + When :attr:`input` is on CUDA, this function causes host-device synchronization. + +Args: + {input} + +Example:: + + >>> t = torch.tensor([1, 0, 1]) + >>> torch.argwhere(t) + tensor([[0], + [2]]) + >>> t = torch.tensor([[1, 0, 1], [0, 1, 1]]) + >>> torch.argwhere(t) + tensor([[0, 0], + [0, 2], + [1, 1], + [1, 2]]) +""", +) + +add_docstr( + torch.mean, + r""" +mean(input, *, dtype=None) -> Tensor + +Returns the mean value of all elements in the :attr:`input` tensor. Input must be floating point or complex. + +Args: + input (Tensor): + the input tensor, either of floating point or complex dtype + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[ 0.2294, -0.5481, 1.3288]]) + >>> torch.mean(a) + tensor(0.3367) + +.. function:: mean(input, dim, keepdim=False, *, dtype=None, out=None) -> Tensor + :noindex: + +Returns the mean value of each row of the :attr:`input` tensor in the given +dimension :attr:`dim`. If :attr:`dim` is a list of dimensions, +reduce over all of them. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {dtype} + {out} + +.. seealso:: + + :func:`torch.nanmean` computes the mean value of `non-NaN` elements. + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[-0.3841, 0.6320, 0.4254, -0.7384], + [-0.9644, 1.0131, -0.6549, -1.4279], + [-0.2951, -1.3350, -0.7694, 0.5600], + [ 1.0842, -0.9580, 0.3623, 0.2343]]) + >>> torch.mean(a, 1) + tensor([-0.0163, -0.5085, -0.4599, 0.1807]) + >>> torch.mean(a, 1, True) + tensor([[-0.0163], + [-0.5085], + [-0.4599], + [ 0.1807]]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.nanmean, + r""" +nanmean(input, dim=None, keepdim=False, *, dtype=None, out=None) -> Tensor + +Computes the mean of all `non-NaN` elements along the specified dimensions. + +This function is identical to :func:`torch.mean` when there are no `NaN` values +in the :attr:`input` tensor. In the presence of `NaN`, :func:`torch.mean` will +propagate the `NaN` to the output whereas :func:`torch.nanmean` will ignore the +`NaN` values (`torch.nanmean(a)` is equivalent to `torch.mean(a[~a.isnan()])`). + +{keepdim_details} + +Args: + {input} + {opt_dim} + {keepdim} + +Keyword args: + {dtype} + {out} + +.. seealso:: + + :func:`torch.mean` computes the mean value, propagating `NaN`. + +Example:: + + >>> x = torch.tensor([[torch.nan, 1, 2], [1, 2, 3]]) + >>> x.mean() + tensor(nan) + >>> x.nanmean() + tensor(1.8000) + >>> x.mean(dim=0) + tensor([ nan, 1.5000, 2.5000]) + >>> x.nanmean(dim=0) + tensor([1.0000, 1.5000, 2.5000]) + + # If all elements in the reduced dimensions are NaN then the result is NaN + >>> torch.tensor([torch.nan]).nanmean() + tensor(nan) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.median, + r""" +median(input) -> Tensor + +Returns the median of the values in :attr:`input`. + +.. note:: + The median is not unique for :attr:`input` tensors with an even number + of elements. In this case the lower of the two medians is returned. To + compute the mean of both medians, use :func:`torch.quantile` with ``q=0.5`` instead. + +.. warning:: + This function produces deterministic (sub)gradients unlike ``median(dim=0)`` + +Args: + {input} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[ 1.5219, -1.5212, 0.2202]]) + >>> torch.median(a) + tensor(0.2202) + +.. function:: median(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor) + :noindex: + +Returns a namedtuple ``(values, indices)`` where ``values`` contains the median of each row of :attr:`input` +in the dimension :attr:`dim`, and ``indices`` contains the index of the median values found in the dimension :attr:`dim`. + +By default, :attr:`dim` is the last dimension of the :attr:`input` tensor. + +If :attr:`keepdim` is ``True``, the output tensors are of the same size +as :attr:`input` except in the dimension :attr:`dim` where they are of size 1. +Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in +the outputs tensor having 1 fewer dimension than :attr:`input`. + +.. note:: + The median is not unique for :attr:`input` tensors with an even number + of elements in the dimension :attr:`dim`. In this case the lower of the + two medians is returned. To compute the mean of both medians in + :attr:`input`, use :func:`torch.quantile` with ``q=0.5`` instead. + +.. warning:: + ``indices`` does not necessarily contain the first occurrence of each + median value found, unless it is unique. + The exact implementation details are device-specific. + Do not expect the same result when run on CPU and GPU in general. + For the same reason do not expect the gradients to be deterministic. + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + out ((Tensor, Tensor), optional): The first tensor will be populated with the median values and the second + tensor, which must have dtype long, with their indices in the dimension + :attr:`dim` of :attr:`input`. + +Example:: + + >>> a = torch.randn(4, 5) + >>> a + tensor([[ 0.2505, -0.3982, -0.9948, 0.3518, -1.3131], + [ 0.3180, -0.6993, 1.0436, 0.0438, 0.2270], + [-0.2751, 0.7303, 0.2192, 0.3321, 0.2488], + [ 1.0778, -1.9510, 0.7048, 0.4742, -0.7125]]) + >>> torch.median(a, 1) + torch.return_types.median(values=tensor([-0.3982, 0.2270, 0.2488, 0.4742]), indices=tensor([1, 4, 4, 3])) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.nanmedian, + r""" +nanmedian(input) -> Tensor + +Returns the median of the values in :attr:`input`, ignoring ``NaN`` values. + +This function is identical to :func:`torch.median` when there are no ``NaN`` values in :attr:`input`. +When :attr:`input` has one or more ``NaN`` values, :func:`torch.median` will always return ``NaN``, +while this function will return the median of the non-``NaN`` elements in :attr:`input`. +If all the elements in :attr:`input` are ``NaN`` it will also return ``NaN``. + +Args: + {input} + +Example:: + + >>> a = torch.tensor([1, float('nan'), 3, 2]) + >>> a.median() + tensor(nan) + >>> a.nanmedian() + tensor(2.) + +.. function:: nanmedian(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor) + :noindex: + +Returns a namedtuple ``(values, indices)`` where ``values`` contains the median of each row of :attr:`input` +in the dimension :attr:`dim`, ignoring ``NaN`` values, and ``indices`` contains the index of the median values +found in the dimension :attr:`dim`. + +This function is identical to :func:`torch.median` when there are no ``NaN`` values in a reduced row. When a reduced row has +one or more ``NaN`` values, :func:`torch.median` will always reduce it to ``NaN``, while this function will reduce it to the +median of the non-``NaN`` elements. If all the elements in a reduced row are ``NaN`` then it will be reduced to ``NaN``, too. + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + out ((Tensor, Tensor), optional): The first tensor will be populated with the median values and the second + tensor, which must have dtype long, with their indices in the dimension + :attr:`dim` of :attr:`input`. + +Example:: + + >>> a = torch.tensor([[2, 3, 1], [float('nan'), 1, float('nan')]]) + >>> a + tensor([[2., 3., 1.], + [nan, 1., nan]]) + >>> a.median(0) + torch.return_types.median(values=tensor([nan, 1., nan]), indices=tensor([1, 1, 1])) + >>> a.nanmedian(0) + torch.return_types.nanmedian(values=tensor([2., 1., 1.]), indices=tensor([0, 1, 0])) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.quantile, + r""" +quantile(input, q, dim=None, keepdim=False, *, interpolation='linear', out=None) -> Tensor + +Computes the q-th quantiles of each row of the :attr:`input` tensor along the dimension :attr:`dim`. + +To compute the quantile, we map q in [0, 1] to the range of indices [0, n] to find the location +of the quantile in the sorted input. If the quantile lies between two data points ``a < b`` with +indices ``i`` and ``j`` in the sorted order, result is computed according to the given +:attr:`interpolation` method as follows: + +- ``linear``: ``a + (b - a) * fraction``, where ``fraction`` is the fractional part of the computed quantile index. +- ``lower``: ``a``. +- ``higher``: ``b``. +- ``nearest``: ``a`` or ``b``, whichever's index is closer to the computed quantile index (rounding down for .5 fractions). +- ``midpoint``: ``(a + b) / 2``. + +If :attr:`q` is a 1D tensor, the first dimension of the output represents the quantiles and has size +equal to the size of :attr:`q`, the remaining dimensions are what remains from the reduction. + +.. note:: + By default :attr:`dim` is ``None`` resulting in the :attr:`input` tensor being flattened before computation. + +Args: + {input} + q (float or Tensor): a scalar or 1D tensor of values in the range [0, 1]. + {dim} + {keepdim} + +Keyword arguments: + interpolation (str): interpolation method to use when the desired quantile lies between two data points. + Can be ``linear``, ``lower``, ``higher``, ``midpoint`` and ``nearest``. + Default is ``linear``. + {out} + +Example:: + + >>> a = torch.randn(2, 3) + >>> a + tensor([[ 0.0795, -1.2117, 0.9765], + [ 1.1707, 0.6706, 0.4884]]) + >>> q = torch.tensor([0.25, 0.5, 0.75]) + >>> torch.quantile(a, q, dim=1, keepdim=True) + tensor([[[-0.5661], + [ 0.5795]], + + [[ 0.0795], + [ 0.6706]], + + [[ 0.5280], + [ 0.9206]]]) + >>> torch.quantile(a, q, dim=1, keepdim=True).shape + torch.Size([3, 2, 1]) + >>> a = torch.arange(4.) + >>> a + tensor([0., 1., 2., 3.]) + >>> torch.quantile(a, 0.6, interpolation='linear') + tensor(1.8000) + >>> torch.quantile(a, 0.6, interpolation='lower') + tensor(1.) + >>> torch.quantile(a, 0.6, interpolation='higher') + tensor(2.) + >>> torch.quantile(a, 0.6, interpolation='midpoint') + tensor(1.5000) + >>> torch.quantile(a, 0.6, interpolation='nearest') + tensor(2.) + >>> torch.quantile(a, 0.4, interpolation='nearest') + tensor(1.) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.nanquantile, + r""" +nanquantile(input, q, dim=None, keepdim=False, *, interpolation='linear', out=None) -> Tensor + +This is a variant of :func:`torch.quantile` that "ignores" ``NaN`` values, +computing the quantiles :attr:`q` as if ``NaN`` values in :attr:`input` did +not exist. If all values in a reduced row are ``NaN`` then the quantiles for +that reduction will be ``NaN``. See the documentation for :func:`torch.quantile`. + +Args: + {input} + q (float or Tensor): a scalar or 1D tensor of quantile values in the range [0, 1] + {dim} + {keepdim} + +Keyword arguments: + interpolation (str): interpolation method to use when the desired quantile lies between two data points. + Can be ``linear``, ``lower``, ``higher``, ``midpoint`` and ``nearest``. + Default is ``linear``. + {out} + +Example:: + + >>> t = torch.tensor([float('nan'), 1, 2]) + >>> t.quantile(0.5) + tensor(nan) + >>> t.nanquantile(0.5) + tensor(1.5000) + >>> t = torch.tensor([[float('nan'), float('nan')], [1, 2]]) + >>> t + tensor([[nan, nan], + [1., 2.]]) + >>> t.nanquantile(0.5, dim=0) + tensor([1., 2.]) + >>> t.nanquantile(0.5, dim=1) + tensor([ nan, 1.5000]) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.min, + r""" +min(input) -> Tensor + +Returns the minimum value of all elements in the :attr:`input` tensor. + +.. warning:: + This function produces deterministic (sub)gradients unlike ``min(dim=0)`` + +Args: + {input} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[ 0.6750, 1.0857, 1.7197]]) + >>> torch.min(a) + tensor(0.6750) + +.. function:: min(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor) + :noindex: + +Returns a namedtuple ``(values, indices)`` where ``values`` is the minimum +value of each row of the :attr:`input` tensor in the given dimension +:attr:`dim`. And ``indices`` is the index location of each minimum value found +(argmin). + +If :attr:`keepdim` is ``True``, the output tensors are of the same size as +:attr:`input` except in the dimension :attr:`dim` where they are of size 1. +Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in +the output tensors having 1 fewer dimension than :attr:`input`. + +.. note:: If there are multiple minimal values in a reduced row then + the indices of the first minimal value are returned. + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + out (tuple, optional): the tuple of two output tensors (min, min_indices) + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[-0.6248, 1.1334, -1.1899, -0.2803], + [-1.4644, -0.2635, -0.3651, 0.6134], + [ 0.2457, 0.0384, 1.0128, 0.7015], + [-0.1153, 2.9849, 2.1458, 0.5788]]) + >>> torch.min(a, 1) + torch.return_types.min(values=tensor([-1.1899, -1.4644, 0.0384, -0.1153]), indices=tensor([2, 0, 1, 0])) + +.. function:: min(input, other, *, out=None) -> Tensor + :noindex: + +See :func:`torch.minimum`. +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.minimum, + r""" +minimum(input, other, *, out=None) -> Tensor + +Computes the element-wise minimum of :attr:`input` and :attr:`other`. + +.. note:: + If one of the elements being compared is a NaN, then that element is returned. + :func:`minimum` is not supported for tensors with complex dtypes. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor((1, 2, -1)) + >>> b = torch.tensor((3, 0, 4)) + >>> torch.minimum(a, b) + tensor([1, 0, -1]) +""".format( + **common_args + ), +) + +add_docstr( + torch.fmin, + r""" +fmin(input, other, *, out=None) -> Tensor + +Computes the element-wise minimum of :attr:`input` and :attr:`other`. + +This is like :func:`torch.minimum` except it handles NaNs differently: +if exactly one of the two elements being compared is a NaN then the non-NaN element is taken as the minimum. +Only if both elements are NaN is NaN propagated. + +This function is a wrapper around C++'s ``std::fmin`` and is similar to NumPy's ``fmin`` function. + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer and floating-point inputs. + +Args: + {input} + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([2.2, float('nan'), 2.1, float('nan')]) + >>> b = torch.tensor([-9.3, 0.1, float('nan'), float('nan')]) + >>> torch.fmin(a, b) + tensor([-9.3000, 0.1000, 2.1000, nan]) +""".format( + **common_args + ), +) + +add_docstr( + torch.amin, + r""" +amin(input, dim, keepdim=False, *, out=None) -> Tensor + +Returns the minimum value of each slice of the :attr:`input` tensor in the given +dimension(s) :attr:`dim`. + +.. note:: + The difference between ``max``/``min`` and ``amax``/``amin`` is: + - ``amax``/``amin`` supports reducing on multiple dimensions, + - ``amax``/``amin`` does not return indices, + - ``amax``/``amin`` evenly distributes gradient between equal values, + while ``max(dim)``/``min(dim)`` propagates gradient only to a single + index in the source tensor. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.6451, -0.4866, 0.2987, -1.3312], + [-0.5744, 1.2980, 1.8397, -0.2713], + [ 0.9128, 0.9214, -1.7268, -0.2995], + [ 0.9023, 0.4853, 0.9075, -1.6165]]) + >>> torch.amin(a, 1) + tensor([-1.3312, -0.5744, -1.7268, -1.6165]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.aminmax, + r""" +aminmax(input, *, dim=None, keepdim=False, out=None) -> (Tensor min, Tensor max) + +Computes the minimum and maximum values of the :attr:`input` tensor. + +Args: + input (Tensor): + The input tensor + +Keyword Args: + dim (Optional[int]): + The dimension along which to compute the values. If `None`, + computes the values over the entire :attr:`input` tensor. + Default is `None`. + keepdim (bool): + If `True`, the reduced dimensions will be kept in the output + tensor as dimensions with size 1 for broadcasting, otherwise + they will be removed, as if calling (:func:`torch.squeeze`). + Default is `False`. + out (Optional[Tuple[Tensor, Tensor]]): + Optional tensors on which to write the result. Must have the same + shape and dtype as the expected output. + Default is `None`. + +Returns: + A named tuple `(min, max)` containing the minimum and maximum values. + +Raises: + RuntimeError + If any of the dimensions to compute the values over has size 0. + +.. note:: + NaN values are propagated to the output if at least one value is NaN. + +.. seealso:: + :func:`torch.amin` computes just the minimum value + :func:`torch.amax` computes just the maximum value + +Example:: + + >>> torch.aminmax(torch.tensor([1, -3, 5])) + torch.return_types.aminmax( + min=tensor(-3), + max=tensor(5)) + + >>> # aminmax propagates NaNs + >>> torch.aminmax(torch.tensor([1, -3, 5, torch.nan])) + torch.return_types.aminmax( + min=tensor(nan), + max=tensor(nan)) + + >>> t = torch.arange(10).view(2, 5) + >>> t + tensor([[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]]) + >>> t.aminmax(dim=0, keepdim=True) + torch.return_types.aminmax( + min=tensor([[0, 1, 2, 3, 4]]), + max=tensor([[5, 6, 7, 8, 9]])) +""", +) + +add_docstr( + torch.argmin, + r""" +argmin(input, dim=None, keepdim=False) -> LongTensor + +Returns the indices of the minimum value(s) of the flattened tensor or along a dimension + +This is the second value returned by :meth:`torch.min`. See its +documentation for the exact semantics of this method. + +.. note:: If there are multiple minimal values then the indices of the first minimal value are returned. + +Args: + {input} + {dim} If ``None``, the argmin of the flattened input is returned. + {keepdim} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.1139, 0.2254, -0.1381, 0.3687], + [ 1.0100, -1.1975, -0.0102, -0.4732], + [-0.9240, 0.1207, -0.7506, -1.0213], + [ 1.7809, -1.2960, 0.9384, 0.1438]]) + >>> torch.argmin(a) + tensor(13) + >>> torch.argmin(a, dim=1) + tensor([ 2, 1, 3, 1]) + >>> torch.argmin(a, dim=1, keepdim=True) + tensor([[2], + [1], + [3], + [1]]) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.mm, + r""" +mm(input, mat2, *, out=None) -> Tensor + +Performs a matrix multiplication of the matrices :attr:`input` and :attr:`mat2`. + +If :attr:`input` is a :math:`(n \times m)` tensor, :attr:`mat2` is a +:math:`(m \times p)` tensor, :attr:`out` will be a :math:`(n \times p)` tensor. + +.. note:: This function does not :ref:`broadcast `. + For broadcasting matrix products, see :func:`torch.matmul`. + +Supports strided and sparse 2-D tensors as inputs, autograd with +respect to strided inputs. + +This operation has support for arguments with :ref:`sparse layouts`. +If :attr:`out` is provided it's layout will be used. Otherwise, the result +layout will be deduced from that of :attr:`input`. + +{sparse_beta_warning} + +{tf32_note} + +{rocm_fp16_note} + +Args: + input (Tensor): the first matrix to be matrix multiplied + mat2 (Tensor): the second matrix to be matrix multiplied + +Keyword args: + {out} + +Example:: + + >>> mat1 = torch.randn(2, 3) + >>> mat2 = torch.randn(3, 3) + >>> torch.mm(mat1, mat2) + tensor([[ 0.4851, 0.5037, -0.3633], + [-0.0760, -3.6705, 2.4784]]) +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes + ), +) + +add_docstr( + torch.hspmm, + r""" +hspmm(mat1, mat2, *, out=None) -> Tensor + +Performs a matrix multiplication of a :ref:`sparse COO matrix +` :attr:`mat1` and a strided matrix :attr:`mat2`. The +result is a (1 + 1)-dimensional :ref:`hybrid COO matrix +`. + +Args: + mat1 (Tensor): the first sparse matrix to be matrix multiplied + mat2 (Tensor): the second strided matrix to be matrix multiplied + +Keyword args: + {out} +""".format( + **common_args + ), +) + +add_docstr( + torch.matmul, + r""" +matmul(input, other, *, out=None) -> Tensor + +Matrix product of two tensors. + +The behavior depends on the dimensionality of the tensors as follows: + +- If both tensors are 1-dimensional, the dot product (scalar) is returned. +- If both arguments are 2-dimensional, the matrix-matrix product is returned. +- If the first argument is 1-dimensional and the second argument is 2-dimensional, + a 1 is prepended to its dimension for the purpose of the matrix multiply. + After the matrix multiply, the prepended dimension is removed. +- If the first argument is 2-dimensional and the second argument is 1-dimensional, + the matrix-vector product is returned. +- If both arguments are at least 1-dimensional and at least one argument is + N-dimensional (where N > 2), then a batched matrix multiply is returned. If the first + argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the + batched matrix multiply and removed after. If the second argument is 1-dimensional, a + 1 is appended to its dimension for the purpose of the batched matrix multiple and removed after. + The non-matrix (i.e. batch) dimensions are :ref:`broadcasted ` (and thus + must be broadcastable). For example, if :attr:`input` is a + :math:`(j \times 1 \times n \times n)` tensor and :attr:`other` is a :math:`(k \times n \times n)` + tensor, :attr:`out` will be a :math:`(j \times k \times n \times n)` tensor. + + Note that the broadcasting logic only looks at the batch dimensions when determining if the inputs + are broadcastable, and not the matrix dimensions. For example, if :attr:`input` is a + :math:`(j \times 1 \times n \times m)` tensor and :attr:`other` is a :math:`(k \times m \times p)` + tensor, these inputs are valid for broadcasting even though the final two dimensions (i.e. the + matrix dimensions) are different. :attr:`out` will be a :math:`(j \times k \times n \times p)` tensor. + +This operation has support for arguments with :ref:`sparse layouts`. In particular the +matrix-matrix (both arguments 2-dimensional) supports sparse arguments with the same restrictions +as :func:`torch.mm` + +{sparse_beta_warning} + +{tf32_note} + +{rocm_fp16_note} + +.. note:: + + The 1-dimensional dot product version of this function does not support an :attr:`out` parameter. + +Arguments: + input (Tensor): the first tensor to be multiplied + other (Tensor): the second tensor to be multiplied + +Keyword args: + {out} + +Example:: + + >>> # vector x vector + >>> tensor1 = torch.randn(3) + >>> tensor2 = torch.randn(3) + >>> torch.matmul(tensor1, tensor2).size() + torch.Size([]) + >>> # matrix x vector + >>> tensor1 = torch.randn(3, 4) + >>> tensor2 = torch.randn(4) + >>> torch.matmul(tensor1, tensor2).size() + torch.Size([3]) + >>> # batched matrix x broadcasted vector + >>> tensor1 = torch.randn(10, 3, 4) + >>> tensor2 = torch.randn(4) + >>> torch.matmul(tensor1, tensor2).size() + torch.Size([10, 3]) + >>> # batched matrix x batched matrix + >>> tensor1 = torch.randn(10, 3, 4) + >>> tensor2 = torch.randn(10, 4, 5) + >>> torch.matmul(tensor1, tensor2).size() + torch.Size([10, 3, 5]) + >>> # batched matrix x broadcasted matrix + >>> tensor1 = torch.randn(10, 3, 4) + >>> tensor2 = torch.randn(4, 5) + >>> torch.matmul(tensor1, tensor2).size() + torch.Size([10, 3, 5]) + +""".format( + **common_args, **tf32_notes, **rocm_fp16_notes, **sparse_support_notes + ), +) + +add_docstr( + torch.mode, + r""" +mode(input, dim=-1, keepdim=False, *, out=None) -> (Tensor, LongTensor) + +Returns a namedtuple ``(values, indices)`` where ``values`` is the mode +value of each row of the :attr:`input` tensor in the given dimension +:attr:`dim`, i.e. a value which appears most often +in that row, and ``indices`` is the index location of each mode value found. + +By default, :attr:`dim` is the last dimension of the :attr:`input` tensor. + +If :attr:`keepdim` is ``True``, the output tensors are of the same size as +:attr:`input` except in the dimension :attr:`dim` where they are of size 1. +Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting +in the output tensors having 1 fewer dimension than :attr:`input`. + +.. note:: This function is not defined for ``torch.cuda.Tensor`` yet. + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + out (tuple, optional): the result tuple of two output tensors (values, indices) + +Example:: + + >>> b = torch.tensor( + [[0, 0, 0, 2, 0, 0, 2], + [0, 3, 0, 0, 2, 0, 1], + [2, 2, 2, 0, 0, 0, 3], + [2, 2, 3, 0, 1, 1, 0], + [1, 1, 0, 0, 2, 0, 2]]) + >>> torch.mode(b, 0) + torch.return_types.mode( + values=tensor([0, 2, 0, 0, 0, 0, 2]), + indices=tensor([1, 3, 4, 4, 2, 4, 4])) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.mul, + r""" +mul(input, other, *, out=None) -> Tensor + +Multiplies :attr:`input` by :attr:`other`. + + +.. math:: + \text{out}_i = \text{input}_i \times \text{other}_i +""" + + r""" + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer, float, and complex inputs. + +Args: + {input} + other (Tensor or Number) - the tensor or number to multiply input by. + +Keyword args: + {out} + +Examples:: + + >>> a = torch.randn(3) + >>> a + tensor([ 0.2015, -0.4255, 2.6087]) + >>> torch.mul(a, 100) + tensor([ 20.1494, -42.5491, 260.8663]) + + >>> b = torch.randn(4, 1) + >>> b + tensor([[ 1.1207], + [-0.3137], + [ 0.0700], + [ 0.8378]]) + >>> c = torch.randn(1, 4) + >>> c + tensor([[ 0.5146, 0.1216, -0.5244, 2.2382]]) + >>> torch.mul(b, c) + tensor([[ 0.5767, 0.1363, -0.5877, 2.5083], + [-0.1614, -0.0382, 0.1645, -0.7021], + [ 0.0360, 0.0085, -0.0367, 0.1567], + [ 0.4312, 0.1019, -0.4394, 1.8753]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.multiply, + r""" +multiply(input, other, *, out=None) + +Alias for :func:`torch.mul`. +""", +) + +add_docstr( + torch.multinomial, + r""" +multinomial(input, num_samples, replacement=False, *, generator=None, out=None) -> LongTensor + +Returns a tensor where each row contains :attr:`num_samples` indices sampled +from the multinomial (a stricter definition would be multivariate, +refer to torch.distributions.multinomial.Multinomial for more details) +probability distribution located in the corresponding row +of tensor :attr:`input`. + +.. note:: + The rows of :attr:`input` do not need to sum to one (in which case we use + the values as weights), but must be non-negative, finite and have + a non-zero sum. + +Indices are ordered from left to right according to when each was sampled +(first samples are placed in first column). + +If :attr:`input` is a vector, :attr:`out` is a vector of size :attr:`num_samples`. + +If :attr:`input` is a matrix with `m` rows, :attr:`out` is an matrix of shape +:math:`(m \times \text{{num\_samples}})`. + +If replacement is ``True``, samples are drawn with replacement. + +If not, they are drawn without replacement, which means that when a +sample index is drawn for a row, it cannot be drawn again for that row. + +.. note:: + When drawn without replacement, :attr:`num_samples` must be lower than + number of non-zero elements in :attr:`input` (or the min number of non-zero + elements in each row of :attr:`input` if it is a matrix). + +Args: + input (Tensor): the input tensor containing probabilities + num_samples (int): number of samples to draw + replacement (bool, optional): whether to draw with replacement or not + +Keyword args: + {generator} + {out} + +Example:: + + >>> weights = torch.tensor([0, 10, 3, 0], dtype=torch.float) # create a tensor of weights + >>> torch.multinomial(weights, 2) + tensor([1, 2]) + >>> torch.multinomial(weights, 4) # ERROR! + RuntimeError: invalid argument 2: invalid multinomial distribution (with replacement=False, + not enough non-negative category to sample) at ../aten/src/TH/generic/THTensorRandom.cpp:320 + >>> torch.multinomial(weights, 4, replacement=True) + tensor([ 2, 1, 1, 1]) +""".format( + **common_args + ), +) + +add_docstr( + torch.mv, + r""" +mv(input, vec, *, out=None) -> Tensor + +Performs a matrix-vector product of the matrix :attr:`input` and the vector +:attr:`vec`. + +If :attr:`input` is a :math:`(n \times m)` tensor, :attr:`vec` is a 1-D tensor of +size :math:`m`, :attr:`out` will be 1-D of size :math:`n`. + +.. note:: This function does not :ref:`broadcast `. + +Args: + input (Tensor): matrix to be multiplied + vec (Tensor): vector to be multiplied + +Keyword args: + {out} + +Example:: + + >>> mat = torch.randn(2, 3) + >>> vec = torch.randn(3) + >>> torch.mv(mat, vec) + tensor([ 1.0404, -0.6361]) +""".format( + **common_args + ), +) + +add_docstr( + torch.mvlgamma, + r""" +mvlgamma(input, p, *, out=None) -> Tensor + +Alias for :func:`torch.special.multigammaln`. +""", +) + +add_docstr( + torch.movedim, + r""" +movedim(input, source, destination) -> Tensor + +Moves the dimension(s) of :attr:`input` at the position(s) in :attr:`source` +to the position(s) in :attr:`destination`. + +Other dimensions of :attr:`input` that are not explicitly moved remain in +their original order and appear at the positions not specified in :attr:`destination`. + +Args: + {input} + source (int or tuple of ints): Original positions of the dims to move. These must be unique. + destination (int or tuple of ints): Destination positions for each of the original dims. These must also be unique. + +Examples:: + + >>> t = torch.randn(3,2,1) + >>> t + tensor([[[-0.3362], + [-0.8437]], + + [[-0.9627], + [ 0.1727]], + + [[ 0.5173], + [-0.1398]]]) + >>> torch.movedim(t, 1, 0).shape + torch.Size([2, 3, 1]) + >>> torch.movedim(t, 1, 0) + tensor([[[-0.3362], + [-0.9627], + [ 0.5173]], + + [[-0.8437], + [ 0.1727], + [-0.1398]]]) + >>> torch.movedim(t, (1, 2), (0, 1)).shape + torch.Size([2, 1, 3]) + >>> torch.movedim(t, (1, 2), (0, 1)) + tensor([[[-0.3362, -0.9627, 0.5173]], + + [[-0.8437, 0.1727, -0.1398]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.moveaxis, + r""" +moveaxis(input, source, destination) -> Tensor + +Alias for :func:`torch.movedim`. + +This function is equivalent to NumPy's moveaxis function. + +Examples:: + + >>> t = torch.randn(3,2,1) + >>> t + tensor([[[-0.3362], + [-0.8437]], + + [[-0.9627], + [ 0.1727]], + + [[ 0.5173], + [-0.1398]]]) + >>> torch.moveaxis(t, 1, 0).shape + torch.Size([2, 3, 1]) + >>> torch.moveaxis(t, 1, 0) + tensor([[[-0.3362], + [-0.9627], + [ 0.5173]], + + [[-0.8437], + [ 0.1727], + [-0.1398]]]) + >>> torch.moveaxis(t, (1, 2), (0, 1)).shape + torch.Size([2, 1, 3]) + >>> torch.moveaxis(t, (1, 2), (0, 1)) + tensor([[[-0.3362, -0.9627, 0.5173]], + + [[-0.8437, 0.1727, -0.1398]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.swapdims, + r""" +swapdims(input, dim0, dim1) -> Tensor + +Alias for :func:`torch.transpose`. + +This function is equivalent to NumPy's swapaxes function. + +Examples:: + + >>> x = torch.tensor([[[0,1],[2,3]],[[4,5],[6,7]]]) + >>> x + tensor([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]) + >>> torch.swapdims(x, 0, 1) + tensor([[[0, 1], + [4, 5]], + + [[2, 3], + [6, 7]]]) + >>> torch.swapdims(x, 0, 2) + tensor([[[0, 4], + [2, 6]], + + [[1, 5], + [3, 7]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.swapaxes, + r""" +swapaxes(input, axis0, axis1) -> Tensor + +Alias for :func:`torch.transpose`. + +This function is equivalent to NumPy's swapaxes function. + +Examples:: + + >>> x = torch.tensor([[[0,1],[2,3]],[[4,5],[6,7]]]) + >>> x + tensor([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]) + >>> torch.swapaxes(x, 0, 1) + tensor([[[0, 1], + [4, 5]], + + [[2, 3], + [6, 7]]]) + >>> torch.swapaxes(x, 0, 2) + tensor([[[0, 4], + [2, 6]], + + [[1, 5], + [3, 7]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.narrow, + r""" +narrow(input, dim, start, length) -> Tensor + +Returns a new tensor that is a narrowed version of :attr:`input` tensor. The +dimension :attr:`dim` is input from :attr:`start` to ``start + length``. The +returned tensor and :attr:`input` tensor share the same underlying storage. + +Args: + input (Tensor): the tensor to narrow + dim (int): the dimension along which to narrow + start (int or Tensor): index of the element to start the narrowed dimension + from. Can be negative, which means indexing from the end of `dim`. If + `Tensor`, it must be an 0-dim integral `Tensor` (bools not allowed) + length (int): length of the narrowed dimension, must be weakly positive + +Example:: + + >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + >>> torch.narrow(x, 0, 0, 2) + tensor([[ 1, 2, 3], + [ 4, 5, 6]]) + >>> torch.narrow(x, 1, 1, 2) + tensor([[ 2, 3], + [ 5, 6], + [ 8, 9]]) + >>> torch.narrow(x, -1, torch.tensor(-1), 1) + tensor([[3], + [6], + [9]]) +""", +) + +add_docstr( + torch.narrow_copy, + r""" +narrow_copy(input, dim, start, length, *, out=None) -> Tensor + +Same as :meth:`Tensor.narrow` except this returns a copy rather +than shared storage. This is primarily for sparse tensors, which +do not have a shared-storage narrow method. + +Args: + input (Tensor): the tensor to narrow + dim (int): the dimension along which to narrow + start (int): index of the element to start the narrowed dimension from. Can + be negative, which means indexing from the end of `dim` + length (int): length of the narrowed dimension, must be weakly positive + +Keyword args: + {out} + +Example:: + + >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + >>> torch.narrow_copy(x, 0, 0, 2) + tensor([[ 1, 2, 3], + [ 4, 5, 6]]) + >>> torch.narrow_copy(x, 1, 1, 2) + tensor([[ 2, 3], + [ 5, 6], + [ 8, 9]]) + >>> s = torch.arange(16).reshape(2, 2, 2, 2).to_sparse(2) + >>> torch.narrow_copy(s, 0, 0, 1) + tensor(indices=tensor([[0, 0], + [0, 1]]), + values=tensor([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]), + size=(1, 2, 2, 2), nnz=2, layout=torch.sparse_coo) + +.. seealso:: + + :func:`torch.narrow` for a non copy variant + +""".format( + **common_args + ), +) + +add_docstr( + torch.nan_to_num, + r""" +nan_to_num(input, nan=0.0, posinf=None, neginf=None, *, out=None) -> Tensor + +Replaces :literal:`NaN`, positive infinity, and negative infinity values in :attr:`input` +with the values specified by :attr:`nan`, :attr:`posinf`, and :attr:`neginf`, respectively. +By default, :literal:`NaN`\ s are replaced with zero, positive infinity is replaced with the +greatest finite value representable by :attr:`input`'s dtype, and negative infinity +is replaced with the least finite value representable by :attr:`input`'s dtype. + +Args: + {input} + nan (Number, optional): the value to replace :literal:`NaN`\s with. Default is zero. + posinf (Number, optional): if a Number, the value to replace positive infinity values with. + If None, positive infinity values are replaced with the greatest finite value representable by :attr:`input`'s dtype. + Default is None. + neginf (Number, optional): if a Number, the value to replace negative infinity values with. + If None, negative infinity values are replaced with the lowest finite value representable by :attr:`input`'s dtype. + Default is None. + +Keyword args: + {out} + +Example:: + + >>> x = torch.tensor([float('nan'), float('inf'), -float('inf'), 3.14]) + >>> torch.nan_to_num(x) + tensor([ 0.0000e+00, 3.4028e+38, -3.4028e+38, 3.1400e+00]) + >>> torch.nan_to_num(x, nan=2.0) + tensor([ 2.0000e+00, 3.4028e+38, -3.4028e+38, 3.1400e+00]) + >>> torch.nan_to_num(x, nan=2.0, posinf=1.0) + tensor([ 2.0000e+00, 1.0000e+00, -3.4028e+38, 3.1400e+00]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.ne, + r""" +ne(input, other, *, out=None) -> Tensor + +Computes :math:`\text{input} \neq \text{other}` element-wise. +""" + + r""" + +The second argument can be a number or a tensor whose shape is +:ref:`broadcastable ` with the first argument. + +Args: + input (Tensor): the tensor to compare + other (Tensor or float): the tensor or value to compare + +Keyword args: + {out} + +Returns: + A boolean tensor that is True where :attr:`input` is not equal to :attr:`other` and False elsewhere + +Example:: + + >>> torch.ne(torch.tensor([[1, 2], [3, 4]]), torch.tensor([[1, 1], [4, 4]])) + tensor([[False, True], [True, False]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.not_equal, + r""" +not_equal(input, other, *, out=None) -> Tensor + +Alias for :func:`torch.ne`. +""", +) + +add_docstr( + torch.neg, + r""" +neg(input, *, out=None) -> Tensor + +Returns a new tensor with the negative of the elements of :attr:`input`. + +.. math:: + \text{out} = -1 \times \text{input} +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(5) + >>> a + tensor([ 0.0090, -0.2262, -0.0682, -0.2866, 0.3940]) + >>> torch.neg(a) + tensor([-0.0090, 0.2262, 0.0682, 0.2866, -0.3940]) +""".format( + **common_args + ), +) + +add_docstr( + torch.negative, + r""" +negative(input, *, out=None) -> Tensor + +Alias for :func:`torch.neg` +""", +) + +add_docstr( + torch.nextafter, + r""" +nextafter(input, other, *, out=None) -> Tensor + +Return the next floating-point value after :attr:`input` towards :attr:`other`, elementwise. + +The shapes of ``input`` and ``other`` must be +:ref:`broadcastable `. + +Args: + input (Tensor): the first input tensor + other (Tensor): the second input tensor + +Keyword args: + {out} + +Example:: + + >>> eps = torch.finfo(torch.float32).eps + >>> torch.nextafter(torch.tensor([1.0, 2.0]), torch.tensor([2.0, 1.0])) == torch.tensor([eps + 1, 2 - eps]) + tensor([True, True]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.nonzero, + r""" +nonzero(input, *, out=None, as_tuple=False) -> LongTensor or tuple of LongTensors + +.. note:: + :func:`torch.nonzero(..., as_tuple=False) ` (default) returns a + 2-D tensor where each row is the index for a nonzero value. + + :func:`torch.nonzero(..., as_tuple=True) ` returns a tuple of 1-D + index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]`` + gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor + contains nonzero indices for a certain dimension. + + See below for more details on the two behaviors. + + When :attr:`input` is on CUDA, :func:`torch.nonzero() ` causes + host-device synchronization. + +**When** :attr:`as_tuple` **is** ``False`` **(default)**: + +Returns a tensor containing the indices of all non-zero elements of +:attr:`input`. Each row in the result contains the indices of a non-zero +element in :attr:`input`. The result is sorted lexicographically, with +the last index changing the fastest (C-style). + +If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor +:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of +non-zero elements in the :attr:`input` tensor. + +**When** :attr:`as_tuple` **is** ``True``: + +Returns a tuple of 1-D tensors, one for each dimension in :attr:`input`, +each containing the indices (in that dimension) of all non-zero elements of +:attr:`input` . + +If :attr:`input` has :math:`n` dimensions, then the resulting tuple contains :math:`n` +tensors of size :math:`z`, where :math:`z` is the total number of +non-zero elements in the :attr:`input` tensor. + +As a special case, when :attr:`input` has zero dimensions and a nonzero scalar +value, it is treated as a one-dimensional tensor with one element. + +Args: + {input} + +Keyword args: + out (LongTensor, optional): the output tensor containing indices + +Returns: + LongTensor or tuple of LongTensor: If :attr:`as_tuple` is ``False``, the output + tensor containing indices. If :attr:`as_tuple` is ``True``, one 1-D tensor for + each dimension, containing the indices of each nonzero element along that + dimension. + +Example:: + + >>> torch.nonzero(torch.tensor([1, 1, 1, 0, 1])) + tensor([[ 0], + [ 1], + [ 2], + [ 4]]) + >>> torch.nonzero(torch.tensor([[0.6, 0.0, 0.0, 0.0], + ... [0.0, 0.4, 0.0, 0.0], + ... [0.0, 0.0, 1.2, 0.0], + ... [0.0, 0.0, 0.0,-0.4]])) + tensor([[ 0, 0], + [ 1, 1], + [ 2, 2], + [ 3, 3]]) + >>> torch.nonzero(torch.tensor([1, 1, 1, 0, 1]), as_tuple=True) + (tensor([0, 1, 2, 4]),) + >>> torch.nonzero(torch.tensor([[0.6, 0.0, 0.0, 0.0], + ... [0.0, 0.4, 0.0, 0.0], + ... [0.0, 0.0, 1.2, 0.0], + ... [0.0, 0.0, 0.0,-0.4]]), as_tuple=True) + (tensor([0, 1, 2, 3]), tensor([0, 1, 2, 3])) + >>> torch.nonzero(torch.tensor(5), as_tuple=True) + (tensor([0]),) +""".format( + **common_args + ), +) + +add_docstr( + torch.normal, + r""" +normal(mean, std, *, generator=None, out=None) -> Tensor + +Returns a tensor of random numbers drawn from separate normal distributions +whose mean and standard deviation are given. + +The :attr:`mean` is a tensor with the mean of +each output element's normal distribution + +The :attr:`std` is a tensor with the standard deviation of +each output element's normal distribution + +The shapes of :attr:`mean` and :attr:`std` don't need to match, but the +total number of elements in each tensor need to be the same. + +.. note:: When the shapes do not match, the shape of :attr:`mean` + is used as the shape for the returned output tensor + +.. note:: When :attr:`std` is a CUDA tensor, this function synchronizes + its device with the CPU. + +Args: + mean (Tensor): the tensor of per-element means + std (Tensor): the tensor of per-element standard deviations + +Keyword args: + {generator} + {out} + +Example:: + + >>> torch.normal(mean=torch.arange(1., 11.), std=torch.arange(1, 0, -0.1)) + tensor([ 1.0425, 3.5672, 2.7969, 4.2925, 4.7229, 6.2134, + 8.0505, 8.1408, 9.0563, 10.0566]) + +.. function:: normal(mean=0.0, std, *, out=None) -> Tensor + :noindex: + +Similar to the function above, but the means are shared among all drawn +elements. + +Args: + mean (float, optional): the mean for all distributions + std (Tensor): the tensor of per-element standard deviations + +Keyword args: + {out} + +Example:: + + >>> torch.normal(mean=0.5, std=torch.arange(1., 6.)) + tensor([-1.2793, -1.0732, -2.0687, 5.1177, -1.2303]) + +.. function:: normal(mean, std=1.0, *, out=None) -> Tensor + :noindex: + +Similar to the function above, but the standard deviations are shared among +all drawn elements. + +Args: + mean (Tensor): the tensor of per-element means + std (float, optional): the standard deviation for all distributions + +Keyword args: + out (Tensor, optional): the output tensor + +Example:: + + >>> torch.normal(mean=torch.arange(1., 6.)) + tensor([ 1.1552, 2.6148, 2.6535, 5.8318, 4.2361]) + +.. function:: normal(mean, std, size, *, out=None) -> Tensor + :noindex: + +Similar to the function above, but the means and standard deviations are shared +among all drawn elements. The resulting tensor has size given by :attr:`size`. + +Args: + mean (float): the mean for all distributions + std (float): the standard deviation for all distributions + size (int...): a sequence of integers defining the shape of the output tensor. + +Keyword args: + {out} + +Example:: + + >>> torch.normal(2, 3, size=(1, 4)) + tensor([[-1.3987, -1.9544, 3.6048, 0.7909]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.numel, + r""" +numel(input) -> int + +Returns the total number of elements in the :attr:`input` tensor. + +Args: + {input} + +Example:: + + >>> a = torch.randn(1, 2, 3, 4, 5) + >>> torch.numel(a) + 120 + >>> a = torch.zeros(4,4) + >>> torch.numel(a) + 16 + +""".format( + **common_args + ), +) + +add_docstr( + torch.ones, + r""" +ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a tensor filled with the scalar value `1`, with the shape defined +by the variable argument :attr:`size`. + +Args: + size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + +Keyword arguments: + {out} + {dtype} + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.ones(2, 3) + tensor([[ 1., 1., 1.], + [ 1., 1., 1.]]) + + >>> torch.ones(5) + tensor([ 1., 1., 1., 1., 1.]) + +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.ones_like, + r""" +ones_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor + +Returns a tensor filled with the scalar value `1`, with the same size as +:attr:`input`. ``torch.ones_like(input)`` is equivalent to +``torch.ones(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``. + +.. warning:: + As of 0.4, this function does not support an :attr:`out` keyword. As an alternative, + the old ``torch.ones_like(input, out=output)`` is equivalent to + ``torch.ones(input.size(), out=output)``. + +Args: + {input} + +Keyword arguments: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +Example:: + + >>> input = torch.empty(2, 3) + >>> torch.ones_like(input) + tensor([[ 1., 1., 1.], + [ 1., 1., 1.]]) +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.orgqr, + r""" +orgqr(input, tau) -> Tensor + +Alias for :func:`torch.linalg.householder_product`. +""", +) + +add_docstr( + torch.ormqr, + r""" +ormqr(input, tau, other, left=True, transpose=False, *, out=None) -> Tensor + +Computes the matrix-matrix multiplication of a product of Householder matrices with a general matrix. + +Multiplies a :math:`m \times n` matrix `C` (given by :attr:`other`) with a matrix `Q`, +where `Q` is represented using Householder reflectors `(input, tau)`. +See `Representation of Orthogonal or Unitary Matrices`_ for further details. + +If :attr:`left` is `True` then `op(Q)` times `C` is computed, otherwise the result is `C` times `op(Q)`. +When :attr:`left` is `True`, the implicit matrix `Q` has size :math:`m \times m`. +It has size :math:`n \times n` otherwise. +If :attr:`transpose` is `True` then `op` is the conjugate transpose operation, otherwise it's a no-op. + +Supports inputs of float, double, cfloat and cdouble dtypes. +Also supports batched inputs, and, if the input is batched, the output is batched with the same dimensions. + +.. seealso:: + :func:`torch.geqrf` can be used to form the Householder representation `(input, tau)` of matrix `Q` + from the QR decomposition. + +.. note:: + This function supports backward but it is only fast when ``(input, tau)`` do not require gradients + and/or ``tau.size(-1)`` is very small. + `` + +Args: + input (Tensor): tensor of shape `(*, mn, k)` where `*` is zero or more batch dimensions + and `mn` equals to `m` or `n` depending on the :attr:`left`. + tau (Tensor): tensor of shape `(*, min(mn, k))` where `*` is zero or more batch dimensions. + other (Tensor): tensor of shape `(*, m, n)` where `*` is zero or more batch dimensions. + left (bool): controls the order of multiplication. + transpose (bool): controls whether the matrix `Q` is conjugate transposed or not. + +Keyword args: + out (Tensor, optional): the output Tensor. Ignored if `None`. Default: `None`. + +.. _Representation of Orthogonal or Unitary Matrices: + https://www.netlib.org/lapack/lug/node128.html +""", +) + +add_docstr( + torch.permute, + r""" +permute(input, dims) -> Tensor + +Returns a view of the original tensor :attr:`input` with its dimensions permuted. + +Args: + {input} + dims (tuple of int): The desired ordering of dimensions + +Example: + >>> x = torch.randn(2, 3, 5) + >>> x.size() + torch.Size([2, 3, 5]) + >>> torch.permute(x, (2, 0, 1)).size() + torch.Size([5, 2, 3]) +""".format( + **common_args + ), +) + +add_docstr( + torch.poisson, + r""" +poisson(input, generator=None) -> Tensor + +Returns a tensor of the same size as :attr:`input` with each element +sampled from a Poisson distribution with rate parameter given by the corresponding +element in :attr:`input` i.e., + +.. math:: + \text{{out}}_i \sim \text{{Poisson}}(\text{{input}}_i) + +:attr:`input` must be non-negative. + +Args: + input (Tensor): the input tensor containing the rates of the Poisson distribution + +Keyword args: + {generator} + +Example:: + + >>> rates = torch.rand(4, 4) * 5 # rate parameter between 0 and 5 + >>> torch.poisson(rates) + tensor([[9., 1., 3., 5.], + [8., 6., 6., 0.], + [0., 4., 5., 3.], + [2., 1., 4., 2.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.polygamma, + r""" +polygamma(n, input, *, out=None) -> Tensor + +Alias for :func:`torch.special.polygamma`. +""", +) + +add_docstr( + torch.positive, + r""" +positive(input) -> Tensor + +Returns :attr:`input`. +Throws a runtime error if :attr:`input` is a bool tensor. +""" + + r""" +Args: + {input} + +Example:: + + >>> t = torch.randn(5) + >>> t + tensor([ 0.0090, -0.2262, -0.0682, -0.2866, 0.3940]) + >>> torch.positive(t) + tensor([ 0.0090, -0.2262, -0.0682, -0.2866, 0.3940]) +""".format( + **common_args + ), +) + +add_docstr( + torch.pow, + r""" +pow(input, exponent, *, out=None) -> Tensor + +Takes the power of each element in :attr:`input` with :attr:`exponent` and +returns a tensor with the result. + +:attr:`exponent` can be either a single ``float`` number or a `Tensor` +with the same number of elements as :attr:`input`. + +When :attr:`exponent` is a scalar value, the operation applied is: + +.. math:: + \text{out}_i = x_i ^ \text{exponent} + +When :attr:`exponent` is a tensor, the operation applied is: + +.. math:: + \text{out}_i = x_i ^ {\text{exponent}_i} +""" + + r""" +When :attr:`exponent` is a tensor, the shapes of :attr:`input` +and :attr:`exponent` must be :ref:`broadcastable `. + +Args: + {input} + exponent (float or tensor): the exponent value + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.4331, 1.2475, 0.6834, -0.2791]) + >>> torch.pow(a, 2) + tensor([ 0.1875, 1.5561, 0.4670, 0.0779]) + >>> exp = torch.arange(1., 5.) + + >>> a = torch.arange(1., 5.) + >>> a + tensor([ 1., 2., 3., 4.]) + >>> exp + tensor([ 1., 2., 3., 4.]) + >>> torch.pow(a, exp) + tensor([ 1., 4., 27., 256.]) + +.. function:: pow(self, exponent, *, out=None) -> Tensor + :noindex: + +:attr:`self` is a scalar ``float`` value, and :attr:`exponent` is a tensor. +The returned tensor :attr:`out` is of the same shape as :attr:`exponent` + +The operation applied is: + +.. math:: + \text{{out}}_i = \text{{self}} ^ {{\text{{exponent}}_i}} + +Args: + self (float): the scalar base value for the power operation + exponent (Tensor): the exponent tensor + +Keyword args: + {out} + +Example:: + + >>> exp = torch.arange(1., 5.) + >>> base = 2 + >>> torch.pow(base, exp) + tensor([ 2., 4., 8., 16.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.float_power, + r""" +float_power(input, exponent, *, out=None) -> Tensor + +Raises :attr:`input` to the power of :attr:`exponent`, elementwise, in double precision. +If neither input is complex returns a ``torch.float64`` tensor, +and if one or more inputs is complex returns a ``torch.complex128`` tensor. + +.. note:: + This function always computes in double precision, unlike :func:`torch.pow`, + which implements more typical :ref:`type promotion `. + This is useful when the computation needs to be performed in a wider or more precise dtype, + or the results of the computation may contain fractional values not representable in the input dtypes, + like when an integer base is raised to a negative integer exponent. + +Args: + input (Tensor or Number): the base value(s) + exponent (Tensor or Number): the exponent value(s) + +Keyword args: + {out} + +Example:: + + >>> a = torch.randint(10, (4,)) + >>> a + tensor([6, 4, 7, 1]) + >>> torch.float_power(a, 2) + tensor([36., 16., 49., 1.], dtype=torch.float64) + + >>> a = torch.arange(1, 5) + >>> a + tensor([ 1, 2, 3, 4]) + >>> exp = torch.tensor([2, -3, 4, -5]) + >>> exp + tensor([ 2, -3, 4, -5]) + >>> torch.float_power(a, exp) + tensor([1.0000e+00, 1.2500e-01, 8.1000e+01, 9.7656e-04], dtype=torch.float64) +""".format( + **common_args + ), +) + +add_docstr( + torch.prod, + r""" +prod(input, *, dtype=None) -> Tensor + +Returns the product of all elements in the :attr:`input` tensor. + +Args: + {input} + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[-0.8020, 0.5428, -1.5854]]) + >>> torch.prod(a) + tensor(0.6902) + +.. function:: prod(input, dim, keepdim=False, *, dtype=None) -> Tensor + :noindex: + +Returns the product of each row of the :attr:`input` tensor in the given +dimension :attr:`dim`. + +{keepdim_details} + +Args: + {input} + {dim} + {keepdim} + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.randn(4, 2) + >>> a + tensor([[ 0.5261, -0.3837], + [ 1.1857, -0.2498], + [-1.1646, 0.0705], + [ 1.1131, -1.0629]]) + >>> torch.prod(a, 1) + tensor([-0.2018, -0.2962, -0.0821, -1.1831]) +""".format( + **single_dim_common + ), +) + +add_docstr( + torch.promote_types, + r""" +promote_types(type1, type2) -> dtype + +Returns the :class:`torch.dtype` with the smallest size and scalar kind that is +not smaller nor of lower kind than either `type1` or `type2`. See type promotion +:ref:`documentation ` for more information on the type +promotion logic. + +Args: + type1 (:class:`torch.dtype`) + type2 (:class:`torch.dtype`) + +Example:: + + >>> torch.promote_types(torch.int32, torch.float32) + torch.float32 + >>> torch.promote_types(torch.uint8, torch.long) + torch.long +""", +) + +add_docstr( + torch.qr, + r""" +qr(input, some=True, *, out=None) -> (Tensor, Tensor) + +Computes the QR decomposition of a matrix or a batch of matrices :attr:`input`, +and returns a namedtuple (Q, R) of tensors such that :math:`\text{input} = Q R` +with :math:`Q` being an orthogonal matrix or batch of orthogonal matrices and +:math:`R` being an upper triangular matrix or batch of upper triangular matrices. + +If :attr:`some` is ``True``, then this function returns the thin (reduced) QR factorization. +Otherwise, if :attr:`some` is ``False``, this function returns the complete QR factorization. + +.. warning:: + + :func:`torch.qr` is deprecated in favor of :func:`torch.linalg.qr` + and will be removed in a future PyTorch release. The boolean parameter :attr:`some` has been + replaced with a string parameter :attr:`mode`. + + ``Q, R = torch.qr(A)`` should be replaced with + + .. code:: python + + Q, R = torch.linalg.qr(A) + + ``Q, R = torch.qr(A, some=False)`` should be replaced with + + .. code:: python + + Q, R = torch.linalg.qr(A, mode="complete") + +.. warning:: + If you plan to backpropagate through QR, note that the current backward implementation + is only well-defined when the first :math:`\min(input.size(-1), input.size(-2))` + columns of :attr:`input` are linearly independent. + This behavior will probably change once QR supports pivoting. + +.. note:: This function uses LAPACK for CPU inputs and MAGMA for CUDA inputs, + and may produce different (valid) decompositions on different device types + or different platforms. + +Args: + input (Tensor): the input tensor of size :math:`(*, m, n)` where `*` is zero or more + batch dimensions consisting of matrices of dimension :math:`m \times n`. + some (bool, optional): Set to ``True`` for reduced QR decomposition and ``False`` for + complete QR decomposition. If `k = min(m, n)` then: + + * ``some=True`` : returns `(Q, R)` with dimensions (m, k), (k, n) (default) + + * ``'some=False'``: returns `(Q, R)` with dimensions (m, m), (m, n) + +Keyword args: + out (tuple, optional): tuple of `Q` and `R` tensors. + The dimensions of `Q` and `R` are detailed in the description of :attr:`some` above. + +Example:: + + >>> a = torch.tensor([[12., -51, 4], [6, 167, -68], [-4, 24, -41]]) + >>> q, r = torch.qr(a) + >>> q + tensor([[-0.8571, 0.3943, 0.3314], + [-0.4286, -0.9029, -0.0343], + [ 0.2857, -0.1714, 0.9429]]) + >>> r + tensor([[ -14.0000, -21.0000, 14.0000], + [ 0.0000, -175.0000, 70.0000], + [ 0.0000, 0.0000, -35.0000]]) + >>> torch.mm(q, r).round() + tensor([[ 12., -51., 4.], + [ 6., 167., -68.], + [ -4., 24., -41.]]) + >>> torch.mm(q.t(), q).round() + tensor([[ 1., 0., 0.], + [ 0., 1., -0.], + [ 0., -0., 1.]]) + >>> a = torch.randn(3, 4, 5) + >>> q, r = torch.qr(a, some=False) + >>> torch.allclose(torch.matmul(q, r), a) + True + >>> torch.allclose(torch.matmul(q.mT, q), torch.eye(5)) + True +""", +) + +add_docstr( + torch.rad2deg, + r""" +rad2deg(input, *, out=None) -> Tensor + +Returns a new tensor with each of the elements of :attr:`input` +converted from angles in radians to degrees. + +Args: + {input} + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]]) + >>> torch.rad2deg(a) + tensor([[ 180.0233, -180.0233], + [ 359.9894, -359.9894], + [ 89.9544, -89.9544]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.deg2rad, + r""" +deg2rad(input, *, out=None) -> Tensor + +Returns a new tensor with each of the elements of :attr:`input` +converted from angles in degrees to radians. + +Args: + {input} + +Keyword arguments: + {out} + +Example:: + + >>> a = torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]]) + >>> torch.deg2rad(a) + tensor([[ 3.1416, -3.1416], + [ 6.2832, -6.2832], + [ 1.5708, -1.5708]]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.heaviside, + r""" +heaviside(input, values, *, out=None) -> Tensor + +Computes the Heaviside step function for each element in :attr:`input`. +The Heaviside step function is defined as: + +.. math:: + \text{{heaviside}}(input, values) = \begin{cases} + 0, & \text{if input < 0}\\ + values, & \text{if input == 0}\\ + 1, & \text{if input > 0} + \end{cases} +""" + + r""" + +Args: + {input} + values (Tensor): The values to use where :attr:`input` is zero. + +Keyword arguments: + {out} + +Example:: + + >>> input = torch.tensor([-1.5, 0, 2.0]) + >>> values = torch.tensor([0.5]) + >>> torch.heaviside(input, values) + tensor([0.0000, 0.5000, 1.0000]) + >>> values = torch.tensor([1.2, -2.0, 3.5]) + >>> torch.heaviside(input, values) + tensor([0., -2., 1.]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.rand, + """ +rand(*size, *, generator=None, out=None, dtype=None, layout=torch.strided, device=None, \ +requires_grad=False, pin_memory=False) -> Tensor +""" + + r""" +Returns a tensor filled with random numbers from a uniform distribution +on the interval :math:`[0, 1)` + +The shape of the tensor is defined by the variable argument :attr:`size`. + +Args: + size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + +Keyword args: + {generator} + {out} + {dtype} + {layout} + {device} + {requires_grad} + {pin_memory} + +Example:: + + >>> torch.rand(4) + tensor([ 0.5204, 0.2503, 0.3525, 0.5673]) + >>> torch.rand(2, 3) + tensor([[ 0.8237, 0.5781, 0.6879], + [ 0.3816, 0.7249, 0.0998]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.rand_like, + r""" +rand_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor + +Returns a tensor with the same size as :attr:`input` that is filled with +random numbers from a uniform distribution on the interval :math:`[0, 1)`. +``torch.rand_like(input)`` is equivalent to +``torch.rand(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``. + +Args: + {input} + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.randint, + """ +randint(low=0, high, size, \\*, generator=None, out=None, \ +dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a tensor filled with random integers generated uniformly +between :attr:`low` (inclusive) and :attr:`high` (exclusive). + +The shape of the tensor is defined by the variable argument :attr:`size`. + +.. note:: + With the global dtype default (``torch.float32``), this function returns + a tensor with dtype ``torch.int64``. + +Args: + low (int, optional): Lowest integer to be drawn from the distribution. Default: 0. + high (int): One above the highest integer to be drawn from the distribution. + size (tuple): a tuple defining the shape of the output tensor. + +Keyword args: + {generator} + {out} + dtype (`torch.dtype`, optional) - the desired data type of returned tensor. Default: if ``None``, + this function returns a tensor with dtype ``torch.int64``. + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.randint(3, 5, (3,)) + tensor([4, 3, 4]) + + + >>> torch.randint(10, (2, 2)) + tensor([[0, 2], + [5, 5]]) + + + >>> torch.randint(3, 10, (2, 2)) + tensor([[4, 5], + [6, 7]]) + + +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.randint_like, + """ +randint_like(input, low=0, high, \\*, dtype=None, layout=torch.strided, device=None, requires_grad=False, \ +memory_format=torch.preserve_format) -> Tensor + +Returns a tensor with the same shape as Tensor :attr:`input` filled with +random integers generated uniformly between :attr:`low` (inclusive) and +:attr:`high` (exclusive). + +.. note: + With the global dtype default (``torch.float32``), this function returns + a tensor with dtype ``torch.int64``. + +Args: + {input} + low (int, optional): Lowest integer to be drawn from the distribution. Default: 0. + high (int): One above the highest integer to be drawn from the distribution. + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.randn, + """ +randn(*size, *, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, \ +pin_memory=False) -> Tensor +""" + + r""" + +Returns a tensor filled with random numbers from a normal distribution +with mean `0` and variance `1` (also called the standard normal +distribution). + +.. math:: + \text{{out}}_{{i}} \sim \mathcal{{N}}(0, 1) + +For complex dtypes, the tensor is i.i.d. sampled from a `complex normal distribution`_ with zero mean and +unit variance as + +.. math:: + \text{{out}}_{{i}} \sim \mathcal{{CN}}(0, 1) + +This is equivalent to separately sampling the real :math:`(\operatorname{{Re}})` and imaginary +:math:`(\operatorname{{Im}})` part of :math:`\text{{out}}_i` as + +.. math:: + \operatorname{{Re}}(\text{{out}}_{{i}}) \sim \mathcal{{N}}(0, \frac{{1}}{{2}}),\quad + \operatorname{{Im}}(\text{{out}}_{{i}}) \sim \mathcal{{N}}(0, \frac{{1}}{{2}}) + +The shape of the tensor is defined by the variable argument :attr:`size`. + + +Args: + size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + +Keyword args: + {generator} + {out} + {dtype} + {layout} + {device} + {requires_grad} + {pin_memory} + +Example:: + + >>> torch.randn(4) + tensor([-2.1436, 0.9966, 2.3426, -0.6366]) + >>> torch.randn(2, 3) + tensor([[ 1.5954, 2.8929, -1.0923], + [ 1.1719, -0.4709, -0.1996]]) + +.. _complex normal distribution: https://en.wikipedia.org/wiki/Complex_normal_distribution +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.randn_like, + r""" +randn_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor + +Returns a tensor with the same size as :attr:`input` that is filled with +random numbers from a normal distribution with mean 0 and variance 1. Please refer to :func:`torch.randn` for the +sampling process of complex dtypes. ``torch.randn_like(input)`` is equivalent to +``torch.randn(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``. + +Args: + {input} + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.randperm, + """ +randperm(n, *, generator=None, out=None, dtype=torch.int64,layout=torch.strided, \ +device=None, requires_grad=False, pin_memory=False) -> Tensor +""" + + r""" +Returns a random permutation of integers from ``0`` to ``n - 1``. + +Args: + n (int): the upper bound (exclusive) + +Keyword args: + {generator} + {out} + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: ``torch.int64``. + {layout} + {device} + {requires_grad} + {pin_memory} + +Example:: + + >>> torch.randperm(4) + tensor([2, 1, 0, 3]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.tensor, + r""" +tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False) -> Tensor + +Constructs a tensor with no autograd history (also known as a "leaf tensor", see :doc:`/notes/autograd`) by copying :attr:`data`. + +.. warning:: + + When working with tensors prefer using :func:`torch.Tensor.clone`, + :func:`torch.Tensor.detach`, and :func:`torch.Tensor.requires_grad_` for + readability. Letting `t` be a tensor, ``torch.tensor(t)`` is equivalent to + ``t.clone().detach()``, and ``torch.tensor(t, requires_grad=True)`` + is equivalent to ``t.clone().detach().requires_grad_(True)``. + +.. seealso:: + + :func:`torch.as_tensor` preserves autograd history and avoids copies where possible. + :func:`torch.from_numpy` creates a tensor that shares storage with a NumPy array. + +Args: + {data} + +Keyword args: + {dtype} + device (:class:`torch.device`, optional): the device of the constructed tensor. If None and data is a tensor + then the device of data is used. If None and data is not a tensor then + the result tensor is constructed on the current device. + {requires_grad} + {pin_memory} + + +Example:: + + >>> torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]]) + tensor([[ 0.1000, 1.2000], + [ 2.2000, 3.1000], + [ 4.9000, 5.2000]]) + + >>> torch.tensor([0, 1]) # Type inference on data + tensor([ 0, 1]) + + >>> torch.tensor([[0.11111, 0.222222, 0.3333333]], + ... dtype=torch.float64, + ... device=torch.device('cuda:0')) # creates a double tensor on a CUDA device + tensor([[ 0.1111, 0.2222, 0.3333]], dtype=torch.float64, device='cuda:0') + + >>> torch.tensor(3.14159) # Create a zero-dimensional (scalar) tensor + tensor(3.1416) + + >>> torch.tensor([]) # Create an empty tensor (of size (0,)) + tensor([]) +""".format( + **factory_data_common_args + ), +) + +add_docstr( + torch.range, + r""" +range(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a 1-D tensor of size :math:`\left\lfloor \frac{\text{end} - \text{start}}{\text{step}} \right\rfloor + 1` +with values from :attr:`start` to :attr:`end` with step :attr:`step`. Step is +the gap between two values in the tensor. + +.. math:: + \text{out}_{i+1} = \text{out}_i + \text{step}. +""" + + r""" +.. warning:: + This function is deprecated and will be removed in a future release because its behavior is inconsistent with + Python's range builtin. Instead, use :func:`torch.arange`, which produces values in [start, end). + +Args: + start (float): the starting value for the set of points. Default: ``0``. + end (float): the ending value for the set of points + step (float): the gap between each pair of adjacent points. Default: ``1``. + +Keyword args: + {out} + {dtype} If `dtype` is not given, infer the data type from the other input + arguments. If any of `start`, `end`, or `stop` are floating-point, the + `dtype` is inferred to be the default dtype, see + :meth:`~torch.get_default_dtype`. Otherwise, the `dtype` is inferred to + be `torch.int64`. + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.range(1, 4) + tensor([ 1., 2., 3., 4.]) + >>> torch.range(1, 4, 0.5) + tensor([ 1.0000, 1.5000, 2.0000, 2.5000, 3.0000, 3.5000, 4.0000]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.arange, + r""" +arange(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a 1-D tensor of size :math:`\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil` +with values from the interval ``[start, end)`` taken with common difference +:attr:`step` beginning from `start`. + +Note that non-integer :attr:`step` is subject to floating point rounding errors when +comparing against :attr:`end`; to avoid inconsistency, we advise subtracting a small epsilon from :attr:`end` +in such cases. + +.. math:: + \text{out}_{{i+1}} = \text{out}_{i} + \text{step} +""" + + r""" +Args: + start (Number): the starting value for the set of points. Default: ``0``. + end (Number): the ending value for the set of points + step (Number): the gap between each pair of adjacent points. Default: ``1``. + +Keyword args: + {out} + {dtype} If `dtype` is not given, infer the data type from the other input + arguments. If any of `start`, `end`, or `stop` are floating-point, the + `dtype` is inferred to be the default dtype, see + :meth:`~torch.get_default_dtype`. Otherwise, the `dtype` is inferred to + be `torch.int64`. + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.arange(5) + tensor([ 0, 1, 2, 3, 4]) + >>> torch.arange(1, 4) + tensor([ 1, 2, 3]) + >>> torch.arange(1, 2.5, 0.5) + tensor([ 1.0000, 1.5000, 2.0000]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.ravel, + r""" +ravel(input) -> Tensor + +Return a contiguous flattened tensor. A copy is made only if needed. + +Args: + {input} + +Example:: + + >>> t = torch.tensor([[[1, 2], + ... [3, 4]], + ... [[5, 6], + ... [7, 8]]]) + >>> torch.ravel(t) + tensor([1, 2, 3, 4, 5, 6, 7, 8]) +""".format( + **common_args + ), +) + +add_docstr( + torch.remainder, + r""" +remainder(input, other, *, out=None) -> Tensor + +Computes +`Python's modulus operation `_ +entrywise. The result has the same sign as the divisor :attr:`other` and its absolute value +is less than that of :attr:`other`. + +It may also be defined in terms of :func:`torch.div` as + +.. code:: python + + torch.remainder(a, b) == a - a.div(b, rounding_mode="floor") * b + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer and float inputs. + +.. note:: + Complex inputs are not supported. In some cases, it is not mathematically + possible to satisfy the definition of a modulo operation with complex numbers. + See :func:`torch.fmod` for how division by zero is handled. + +.. seealso:: + + :func:`torch.fmod` which implements C++'s `std::fmod `_. + This one is defined in terms of division rounding towards zero. + +Args: + input (Tensor or Scalar): the dividend + other (Tensor or Scalar): the divisor + +Keyword args: + {out} + +Example:: + + >>> torch.remainder(torch.tensor([-3., -2, -1, 1, 2, 3]), 2) + tensor([ 1., 0., 1., 1., 0., 1.]) + >>> torch.remainder(torch.tensor([1, 2, 3, 4, 5]), -1.5) + tensor([ -0.5000, -1.0000, 0.0000, -0.5000, -1.0000 ]) +""".format( + **common_args + ), +) + +add_docstr( + torch.renorm, + r""" +renorm(input, p, dim, maxnorm, *, out=None) -> Tensor + +Returns a tensor where each sub-tensor of :attr:`input` along dimension +:attr:`dim` is normalized such that the `p`-norm of the sub-tensor is lower +than the value :attr:`maxnorm` + +.. note:: If the norm of a row is lower than `maxnorm`, the row is unchanged + +Args: + {input} + p (float): the power for the norm computation + dim (int): the dimension to slice over to get the sub-tensors + maxnorm (float): the maximum norm to keep each sub-tensor under + +Keyword args: + {out} + +Example:: + + >>> x = torch.ones(3, 3) + >>> x[1].fill_(2) + tensor([ 2., 2., 2.]) + >>> x[2].fill_(3) + tensor([ 3., 3., 3.]) + >>> x + tensor([[ 1., 1., 1.], + [ 2., 2., 2.], + [ 3., 3., 3.]]) + >>> torch.renorm(x, 1, 0, 5) + tensor([[ 1.0000, 1.0000, 1.0000], + [ 1.6667, 1.6667, 1.6667], + [ 1.6667, 1.6667, 1.6667]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.reshape, + r""" +reshape(input, shape) -> Tensor + +Returns a tensor with the same data and number of elements as :attr:`input`, +but with the specified shape. When possible, the returned tensor will be a view +of :attr:`input`. Otherwise, it will be a copy. Contiguous inputs and inputs +with compatible strides can be reshaped without copying, but you should not +depend on the copying vs. viewing behavior. + +See :meth:`torch.Tensor.view` on when it is possible to return a view. + +A single dimension may be -1, in which case it's inferred from the remaining +dimensions and the number of elements in :attr:`input`. + +Args: + input (Tensor): the tensor to be reshaped + shape (tuple of int): the new shape + +Example:: + + >>> a = torch.arange(4.) + >>> torch.reshape(a, (2, 2)) + tensor([[ 0., 1.], + [ 2., 3.]]) + >>> b = torch.tensor([[0, 1], [2, 3]]) + >>> torch.reshape(b, (-1,)) + tensor([ 0, 1, 2, 3]) +""", +) + + +add_docstr( + torch.result_type, + r""" +result_type(tensor1, tensor2) -> dtype + +Returns the :class:`torch.dtype` that would result from performing an arithmetic +operation on the provided input tensors. See type promotion :ref:`documentation ` +for more information on the type promotion logic. + +Args: + tensor1 (Tensor or Number): an input tensor or number + tensor2 (Tensor or Number): an input tensor or number + +Example:: + + >>> torch.result_type(torch.tensor([1, 2], dtype=torch.int), 1.0) + torch.float32 + >>> torch.result_type(torch.tensor([1, 2], dtype=torch.uint8), torch.tensor(1)) + torch.uint8 +""", +) + +add_docstr( + torch.row_stack, + r""" +row_stack(tensors, *, out=None) -> Tensor + +Alias of :func:`torch.vstack`. +""", +) + +add_docstr( + torch.round, + r""" +round(input, *, decimals=0, out=None) -> Tensor + +Rounds elements of :attr:`input` to the nearest integer. + +For integer inputs, follows the array-api convention of returning a +copy of the input tensor. +The return type of output is same as that of input's dtype. + +.. note:: + This function implements the "round half to even" to + break ties when a number is equidistant from two + integers (e.g. `round(2.5)` is 2). + + When the :attr:\`decimals\` argument is specified the + algorithm used is similar to NumPy's `around`. This + algorithm is fast but inexact and it can easily + overflow for low precision dtypes. + Eg. `round(tensor([10000], dtype=torch.float16), decimals=3)` is `inf`. + +.. seealso:: + :func:`torch.ceil`, which rounds up. + :func:`torch.floor`, which rounds down. + :func:`torch.trunc`, which rounds towards zero. + +Args: + {input} + decimals (int): Number of decimal places to round to (default: 0). + If decimals is negative, it specifies the number of positions + to the left of the decimal point. + +Keyword args: + {out} + +Example:: + + >>> torch.round(torch.tensor((4.7, -2.3, 9.1, -7.7))) + tensor([ 5., -2., 9., -8.]) + + >>> # Values equidistant from two integers are rounded towards the + >>> # the nearest even value (zero is treated as even) + >>> torch.round(torch.tensor([-0.5, 0.5, 1.5, 2.5])) + tensor([-0., 0., 2., 2.]) + + >>> # A positive decimals argument rounds to the to that decimal place + >>> torch.round(torch.tensor([0.1234567]), decimals=3) + tensor([0.1230]) + + >>> # A negative decimals argument rounds to the left of the decimal + >>> torch.round(torch.tensor([1200.1234567]), decimals=-3) + tensor([1000.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.rsqrt, + r""" +rsqrt(input, *, out=None) -> Tensor + +Returns a new tensor with the reciprocal of the square-root of each of +the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \frac{1}{\sqrt{\text{input}_{i}}} +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.0370, 0.2970, 1.5420, -0.9105]) + >>> torch.rsqrt(a) + tensor([ nan, 1.8351, 0.8053, nan]) +""".format( + **common_args + ), +) + +add_docstr( + torch.scatter, + r""" +scatter(input, dim, index, src) -> Tensor + +Out-of-place version of :meth:`torch.Tensor.scatter_` +""", +) + +add_docstr( + torch.scatter_add, + r""" +scatter_add(input, dim, index, src) -> Tensor + +Out-of-place version of :meth:`torch.Tensor.scatter_add_` +""", +) + +add_docstr( + torch.scatter_reduce, + r""" +scatter_reduce(input, dim, index, src, reduce, *, include_self=True) -> Tensor + +Out-of-place version of :meth:`torch.Tensor.scatter_reduce_` +""", +) + +add_docstr( + torch.select, + r""" +select(input, dim, index) -> Tensor + +Slices the :attr:`input` tensor along the selected dimension at the given index. +This function returns a view of the original tensor with the given dimension removed. + +.. note:: If :attr:`input` is a sparse tensor and returning a view of + the tensor is not possible, a RuntimeError exception is + raised. In this is the case, consider using + :func:`torch.select_copy` function. + +Args: + {input} + dim (int): the dimension to slice + index (int): the index to select with + +.. note:: + + :meth:`select` is equivalent to slicing. For example, + ``tensor.select(0, index)`` is equivalent to ``tensor[index]`` and + ``tensor.select(2, index)`` is equivalent to ``tensor[:,:,index]``. +""".format( + **common_args + ), +) + +add_docstr( + torch.select_scatter, + r""" +select_scatter(input, src, dim, index) -> Tensor + +Embeds the values of the :attr:`src` tensor into :attr:`input` at the given index. +This function returns a tensor with fresh storage; it does not create a view. + + +Args: + {input} + src (Tensor): The tensor to embed into :attr:`input` + dim (int): the dimension to insert the slice into. + index (int): the index to select with + +.. note:: + + :attr:`src` must be of the proper size in order to be embedded + into :attr:`input`. Specifically, it should have the same shape as + ``torch.select(input, dim, index)`` + +Example:: + + >>> a = torch.zeros(2, 2) + >>> b = torch.ones(2) + >>> a.select_scatter(b, 0, 0) + tensor([[1., 1.], + [0., 0.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.slice_scatter, + r""" +slice_scatter(input, src, dim=0, start=None, end=None, step=1) -> Tensor + +Embeds the values of the :attr:`src` tensor into :attr:`input` at the given +dimension. +This function returns a tensor with fresh storage; it does not create a view. + + +Args: + {input} + src (Tensor): The tensor to embed into :attr:`input` + dim (int): the dimension to insert the slice into + start (Optional[int]): the start index of where to insert the slice + end (Optional[int]): the end index of where to insert the slice + step (int): the how many elements to skip in + +Example:: + + >>> a = torch.zeros(8, 8) + >>> b = torch.ones(2, 8) + >>> a.slice_scatter(b, start=6) + tensor([[0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0.], + [1., 1., 1., 1., 1., 1., 1., 1.], + [1., 1., 1., 1., 1., 1., 1., 1.]]) + + >>> b = torch.ones(8, 2) + >>> a.slice_scatter(b, dim=1, start=2, end=6, step=2) + tensor([[0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0.]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.set_flush_denormal, + r""" +set_flush_denormal(mode) -> bool + +Disables denormal floating numbers on CPU. + +Returns ``True`` if your system supports flushing denormal numbers and it +successfully configures flush denormal mode. :meth:`~torch.set_flush_denormal` +is supported on x86 architectures supporting SSE3 and AArch64 architecture. + +Args: + mode (bool): Controls whether to enable flush denormal mode or not + +Example:: + + >>> torch.set_flush_denormal(True) + True + >>> torch.tensor([1e-323], dtype=torch.float64) + tensor([ 0.], dtype=torch.float64) + >>> torch.set_flush_denormal(False) + True + >>> torch.tensor([1e-323], dtype=torch.float64) + tensor(9.88131e-324 * + [ 1.0000], dtype=torch.float64) +""", +) + +add_docstr( + torch.set_num_threads, + r""" +set_num_threads(int) + +Sets the number of threads used for intraop parallelism on CPU. + +.. warning:: + To ensure that the correct number of threads is used, set_num_threads + must be called before running eager, JIT or autograd code. +""", +) + +add_docstr( + torch.set_num_interop_threads, + r""" +set_num_interop_threads(int) + +Sets the number of threads used for interop parallelism +(e.g. in JIT interpreter) on CPU. + +.. warning:: + Can only be called once and before any inter-op parallel work + is started (e.g. JIT execution). +""", +) + +add_docstr( + torch.sigmoid, + r""" +sigmoid(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.expit`. +""", +) + +add_docstr( + torch.logit, + r""" +logit(input, eps=None, *, out=None) -> Tensor + +Alias for :func:`torch.special.logit`. +""", +) + +add_docstr( + torch.sign, + r""" +sign(input, *, out=None) -> Tensor + +Returns a new tensor with the signs of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \operatorname{sgn}(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([0.7, -1.2, 0., 2.3]) + >>> a + tensor([ 0.7000, -1.2000, 0.0000, 2.3000]) + >>> torch.sign(a) + tensor([ 1., -1., 0., 1.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.signbit, + r""" +signbit(input, *, out=None) -> Tensor + +Tests if each element of :attr:`input` has its sign bit set or not. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.tensor([0.7, -1.2, 0., 2.3]) + >>> torch.signbit(a) + tensor([ False, True, False, False]) + >>> a = torch.tensor([-0.0, 0.0]) + >>> torch.signbit(a) + tensor([ True, False]) + +.. note:: + signbit handles signed zeros, so negative zero (-0) returns True. + +""".format( + **common_args + ), +) + +add_docstr( + torch.sgn, + r""" +sgn(input, *, out=None) -> Tensor + +This function is an extension of torch.sign() to complex tensors. +It computes a new tensor whose elements have +the same angles as the corresponding elements of :attr:`input` and +absolute values (i.e. magnitudes) of one for complex tensors and +is equivalent to torch.sign() for non-complex tensors. + +.. math:: + \text{out}_{i} = \begin{cases} + 0 & |\text{{input}}_i| == 0 \\ + \frac{{\text{{input}}_i}}{|{\text{{input}}_i}|} & \text{otherwise} + \end{cases} + +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> t = torch.tensor([3+4j, 7-24j, 0, 1+2j]) + >>> t.sgn() + tensor([0.6000+0.8000j, 0.2800-0.9600j, 0.0000+0.0000j, 0.4472+0.8944j]) +""".format( + **common_args + ), +) + +add_docstr( + torch.sin, + r""" +sin(input, *, out=None) -> Tensor + +Returns a new tensor with the sine of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \sin(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-0.5461, 0.1347, -2.7266, -0.2746]) + >>> torch.sin(a) + tensor([-0.5194, 0.1343, -0.4032, -0.2711]) +""".format( + **common_args + ), +) + +add_docstr( + torch.sinc, + r""" +sinc(input, *, out=None) -> Tensor + +Alias for :func:`torch.special.sinc`. +""", +) + +add_docstr( + torch.sinh, + r""" +sinh(input, *, out=None) -> Tensor + +Returns a new tensor with the hyperbolic sine of the elements of +:attr:`input`. + +.. math:: + \text{out}_{i} = \sinh(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.5380, -0.8632, -0.1265, 0.9399]) + >>> torch.sinh(a) + tensor([ 0.5644, -0.9744, -0.1268, 1.0845]) + +.. note:: + When :attr:`input` is on the CPU, the implementation of torch.sinh may use + the Sleef library, which rounds very large results to infinity or negative + infinity. See `here `_ for details. +""".format( + **common_args + ), +) + +add_docstr( + torch.sort, + r""" +sort(input, dim=-1, descending=False, stable=False, *, out=None) -> (Tensor, LongTensor) + +Sorts the elements of the :attr:`input` tensor along a given dimension +in ascending order by value. + +If :attr:`dim` is not given, the last dimension of the `input` is chosen. + +If :attr:`descending` is ``True`` then the elements are sorted in descending +order by value. + +If :attr:`stable` is ``True`` then the sorting routine becomes stable, preserving +the order of equivalent elements. + +A namedtuple of (values, indices) is returned, where the `values` are the +sorted values and `indices` are the indices of the elements in the original +`input` tensor. + +Args: + {input} + dim (int, optional): the dimension to sort along + descending (bool, optional): controls the sorting order (ascending or descending) + stable (bool, optional): makes the sorting routine stable, which guarantees that the order + of equivalent elements is preserved. + +Keyword args: + out (tuple, optional): the output tuple of (`Tensor`, `LongTensor`) that can + be optionally given to be used as output buffers + +Example:: + + >>> x = torch.randn(3, 4) + >>> sorted, indices = torch.sort(x) + >>> sorted + tensor([[-0.2162, 0.0608, 0.6719, 2.3332], + [-0.5793, 0.0061, 0.6058, 0.9497], + [-0.5071, 0.3343, 0.9553, 1.0960]]) + >>> indices + tensor([[ 1, 0, 2, 3], + [ 3, 1, 0, 2], + [ 0, 3, 1, 2]]) + + >>> sorted, indices = torch.sort(x, 0) + >>> sorted + tensor([[-0.5071, -0.2162, 0.6719, -0.5793], + [ 0.0608, 0.0061, 0.9497, 0.3343], + [ 0.6058, 0.9553, 1.0960, 2.3332]]) + >>> indices + tensor([[ 2, 0, 0, 1], + [ 0, 1, 1, 2], + [ 1, 2, 2, 0]]) + >>> x = torch.tensor([0, 1] * 9) + >>> x.sort() + torch.return_types.sort( + values=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + indices=tensor([ 2, 16, 4, 6, 14, 8, 0, 10, 12, 9, 17, 15, 13, 11, 7, 5, 3, 1])) + >>> x.sort(stable=True) + torch.return_types.sort( + values=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + indices=tensor([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 1, 3, 5, 7, 9, 11, 13, 15, 17])) +""".format( + **common_args + ), +) + +add_docstr( + torch.argsort, + r""" +argsort(input, dim=-1, descending=False, stable=False) -> Tensor + +Returns the indices that sort a tensor along a given dimension in ascending +order by value. + +This is the second value returned by :meth:`torch.sort`. See its documentation +for the exact semantics of this method. + +If :attr:`stable` is ``True`` then the sorting routine becomes stable, preserving +the order of equivalent elements. If ``False``, the relative order of values +which compare equal is not guaranteed. ``True`` is slower. + +Args: + {input} + dim (int, optional): the dimension to sort along + descending (bool, optional): controls the sorting order (ascending or descending) + stable (bool, optional): controls the relative order of equivalent elements + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.0785, 1.5267, -0.8521, 0.4065], + [ 0.1598, 0.0788, -0.0745, -1.2700], + [ 1.2208, 1.0722, -0.7064, 1.2564], + [ 0.0669, -0.2318, -0.8229, -0.9280]]) + + + >>> torch.argsort(a, dim=1) + tensor([[2, 0, 3, 1], + [3, 2, 1, 0], + [2, 1, 0, 3], + [3, 2, 1, 0]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.msort, + r""" +msort(input, *, out=None) -> Tensor + +Sorts the elements of the :attr:`input` tensor along its first dimension +in ascending order by value. + +.. note:: `torch.msort(t)` is equivalent to `torch.sort(t, dim=0)[0]`. + See also :func:`torch.sort`. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> t = torch.randn(3, 4) + >>> t + tensor([[-0.1321, 0.4370, -1.2631, -1.1289], + [-2.0527, -1.1250, 0.2275, 0.3077], + [-0.0881, -0.1259, -0.5495, 1.0284]]) + >>> torch.msort(t) + tensor([[-2.0527, -1.1250, -1.2631, -1.1289], + [-0.1321, -0.1259, -0.5495, 0.3077], + [-0.0881, 0.4370, 0.2275, 1.0284]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.sparse_compressed_tensor, + r"""sparse_compressed_tensor(compressed_indices, plain_indices, values, size=None, """ + r"""*, dtype=None, layout=None, device=None, requires_grad=False, check_invariants=None) -> Tensor + +Constructs a :ref:`sparse tensor in Compressed Sparse format - CSR, +CSC, BSR, or BSC - ` with specified values at +the given :attr:`compressed_indices` and :attr:`plain_indices`. Sparse +matrix multiplication operations in Compressed Sparse format are +typically faster than that for sparse tensors in COO format. Make you +have a look at :ref:`the note on the data type of the indices +`. + +{sparse_factory_device_note} + +Args: + compressed_indices (array_like): (B+1)-dimensional array of size + ``(*batchsize, compressed_dim_size + 1)``. The last element of + each batch is the number of non-zero elements or blocks. This + tensor encodes the index in ``values`` and ``plain_indices`` + depending on where the given compressed dimension (row or + column) starts. Each successive number in the tensor + subtracted by the number before it denotes the number of + elements or blocks in a given compressed dimension. + plain_indices (array_like): Plain dimension (column or row) + co-ordinates of each element or block in values. (B+1)-dimensional + tensor with the same length as values. + + values (array_list): Initial values for the tensor. Can be a list, + tuple, NumPy ``ndarray``, scalar, and other types. that + represents a (1+K)-dimensional (for CSR and CSC layouts) or + (1+2+K)-dimensional tensor (for BSR and BSC layouts) where + ``K`` is the number of dense dimensions. + size (list, tuple, :class:`torch.Size`, optional): Size of the + sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols * + blocksize[1], *densesize)`` where ``blocksize[0] == + blocksize[1] == 1`` for CSR and CSC formats. If not provided, + the size will be inferred as the minimum size big enough to + hold all non-zero elements or blocks. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of + returned tensor. Default: if None, infers data type from + :attr:`values`. + layout (:class:`torch.layout`, required): the desired layout of + returned tensor: :attr:`torch.sparse_csr`, + :attr:`torch.sparse_csc`, :attr:`torch.sparse_bsr`, or + :attr:`torch.sparse_bsc`. + device (:class:`torch.device`, optional): the desired device of + returned tensor. Default: if None, uses the current device + for the default tensor type (see + :func:`torch.set_default_device`). :attr:`device` will be + the CPU for CPU tensor types and the current CUDA device for + CUDA tensor types. + {requires_grad} + {check_invariants} + +Example:: + >>> compressed_indices = [0, 2, 4] + >>> plain_indices = [0, 1, 0, 1] + >>> values = [1, 2, 3, 4] + >>> torch.sparse_compressed_tensor(torch.tensor(compressed_indices, dtype=torch.int64), + ... torch.tensor(plain_indices, dtype=torch.int64), + ... torch.tensor(values), dtype=torch.double, layout=torch.sparse_csr) + tensor(crow_indices=tensor([0, 2, 4]), + col_indices=tensor([0, 1, 0, 1]), + values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4, + dtype=torch.float64, layout=torch.sparse_csr) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sparse_csr_tensor, + r"""sparse_csr_tensor(crow_indices, col_indices, values, size=None, """ + r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor + +Constructs a :ref:`sparse tensor in CSR (Compressed Sparse Row) ` with specified +values at the given :attr:`crow_indices` and :attr:`col_indices`. Sparse matrix multiplication operations +in CSR format are typically faster than that for sparse tensors in COO format. Make you have a look +at :ref:`the note on the data type of the indices `. + +{sparse_factory_device_note} + +Args: + crow_indices (array_like): (B+1)-dimensional array of size + ``(*batchsize, nrows + 1)``. The last element of each batch + is the number of non-zeros. This tensor encodes the index in + values and col_indices depending on where the given row + starts. Each successive number in the tensor subtracted by the + number before it denotes the number of elements in a given + row. + col_indices (array_like): Column co-ordinates of each element in + values. (B+1)-dimensional tensor with the same length + as values. + values (array_list): Initial values for the tensor. Can be a list, + tuple, NumPy ``ndarray``, scalar, and other types that + represents a (1+K)-dimensional tensor where ``K`` is the number + of dense dimensions. + size (list, tuple, :class:`torch.Size`, optional): Size of the + sparse tensor: ``(*batchsize, nrows, ncols, *densesize)``. If + not provided, the size will be inferred as the minimum size + big enough to hold all non-zero elements. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of + returned tensor. Default: if None, infers data type from + :attr:`values`. + device (:class:`torch.device`, optional): the desired device of + returned tensor. Default: if None, uses the current device + for the default tensor type (see + :func:`torch.set_default_device`). :attr:`device` will be + the CPU for CPU tensor types and the current CUDA device for + CUDA tensor types. + {requires_grad} + {check_invariants} + +Example:: + >>> crow_indices = [0, 2, 4] + >>> col_indices = [0, 1, 0, 1] + >>> values = [1, 2, 3, 4] + >>> torch.sparse_csr_tensor(torch.tensor(crow_indices, dtype=torch.int64), + ... torch.tensor(col_indices, dtype=torch.int64), + ... torch.tensor(values), dtype=torch.double) + tensor(crow_indices=tensor([0, 2, 4]), + col_indices=tensor([0, 1, 0, 1]), + values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4, + dtype=torch.float64, layout=torch.sparse_csr) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sparse_csc_tensor, + r"""sparse_csc_tensor(ccol_indices, row_indices, values, size=None, """ + r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor + +Constructs a :ref:`sparse tensor in CSC (Compressed Sparse Column) +` with specified values at the given +:attr:`ccol_indices` and :attr:`row_indices`. Sparse matrix +multiplication operations in CSC format are typically faster than that +for sparse tensors in COO format. Make you have a look at :ref:`the +note on the data type of the indices `. + +{sparse_factory_device_note} + +Args: + ccol_indices (array_like): (B+1)-dimensional array of size + ``(*batchsize, ncols + 1)``. The last element of each batch + is the number of non-zeros. This tensor encodes the index in + values and row_indices depending on where the given column + starts. Each successive number in the tensor subtracted by the + number before it denotes the number of elements in a given + column. + row_indices (array_like): Row co-ordinates of each element in + values. (B+1)-dimensional tensor with the same length as + values. + values (array_list): Initial values for the tensor. Can be a list, + tuple, NumPy ``ndarray``, scalar, and other types that + represents a (1+K)-dimensional tensor where ``K`` is the number + of dense dimensions. + size (list, tuple, :class:`torch.Size`, optional): Size of the + sparse tensor: ``(*batchsize, nrows, ncols, *densesize)``. If + not provided, the size will be inferred as the minimum size + big enough to hold all non-zero elements. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of + returned tensor. Default: if None, infers data type from + :attr:`values`. + device (:class:`torch.device`, optional): the desired device of + returned tensor. Default: if None, uses the current device + for the default tensor type (see + :func:`torch.set_default_device`). :attr:`device` will be + the CPU for CPU tensor types and the current CUDA device for + CUDA tensor types. + {requires_grad} + {check_invariants} + +Example:: + >>> ccol_indices = [0, 2, 4] + >>> row_indices = [0, 1, 0, 1] + >>> values = [1, 2, 3, 4] + >>> torch.sparse_csc_tensor(torch.tensor(ccol_indices, dtype=torch.int64), + ... torch.tensor(row_indices, dtype=torch.int64), + ... torch.tensor(values), dtype=torch.double) + tensor(ccol_indices=tensor([0, 2, 4]), + row_indices=tensor([0, 1, 0, 1]), + values=tensor([1., 2., 3., 4.]), size=(2, 2), nnz=4, + dtype=torch.float64, layout=torch.sparse_csc) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sparse_bsr_tensor, + r"""sparse_bsr_tensor(crow_indices, col_indices, values, size=None, """ + r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor + +Constructs a :ref:`sparse tensor in BSR (Block Compressed Sparse Row)) +` with specified 2-dimensional blocks at the given +:attr:`crow_indices` and :attr:`col_indices`. Sparse matrix +multiplication operations in BSR format are typically faster than that +for sparse tensors in COO format. Make you have a look at :ref:`the +note on the data type of the indices `. + +{sparse_factory_device_note} + +Args: + crow_indices (array_like): (B+1)-dimensional array of size + ``(*batchsize, nrowblocks + 1)``. The last element of each + batch is the number of non-zeros. This tensor encodes the + block index in values and col_indices depending on where the + given row block starts. Each successive number in the tensor + subtracted by the number before it denotes the number of + blocks in a given row. + col_indices (array_like): Column block co-ordinates of each block + in values. (B+1)-dimensional tensor with the same length as + values. + values (array_list): Initial values for the tensor. Can be a list, + tuple, NumPy ``ndarray``, scalar, and other types that + represents a (1 + 2 + K)-dimensional tensor where ``K`` is the + number of dense dimensions. + size (list, tuple, :class:`torch.Size`, optional): Size of the + sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols * + blocksize[1], *densesize)`` where ``blocksize == + values.shape[1:3]``. If not provided, the size will be + inferred as the minimum size big enough to hold all non-zero + blocks. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of + returned tensor. Default: if None, infers data type from + :attr:`values`. + device (:class:`torch.device`, optional): the desired device of + returned tensor. Default: if None, uses the current device + for the default tensor type (see + :func:`torch.set_default_device`). :attr:`device` will be + the CPU for CPU tensor types and the current CUDA device for + CUDA tensor types. + {requires_grad} + {check_invariants} + +Example:: + >>> crow_indices = [0, 1, 2] + >>> col_indices = [0, 1] + >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + >>> torch.sparse_bsr_tensor(torch.tensor(crow_indices, dtype=torch.int64), + ... torch.tensor(col_indices, dtype=torch.int64), + ... torch.tensor(values), dtype=torch.double) + tensor(crow_indices=tensor([0, 1, 2]), + col_indices=tensor([0, 1]), + values=tensor([[[1., 2.], + [3., 4.]], + [[5., 6.], + [7., 8.]]]), size=(2, 2), nnz=2, dtype=torch.float64, + layout=torch.sparse_bsr) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sparse_bsc_tensor, + r"""sparse_bsc_tensor(ccol_indices, row_indices, values, size=None, """ + r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None) -> Tensor + +Constructs a :ref:`sparse tensor in BSC (Block Compressed Sparse +Column)) ` with specified 2-dimensional blocks at the +given :attr:`ccol_indices` and :attr:`row_indices`. Sparse matrix +multiplication operations in BSC format are typically faster than that +for sparse tensors in COO format. Make you have a look at :ref:`the +note on the data type of the indices `. + +{sparse_factory_device_note} + +Args: + ccol_indices (array_like): (B+1)-dimensional array of size + ``(*batchsize, ncolblocks + 1)``. The last element of each + batch is the number of non-zeros. This tensor encodes the + index in values and row_indices depending on where the given + column starts. Each successive number in the tensor subtracted + by the number before it denotes the number of elements in a + given column. + row_indices (array_like): Row block co-ordinates of each block in + values. (B+1)-dimensional tensor with the same length + as values. + values (array_list): Initial blocks for the tensor. Can be a list, + tuple, NumPy ``ndarray``, and other types that + represents a (1 + 2 + K)-dimensional tensor where ``K`` is the + number of dense dimensions. + size (list, tuple, :class:`torch.Size`, optional): Size of the + sparse tensor: ``(*batchsize, nrows * blocksize[0], ncols * + blocksize[1], *densesize)`` If not provided, the size will be + inferred as the minimum size big enough to hold all non-zero + blocks. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of + returned tensor. Default: if None, infers data type from + :attr:`values`. + device (:class:`torch.device`, optional): the desired device of + returned tensor. Default: if None, uses the current device + for the default tensor type (see + :func:`torch.set_default_device`). :attr:`device` will be + the CPU for CPU tensor types and the current CUDA device for + CUDA tensor types. + {requires_grad} + {check_invariants} + +Example:: + >>> ccol_indices = [0, 1, 2] + >>> row_indices = [0, 1] + >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + >>> torch.sparse_bsc_tensor(torch.tensor(ccol_indices, dtype=torch.int64), + ... torch.tensor(row_indices, dtype=torch.int64), + ... torch.tensor(values), dtype=torch.double) + tensor(ccol_indices=tensor([0, 1, 2]), + row_indices=tensor([0, 1]), + values=tensor([[[1., 2.], + [3., 4.]], + [[5., 6.], + [7., 8.]]]), size=(2, 2), nnz=2, dtype=torch.float64, + layout=torch.sparse_bsc) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sparse_coo_tensor, + r"""sparse_coo_tensor(indices, values, size=None, """ + r"""*, dtype=None, device=None, requires_grad=False, check_invariants=None, is_coalesced=None) -> Tensor + +Constructs a :ref:`sparse tensor in COO(rdinate) format +` with specified values at the given +:attr:`indices`. + +.. note:: + + This function returns an :ref:`uncoalesced tensor + ` when :attr:`is_coalesced` is + unspecified or ``None``. + +{sparse_factory_device_note} + +Args: + indices (array_like): Initial data for the tensor. Can be a list, tuple, + NumPy ``ndarray``, scalar, and other types. Will be cast to a :class:`torch.LongTensor` + internally. The indices are the coordinates of the non-zero values in the matrix, and thus + should be two-dimensional where the first dimension is the number of tensor dimensions and + the second dimension is the number of non-zero values. + values (array_like): Initial values for the tensor. Can be a list, tuple, + NumPy ``ndarray``, scalar, and other types. + size (list, tuple, or :class:`torch.Size`, optional): Size of the sparse tensor. If not + provided the size will be inferred as the minimum size big enough to hold all non-zero + elements. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: if None, infers data type from :attr:`values`. + device (:class:`torch.device`, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type + (see :func:`torch.set_default_device`). :attr:`device` will be the CPU + for CPU tensor types and the current CUDA device for CUDA tensor types. + {requires_grad} + {check_invariants} + is_coalesced (bool, optional): When``True``, the caller is + responsible for providing tensor indices that correspond to a + coalesced tensor. If the :attr:`check_invariants` flag is + False, no error will be raised if the prerequisites are not + met and this will lead to silently incorrect results. To force + coalescion please use :meth:`coalesce` on the resulting + Tensor. + Default: None: except for trivial cases (e.g. nnz < 2) the + resulting Tensor has is_coalesced set to ``False```. + +Example:: + + >>> i = torch.tensor([[0, 1, 1], + ... [2, 0, 2]]) + >>> v = torch.tensor([3, 4, 5], dtype=torch.float32) + >>> torch.sparse_coo_tensor(i, v, [2, 4]) + tensor(indices=tensor([[0, 1, 1], + [2, 0, 2]]), + values=tensor([3., 4., 5.]), + size=(2, 4), nnz=3, layout=torch.sparse_coo) + + >>> torch.sparse_coo_tensor(i, v) # Shape inference + tensor(indices=tensor([[0, 1, 1], + [2, 0, 2]]), + values=tensor([3., 4., 5.]), + size=(2, 3), nnz=3, layout=torch.sparse_coo) + + >>> torch.sparse_coo_tensor(i, v, [2, 4], + ... dtype=torch.float64, + ... device=torch.device('cuda:0')) + tensor(indices=tensor([[0, 1, 1], + [2, 0, 2]]), + values=tensor([3., 4., 5.]), + device='cuda:0', size=(2, 4), nnz=3, dtype=torch.float64, + layout=torch.sparse_coo) + + # Create an empty sparse tensor with the following invariants: + # 1. sparse_dim + dense_dim = len(SparseTensor.shape) + # 2. SparseTensor._indices().shape = (sparse_dim, nnz) + # 3. SparseTensor._values().shape = (nnz, SparseTensor.shape[sparse_dim:]) + # + # For instance, to create an empty sparse tensor with nnz = 0, dense_dim = 0 and + # sparse_dim = 1 (hence indices is a 2D tensor of shape = (1, 0)) + >>> S = torch.sparse_coo_tensor(torch.empty([1, 0]), [], [1]) + tensor(indices=tensor([], size=(1, 0)), + values=tensor([], size=(0,)), + size=(1,), nnz=0, layout=torch.sparse_coo) + + # and to create an empty sparse tensor with nnz = 0, dense_dim = 1 and + # sparse_dim = 1 + >>> S = torch.sparse_coo_tensor(torch.empty([1, 0]), torch.empty([0, 2]), [1, 2]) + tensor(indices=tensor([], size=(1, 0)), + values=tensor([], size=(0, 2)), + size=(1, 2), nnz=0, layout=torch.sparse_coo) + +.. _torch.sparse: https://pytorch.org/docs/stable/sparse.html +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.sqrt, + r""" +sqrt(input, *, out=None) -> Tensor + +Returns a new tensor with the square-root of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \sqrt{\text{input}_{i}} +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-2.0755, 1.0226, 0.0831, 0.4806]) + >>> torch.sqrt(a) + tensor([ nan, 1.0112, 0.2883, 0.6933]) +""".format( + **common_args + ), +) + +add_docstr( + torch.square, + r""" +square(input, *, out=None) -> Tensor + +Returns a new tensor with the square of the elements of :attr:`input`. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-2.0755, 1.0226, 0.0831, 0.4806]) + >>> torch.square(a) + tensor([ 4.3077, 1.0457, 0.0069, 0.2310]) +""".format( + **common_args + ), +) + +add_docstr( + torch.squeeze, + r""" +squeeze(input, dim=None) -> Tensor + +Returns a tensor with all specified dimensions of :attr:`input` of size `1` removed. + +For example, if `input` is of shape: +:math:`(A \times 1 \times B \times C \times 1 \times D)` then the `input.squeeze()` +will be of shape: :math:`(A \times B \times C \times D)`. + +When :attr:`dim` is given, a squeeze operation is done only in the given +dimension(s). If `input` is of shape: :math:`(A \times 1 \times B)`, +``squeeze(input, 0)`` leaves the tensor unchanged, but ``squeeze(input, 1)`` +will squeeze the tensor to the shape :math:`(A \times B)`. + +.. note:: The returned tensor shares the storage with the input tensor, + so changing the contents of one will change the contents of the other. + +.. warning:: If the tensor has a batch dimension of size 1, then `squeeze(input)` + will also remove the batch dimension, which can lead to unexpected + errors. Consider specifying only the dims you wish to be squeezed. + +Args: + {input} + dim (int or tuple of ints, optional): if given, the input will be squeezed + only in the specified dimensions. + + .. versionchanged:: 2.0 + :attr:`dim` now accepts tuples of dimensions. + +Example:: + + >>> x = torch.zeros(2, 1, 2, 1, 2) + >>> x.size() + torch.Size([2, 1, 2, 1, 2]) + >>> y = torch.squeeze(x) + >>> y.size() + torch.Size([2, 2, 2]) + >>> y = torch.squeeze(x, 0) + >>> y.size() + torch.Size([2, 1, 2, 1, 2]) + >>> y = torch.squeeze(x, 1) + >>> y.size() + torch.Size([2, 2, 1, 2]) + >>> y = torch.squeeze(x, (1, 2, 3)) + torch.Size([2, 2, 2]) +""".format( + **common_args + ), +) + +add_docstr( + torch.std, + r""" +std(input, dim=None, *, correction=1, keepdim=False, out=None) -> Tensor + +Calculates the standard deviation over the dimensions specified by :attr:`dim`. +:attr:`dim` can be a single dimension, list of dimensions, or ``None`` to +reduce over all dimensions. + +The standard deviation (:math:`\sigma`) is calculated as + +.. math:: \sigma = \sqrt{\frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2} + +where :math:`x` is the sample set of elements, :math:`\bar{x}` is the +sample mean, :math:`N` is the number of samples and :math:`\delta N` is +the :attr:`correction`. +""" + + r""" + +{keepdim_details} + +Args: + {input} + {dim} + +Keyword args: + correction (int): difference between the sample size and sample degrees of freedom. + Defaults to `Bessel's correction`_, ``correction=1``. + + .. versionchanged:: 2.0 + Previously this argument was called ``unbiased`` and was a boolean + with ``True`` corresponding to ``correction=1`` and ``False`` being + ``correction=0``. + {keepdim} + {out} + +Example: + + >>> a = torch.tensor( + ... [[ 0.2035, 1.2959, 1.8101, -0.4644], + ... [ 1.5027, -0.3270, 0.5905, 0.6538], + ... [-1.5745, 1.3330, -0.5596, -0.6548], + ... [ 0.1264, -0.5080, 1.6420, 0.1992]]) + >>> torch.std(a, dim=1, keepdim=True) + tensor([[1.0311], + [0.7477], + [1.2204], + [0.9087]]) + +.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.std_mean, + r""" +std_mean(input, dim=None, *, correction=1, keepdim=False, out=None) -> (Tensor, Tensor) + +Calculates the standard deviation and mean over the dimensions specified by +:attr:`dim`. :attr:`dim` can be a single dimension, list of dimensions, or +``None`` to reduce over all dimensions. + +The standard deviation (:math:`\sigma`) is calculated as + +.. math:: \sigma = \sqrt{\frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2} + +where :math:`x` is the sample set of elements, :math:`\bar{x}` is the +sample mean, :math:`N` is the number of samples and :math:`\delta N` is +the :attr:`correction`. + +""" + + r""" + +{keepdim_details} + +Args: + {input} + {opt_dim} + +Keyword args: + correction (int): difference between the sample size and sample degrees of freedom. + Defaults to `Bessel's correction`_, ``correction=1``. + + .. versionchanged:: 2.0 + Previously this argument was called ``unbiased`` and was a boolean + with ``True`` corresponding to ``correction=1`` and ``False`` being + ``correction=0``. + {keepdim} + {out} + +Returns: + A tuple (std, mean) containing the standard deviation and mean. + +Example: + + >>> a = torch.tensor( + ... [[ 0.2035, 1.2959, 1.8101, -0.4644], + ... [ 1.5027, -0.3270, 0.5905, 0.6538], + ... [-1.5745, 1.3330, -0.5596, -0.6548], + ... [ 0.1264, -0.5080, 1.6420, 0.1992]]) + >>> torch.std_mean(a, dim=0, keepdim=True) + (tensor([[1.2620, 1.0028, 1.0957, 0.6038]]), + tensor([[ 0.0645, 0.4485, 0.8707, -0.0665]])) + +.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.sub, + r""" +sub(input, other, *, alpha=1, out=None) -> Tensor + +Subtracts :attr:`other`, scaled by :attr:`alpha`, from :attr:`input`. + +.. math:: + \text{{out}}_i = \text{{input}}_i - \text{{alpha}} \times \text{{other}}_i +""" + + r""" + +Supports :ref:`broadcasting to a common shape `, +:ref:`type promotion `, and integer, float, and complex inputs. + +Args: + {input} + other (Tensor or Number): the tensor or number to subtract from :attr:`input`. + +Keyword args: + alpha (Number): the multiplier for :attr:`other`. + {out} + +Example:: + + >>> a = torch.tensor((1, 2)) + >>> b = torch.tensor((0, 1)) + >>> torch.sub(a, b, alpha=2) + tensor([1, 0]) +""".format( + **common_args + ), +) + +add_docstr( + torch.subtract, + r""" +subtract(input, other, *, alpha=1, out=None) -> Tensor + +Alias for :func:`torch.sub`. +""", +) + +add_docstr( + torch.sum, + r""" +sum(input, *, dtype=None) -> Tensor + +Returns the sum of all elements in the :attr:`input` tensor. + +Args: + {input} + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.randn(1, 3) + >>> a + tensor([[ 0.1133, -0.9567, 0.2958]]) + >>> torch.sum(a) + tensor(-0.5475) + +.. function:: sum(input, dim, keepdim=False, *, dtype=None) -> Tensor + :noindex: + +Returns the sum of each row of the :attr:`input` tensor in the given +dimension :attr:`dim`. If :attr:`dim` is a list of dimensions, +reduce over all of them. + +{keepdim_details} + +Args: + {input} + {opt_dim} + {keepdim} + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.randn(4, 4) + >>> a + tensor([[ 0.0569, -0.2475, 0.0737, -0.3429], + [-0.2993, 0.9138, 0.9337, -1.6864], + [ 0.1132, 0.7892, -0.1003, 0.5688], + [ 0.3637, -0.9906, -0.4752, -1.5197]]) + >>> torch.sum(a, 1) + tensor([-0.4598, -0.1381, 1.3708, -2.6217]) + >>> b = torch.arange(4 * 5 * 6).view(4, 5, 6) + >>> torch.sum(b, (2, 1)) + tensor([ 435., 1335., 2235., 3135.]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.nansum, + r""" +nansum(input, *, dtype=None) -> Tensor + +Returns the sum of all elements, treating Not a Numbers (NaNs) as zero. + +Args: + {input} + +Keyword args: + {dtype} + +Example:: + + >>> a = torch.tensor([1., 2., float('nan'), 4.]) + >>> torch.nansum(a) + tensor(7.) + +.. function:: nansum(input, dim, keepdim=False, *, dtype=None) -> Tensor + :noindex: + +Returns the sum of each row of the :attr:`input` tensor in the given +dimension :attr:`dim`, treating Not a Numbers (NaNs) as zero. +If :attr:`dim` is a list of dimensions, reduce over all of them. + +{keepdim_details} + +Args: + {input} + {opt_dim} + {keepdim} + +Keyword args: + {dtype} + +Example:: + + >>> torch.nansum(torch.tensor([1., float("nan")])) + 1.0 + >>> a = torch.tensor([[1, 2], [3., float("nan")]]) + >>> torch.nansum(a) + tensor(6.) + >>> torch.nansum(a, dim=0) + tensor([4., 2.]) + >>> torch.nansum(a, dim=1) + tensor([3., 3.]) +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.svd, + r""" +svd(input, some=True, compute_uv=True, *, out=None) -> (Tensor, Tensor, Tensor) + +Computes the singular value decomposition of either a matrix or batch of +matrices :attr:`input`. The singular value decomposition is represented as a +namedtuple `(U, S, V)`, such that :attr:`input` :math:`= U \text{diag}(S) V^{\text{H}}`. +where :math:`V^{\text{H}}` is the transpose of `V` for real inputs, +and the conjugate transpose of `V` for complex inputs. +If :attr:`input` is a batch of matrices, then `U`, `S`, and `V` are also +batched with the same batch dimensions as :attr:`input`. + +If :attr:`some` is `True` (default), the method returns the reduced singular +value decomposition. In this case, if the last two dimensions of :attr:`input` are +`m` and `n`, then the returned `U` and `V` matrices will contain only +`min(n, m)` orthonormal columns. + +If :attr:`compute_uv` is `False`, the returned `U` and `V` will be +zero-filled matrices of shape `(m, m)` and `(n, n)` +respectively, and the same device as :attr:`input`. The argument :attr:`some` +has no effect when :attr:`compute_uv` is `False`. + +Supports :attr:`input` of float, double, cfloat and cdouble data types. +The dtypes of `U` and `V` are the same as :attr:`input`'s. `S` will +always be real-valued, even if :attr:`input` is complex. + +.. warning:: + + :func:`torch.svd` is deprecated in favor of :func:`torch.linalg.svd` + and will be removed in a future PyTorch release. + + ``U, S, V = torch.svd(A, some=some, compute_uv=True)`` (default) should be replaced with + + .. code:: python + + U, S, Vh = torch.linalg.svd(A, full_matrices=not some) + V = Vh.mH + + ``_, S, _ = torch.svd(A, some=some, compute_uv=False)`` should be replaced with + + .. code:: python + + S = torch.linalg.svdvals(A) + +.. note:: Differences with :func:`torch.linalg.svd`: + + * :attr:`some` is the opposite of + :func:`torch.linalg.svd`'s :attr:`full_matrices`. Note that + default value for both is `True`, so the default behavior is + effectively the opposite. + * :func:`torch.svd` returns `V`, whereas :func:`torch.linalg.svd` returns + `Vh`, that is, :math:`V^{\text{H}}`. + * If :attr:`compute_uv` is `False`, :func:`torch.svd` returns zero-filled + tensors for `U` and `Vh`, whereas :func:`torch.linalg.svd` returns + empty tensors. + +.. note:: The singular values are returned in descending order. If :attr:`input` is a batch of matrices, + then the singular values of each matrix in the batch are returned in descending order. + +.. note:: The `S` tensor can only be used to compute gradients if :attr:`compute_uv` is `True`. + +.. note:: When :attr:`some` is `False`, the gradients on `U[..., :, min(m, n):]` + and `V[..., :, min(m, n):]` will be ignored in the backward pass, as those vectors + can be arbitrary bases of the corresponding subspaces. + +.. note:: The implementation of :func:`torch.linalg.svd` on CPU uses LAPACK's routine `?gesdd` + (a divide-and-conquer algorithm) instead of `?gesvd` for speed. Analogously, + on GPU, it uses cuSOLVER's routines `gesvdj` and `gesvdjBatched` on CUDA 10.1.243 + and later, and MAGMA's routine `gesdd` on earlier versions of CUDA. + +.. note:: The returned `U` will not be contiguous. The matrix (or batch of matrices) will + be represented as a column-major matrix (i.e. Fortran-contiguous). + +.. warning:: The gradients with respect to `U` and `V` will only be finite when the input does not + have zero nor repeated singular values. + +.. warning:: If the distance between any two singular values is close to zero, the gradients with respect to + `U` and `V` will be numerically unstable, as they depends on + :math:`\frac{1}{\min_{i \neq j} \sigma_i^2 - \sigma_j^2}`. The same happens when the matrix + has small singular values, as these gradients also depend on `S^{-1}`. + +.. warning:: For complex-valued :attr:`input` the singular value decomposition is not unique, + as `U` and `V` may be multiplied by an arbitrary phase factor :math:`e^{i \phi}` on every column. + The same happens when :attr:`input` has repeated singular values, where one may multiply + the columns of the spanning subspace in `U` and `V` by a rotation matrix + and `the resulting vectors will span the same subspace`_. + Different platforms, like NumPy, or inputs on different device types, + may produce different `U` and `V` tensors. + +Args: + input (Tensor): the input tensor of size `(*, m, n)` where `*` is zero or more + batch dimensions consisting of `(m, n)` matrices. + some (bool, optional): controls whether to compute the reduced or full decomposition, and + consequently, the shape of returned `U` and `V`. Default: `True`. + compute_uv (bool, optional): controls whether to compute `U` and `V`. Default: `True`. + +Keyword args: + out (tuple, optional): the output tuple of tensors + +Example:: + + >>> a = torch.randn(5, 3) + >>> a + tensor([[ 0.2364, -0.7752, 0.6372], + [ 1.7201, 0.7394, -0.0504], + [-0.3371, -1.0584, 0.5296], + [ 0.3550, -0.4022, 1.5569], + [ 0.2445, -0.0158, 1.1414]]) + >>> u, s, v = torch.svd(a) + >>> u + tensor([[ 0.4027, 0.0287, 0.5434], + [-0.1946, 0.8833, 0.3679], + [ 0.4296, -0.2890, 0.5261], + [ 0.6604, 0.2717, -0.2618], + [ 0.4234, 0.2481, -0.4733]]) + >>> s + tensor([2.3289, 2.0315, 0.7806]) + >>> v + tensor([[-0.0199, 0.8766, 0.4809], + [-0.5080, 0.4054, -0.7600], + [ 0.8611, 0.2594, -0.4373]]) + >>> torch.dist(a, torch.mm(torch.mm(u, torch.diag(s)), v.t())) + tensor(8.6531e-07) + >>> a_big = torch.randn(7, 5, 3) + >>> u, s, v = torch.svd(a_big) + >>> torch.dist(a_big, torch.matmul(torch.matmul(u, torch.diag_embed(s)), v.mT)) + tensor(2.6503e-06) + +.. _the resulting vectors will span the same subspace: + (https://en.wikipedia.org/wiki/Singular_value_decomposition#Singular_values,_singular_vectors,_and_their_relation_to_the_SVD) +""", +) + + +add_docstr( + torch.t, + r""" +t(input) -> Tensor + +Expects :attr:`input` to be <= 2-D tensor and transposes dimensions 0 +and 1. + +0-D and 1-D tensors are returned as is. When input is a 2-D tensor this +is equivalent to ``transpose(input, 0, 1)``. + +Args: + {input} + +Example:: + + >>> x = torch.randn(()) + >>> x + tensor(0.1995) + >>> torch.t(x) + tensor(0.1995) + >>> x = torch.randn(3) + >>> x + tensor([ 2.4320, -0.4608, 0.7702]) + >>> torch.t(x) + tensor([ 2.4320, -0.4608, 0.7702]) + >>> x = torch.randn(2, 3) + >>> x + tensor([[ 0.4875, 0.9158, -0.5872], + [ 0.3938, -0.6929, 0.6932]]) + >>> torch.t(x) + tensor([[ 0.4875, 0.3938], + [ 0.9158, -0.6929], + [-0.5872, 0.6932]]) + +See also :func:`torch.transpose`. +""".format( + **common_args + ), +) + +add_docstr( + torch.flip, + r""" +flip(input, dims) -> Tensor + +Reverse the order of an n-D tensor along given axis in dims. + +.. note:: + `torch.flip` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.flip`, + which returns a view in constant time. Since copying a tensor's data is more work than viewing that data, + `torch.flip` is expected to be slower than `np.flip`. + +Args: + {input} + dims (a list or tuple): axis to flip on + +Example:: + + >>> x = torch.arange(8).view(2, 2, 2) + >>> x + tensor([[[ 0, 1], + [ 2, 3]], + + [[ 4, 5], + [ 6, 7]]]) + >>> torch.flip(x, [0, 1]) + tensor([[[ 6, 7], + [ 4, 5]], + + [[ 2, 3], + [ 0, 1]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.fliplr, + r""" +fliplr(input) -> Tensor + +Flip tensor in the left/right direction, returning a new tensor. + +Flip the entries in each row in the left/right direction. +Columns are preserved, but appear in a different order than before. + +Note: + Requires the tensor to be at least 2-D. + +.. note:: + `torch.fliplr` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.fliplr`, + which returns a view in constant time. Since copying a tensor's data is more work than viewing that data, + `torch.fliplr` is expected to be slower than `np.fliplr`. + +Args: + input (Tensor): Must be at least 2-dimensional. + +Example:: + + >>> x = torch.arange(4).view(2, 2) + >>> x + tensor([[0, 1], + [2, 3]]) + >>> torch.fliplr(x) + tensor([[1, 0], + [3, 2]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.flipud, + r""" +flipud(input) -> Tensor + +Flip tensor in the up/down direction, returning a new tensor. + +Flip the entries in each column in the up/down direction. +Rows are preserved, but appear in a different order than before. + +Note: + Requires the tensor to be at least 1-D. + +.. note:: + `torch.flipud` makes a copy of :attr:`input`'s data. This is different from NumPy's `np.flipud`, + which returns a view in constant time. Since copying a tensor's data is more work than viewing that data, + `torch.flipud` is expected to be slower than `np.flipud`. + +Args: + input (Tensor): Must be at least 1-dimensional. + +Example:: + + >>> x = torch.arange(4).view(2, 2) + >>> x + tensor([[0, 1], + [2, 3]]) + >>> torch.flipud(x) + tensor([[2, 3], + [0, 1]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.roll, + r""" +roll(input, shifts, dims=None) -> Tensor + +Roll the tensor :attr:`input` along the given dimension(s). Elements that are +shifted beyond the last position are re-introduced at the first position. If +:attr:`dims` is `None`, the tensor will be flattened before rolling and then +restored to the original shape. + +Args: + {input} + shifts (int or tuple of ints): The number of places by which the elements + of the tensor are shifted. If shifts is a tuple, dims must be a tuple of + the same size, and each dimension will be rolled by the corresponding + value + dims (int or tuple of ints): Axis along which to roll + +Example:: + + >>> x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2) + >>> x + tensor([[1, 2], + [3, 4], + [5, 6], + [7, 8]]) + >>> torch.roll(x, 1) + tensor([[8, 1], + [2, 3], + [4, 5], + [6, 7]]) + >>> torch.roll(x, 1, 0) + tensor([[7, 8], + [1, 2], + [3, 4], + [5, 6]]) + >>> torch.roll(x, -1, 0) + tensor([[3, 4], + [5, 6], + [7, 8], + [1, 2]]) + >>> torch.roll(x, shifts=(2, 1), dims=(0, 1)) + tensor([[6, 5], + [8, 7], + [2, 1], + [4, 3]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.rot90, + r""" +rot90(input, k=1, dims=[0,1]) -> Tensor + +Rotate an n-D tensor by 90 degrees in the plane specified by dims axis. +Rotation direction is from the first towards the second axis if k > 0, and from the second towards the first for k < 0. + +Args: + {input} + k (int): number of times to rotate. Default value is 1 + dims (a list or tuple): axis to rotate. Default value is [0, 1] + +Example:: + + >>> x = torch.arange(4).view(2, 2) + >>> x + tensor([[0, 1], + [2, 3]]) + >>> torch.rot90(x, 1, [0, 1]) + tensor([[1, 3], + [0, 2]]) + + >>> x = torch.arange(8).view(2, 2, 2) + >>> x + tensor([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]) + >>> torch.rot90(x, 1, [1, 2]) + tensor([[[1, 3], + [0, 2]], + + [[5, 7], + [4, 6]]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.take, + r""" +take(input, index) -> Tensor + +Returns a new tensor with the elements of :attr:`input` at the given indices. +The input tensor is treated as if it were viewed as a 1-D tensor. The result +takes the same shape as the indices. + +Args: + {input} + index (LongTensor): the indices into tensor + +Example:: + + >>> src = torch.tensor([[4, 3, 5], + ... [6, 7, 8]]) + >>> torch.take(src, torch.tensor([0, 2, 5])) + tensor([ 4, 5, 8]) +""".format( + **common_args + ), +) + +add_docstr( + torch.take_along_dim, + r""" +take_along_dim(input, indices, dim=None, *, out=None) -> Tensor + +Selects values from :attr:`input` at the 1-dimensional indices from :attr:`indices` along the given :attr:`dim`. + +If :attr:`dim` is None, the input array is treated as if it has been flattened to 1d. + +Functions that return indices along a dimension, like :func:`torch.argmax` and :func:`torch.argsort`, +are designed to work with this function. See the examples below. + +.. note:: + This function is similar to NumPy's `take_along_axis`. + See also :func:`torch.gather`. + +Args: + {input} + indices (tensor): the indices into :attr:`input`. Must have long dtype. + dim (int, optional): dimension to select along. + +Keyword args: + {out} + +Example:: + + >>> t = torch.tensor([[10, 30, 20], [60, 40, 50]]) + >>> max_idx = torch.argmax(t) + >>> torch.take_along_dim(t, max_idx) + tensor([60]) + >>> sorted_idx = torch.argsort(t, dim=1) + >>> torch.take_along_dim(t, sorted_idx, dim=1) + tensor([[10, 20, 30], + [40, 50, 60]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.tan, + r""" +tan(input, *, out=None) -> Tensor + +Returns a new tensor with the tangent of the elements of :attr:`input`. + +.. math:: + \text{out}_{i} = \tan(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([-1.2027, -1.7687, 0.4412, -1.3856]) + >>> torch.tan(a) + tensor([-2.5930, 4.9859, 0.4722, -5.3366]) +""".format( + **common_args + ), +) + +add_docstr( + torch.tanh, + r""" +tanh(input, *, out=None) -> Tensor + +Returns a new tensor with the hyperbolic tangent of the elements +of :attr:`input`. + +.. math:: + \text{out}_{i} = \tanh(\text{input}_{i}) +""" + + r""" +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 0.8986, -0.7279, 1.1745, 0.2611]) + >>> torch.tanh(a) + tensor([ 0.7156, -0.6218, 0.8257, 0.2553]) +""".format( + **common_args + ), +) + +add_docstr( + # torch.softmax doc str. Point this to torch.nn.functional.softmax + torch.softmax, + r""" +softmax(input, dim, *, dtype=None) -> Tensor + +Alias for :func:`torch.nn.functional.softmax`. +""", +) + +add_docstr( + torch.topk, + r""" +topk(input, k, dim=None, largest=True, sorted=True, *, out=None) -> (Tensor, LongTensor) + +Returns the :attr:`k` largest elements of the given :attr:`input` tensor along +a given dimension. + +If :attr:`dim` is not given, the last dimension of the `input` is chosen. + +If :attr:`largest` is ``False`` then the `k` smallest elements are returned. + +A namedtuple of `(values, indices)` is returned with the `values` and +`indices` of the largest `k` elements of each row of the `input` tensor in the +given dimension `dim`. + +The boolean option :attr:`sorted` if ``True``, will make sure that the returned +`k` elements are themselves sorted + +Args: + {input} + k (int): the k in "top-k" + dim (int, optional): the dimension to sort along + largest (bool, optional): controls whether to return largest or + smallest elements + sorted (bool, optional): controls whether to return the elements + in sorted order + +Keyword args: + out (tuple, optional): the output tuple of (Tensor, LongTensor) that can be + optionally given to be used as output buffers + +Example:: + + >>> x = torch.arange(1., 6.) + >>> x + tensor([ 1., 2., 3., 4., 5.]) + >>> torch.topk(x, 3) + torch.return_types.topk(values=tensor([5., 4., 3.]), indices=tensor([4, 3, 2])) +""".format( + **common_args + ), +) + +add_docstr( + torch.trace, + r""" +trace(input) -> Tensor + +Returns the sum of the elements of the diagonal of the input 2-D matrix. + +Example:: + + >>> x = torch.arange(1., 10.).view(3, 3) + >>> x + tensor([[ 1., 2., 3.], + [ 4., 5., 6.], + [ 7., 8., 9.]]) + >>> torch.trace(x) + tensor(15.) +""", +) + +add_docstr( + torch.transpose, + r""" +transpose(input, dim0, dim1) -> Tensor + +Returns a tensor that is a transposed version of :attr:`input`. +The given dimensions :attr:`dim0` and :attr:`dim1` are swapped. + +If :attr:`input` is a strided tensor then the resulting :attr:`out` +tensor shares its underlying storage with the :attr:`input` tensor, so +changing the content of one would change the content of the other. + +If :attr:`input` is a :ref:`sparse tensor ` then the +resulting :attr:`out` tensor *does not* share the underlying storage +with the :attr:`input` tensor. + +If :attr:`input` is a :ref:`sparse tensor ` with compressed +layout (SparseCSR, SparseBSR, SparseCSC or SparseBSC) the arguments +:attr:`dim0` and :attr:`dim1` must be both batch dimensions, or must +both be sparse dimensions. The batch dimensions of a sparse tensor are the +dimensions preceding the sparse dimensions. + +.. note:: + Transpositions which interchange the sparse dimensions of a `SparseCSR` + or `SparseCSC` layout tensor will result in the layout changing between + the two options. Transposition of the sparse dimensions of a ` SparseBSR` + or `SparseBSC` layout tensor will likewise generate a result with the + opposite layout. + + +Args: + {input} + dim0 (int): the first dimension to be transposed + dim1 (int): the second dimension to be transposed + +Example:: + + >>> x = torch.randn(2, 3) + >>> x + tensor([[ 1.0028, -0.9893, 0.5809], + [-0.1669, 0.7299, 0.4942]]) + >>> torch.transpose(x, 0, 1) + tensor([[ 1.0028, -0.1669], + [-0.9893, 0.7299], + [ 0.5809, 0.4942]]) + +See also :func:`torch.t`. +""".format( + **common_args + ), +) + +add_docstr( + torch.triangular_solve, + r""" +triangular_solve(b, A, upper=True, transpose=False, unitriangular=False, *, out=None) -> (Tensor, Tensor) + +Solves a system of equations with a square upper or lower triangular invertible matrix :math:`A` +and multiple right-hand sides :math:`b`. + +In symbols, it solves :math:`AX = b` and assumes :math:`A` is square upper-triangular +(or lower-triangular if :attr:`upper`\ `= False`) and does not have zeros on the diagonal. + +`torch.triangular_solve(b, A)` can take in 2D inputs `b, A` or inputs that are +batches of 2D matrices. If the inputs are batches, then returns +batched outputs `X` + +If the diagonal of :attr:`A` contains zeros or elements that are very close to zero and +:attr:`unitriangular`\ `= False` (default) or if the input matrix is badly conditioned, +the result may contain `NaN` s. + +Supports input of float, double, cfloat and cdouble data types. + +.. warning:: + + :func:`torch.triangular_solve` is deprecated in favor of :func:`torch.linalg.solve_triangular` + and will be removed in a future PyTorch release. + :func:`torch.linalg.solve_triangular` has its arguments reversed and does not return a + copy of one of the inputs. + + ``X = torch.triangular_solve(B, A).solution`` should be replaced with + + .. code:: python + + X = torch.linalg.solve_triangular(A, B) + +Args: + b (Tensor): multiple right-hand sides of size :math:`(*, m, k)` where + :math:`*` is zero of more batch dimensions + A (Tensor): the input triangular coefficient matrix of size :math:`(*, m, m)` + where :math:`*` is zero or more batch dimensions + upper (bool, optional): whether :math:`A` is upper or lower triangular. Default: ``True``. + transpose (bool, optional): solves `op(A)X = b` where `op(A) = A^T` if this flag is ``True``, + and `op(A) = A` if it is ``False``. Default: ``False``. + unitriangular (bool, optional): whether :math:`A` is unit triangular. + If True, the diagonal elements of :math:`A` are assumed to be + 1 and not referenced from :math:`A`. Default: ``False``. + +Keyword args: + out ((Tensor, Tensor), optional): tuple of two tensors to write + the output to. Ignored if `None`. Default: `None`. + +Returns: + A namedtuple `(solution, cloned_coefficient)` where `cloned_coefficient` + is a clone of :math:`A` and `solution` is the solution :math:`X` to :math:`AX = b` + (or whatever variant of the system of equations, depending on the keyword arguments.) + +Examples:: + + >>> A = torch.randn(2, 2).triu() + >>> A + tensor([[ 1.1527, -1.0753], + [ 0.0000, 0.7986]]) + >>> b = torch.randn(2, 3) + >>> b + tensor([[-0.0210, 2.3513, -1.5492], + [ 1.5429, 0.7403, -1.0243]]) + >>> torch.triangular_solve(b, A) + torch.return_types.triangular_solve( + solution=tensor([[ 1.7841, 2.9046, -2.5405], + [ 1.9320, 0.9270, -1.2826]]), + cloned_coefficient=tensor([[ 1.1527, -1.0753], + [ 0.0000, 0.7986]])) +""", +) + +add_docstr( + torch.tril, + r""" +tril(input, diagonal=0, *, out=None) -> Tensor + +Returns the lower triangular part of the matrix (2-D tensor) or batch of matrices +:attr:`input`, the other elements of the result tensor :attr:`out` are set to 0. + +The lower triangular part of the matrix is defined as the elements on and +below the diagonal. + +The argument :attr:`diagonal` controls which diagonal to consider. If +:attr:`diagonal` = 0, all elements on and below the main diagonal are +retained. A positive value includes just as many diagonals above the main +diagonal, and similarly a negative value excludes just as many diagonals below +the main diagonal. The main diagonal are the set of indices +:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` where +:math:`d_{1}, d_{2}` are the dimensions of the matrix. +""" + + r""" +Args: + {input} + diagonal (int, optional): the diagonal to consider + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(3, 3) + >>> a + tensor([[-1.0813, -0.8619, 0.7105], + [ 0.0935, 0.1380, 2.2112], + [-0.3409, -0.9828, 0.0289]]) + >>> torch.tril(a) + tensor([[-1.0813, 0.0000, 0.0000], + [ 0.0935, 0.1380, 0.0000], + [-0.3409, -0.9828, 0.0289]]) + + >>> b = torch.randn(4, 6) + >>> b + tensor([[ 1.2219, 0.5653, -0.2521, -0.2345, 1.2544, 0.3461], + [ 0.4785, -0.4477, 0.6049, 0.6368, 0.8775, 0.7145], + [ 1.1502, 3.2716, -1.1243, -0.5413, 0.3615, 0.6864], + [-0.0614, -0.7344, -1.3164, -0.7648, -1.4024, 0.0978]]) + >>> torch.tril(b, diagonal=1) + tensor([[ 1.2219, 0.5653, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.4785, -0.4477, 0.6049, 0.0000, 0.0000, 0.0000], + [ 1.1502, 3.2716, -1.1243, -0.5413, 0.0000, 0.0000], + [-0.0614, -0.7344, -1.3164, -0.7648, -1.4024, 0.0000]]) + >>> torch.tril(b, diagonal=-1) + tensor([[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.4785, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 1.1502, 3.2716, 0.0000, 0.0000, 0.0000, 0.0000], + [-0.0614, -0.7344, -1.3164, 0.0000, 0.0000, 0.0000]]) +""".format( + **common_args + ), +) + +# docstr is split in two parts to avoid format mis-captureing :math: braces '{}' +# as common args. +add_docstr( + torch.tril_indices, + r""" +tril_indices(row, col, offset=0, *, dtype=torch.long, device='cpu', layout=torch.strided) -> Tensor + +Returns the indices of the lower triangular part of a :attr:`row`-by- +:attr:`col` matrix in a 2-by-N Tensor, where the first row contains row +coordinates of all indices and the second row contains column coordinates. +Indices are ordered based on rows and then columns. + +The lower triangular part of the matrix is defined as the elements on and +below the diagonal. + +The argument :attr:`offset` controls which diagonal to consider. If +:attr:`offset` = 0, all elements on and below the main diagonal are +retained. A positive value includes just as many diagonals above the main +diagonal, and similarly a negative value excludes just as many diagonals below +the main diagonal. The main diagonal are the set of indices +:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` +where :math:`d_{1}, d_{2}` are the dimensions of the matrix. + +.. note:: + When running on CUDA, ``row * col`` must be less than :math:`2^{59}` to + prevent overflow during calculation. +""" + + r""" +Args: + row (``int``): number of rows in the 2-D matrix. + col (``int``): number of columns in the 2-D matrix. + offset (``int``): diagonal offset from the main diagonal. + Default: if not provided, 0. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: if ``None``, ``torch.long``. + {device} + layout (:class:`torch.layout`, optional): currently only support ``torch.strided``. + +Example:: + + >>> a = torch.tril_indices(3, 3) + >>> a + tensor([[0, 1, 1, 2, 2, 2], + [0, 0, 1, 0, 1, 2]]) + + >>> a = torch.tril_indices(4, 3, -1) + >>> a + tensor([[1, 2, 2, 3, 3, 3], + [0, 0, 1, 0, 1, 2]]) + + >>> a = torch.tril_indices(4, 3, 1) + >>> a + tensor([[0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], + [0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.triu, + r""" +triu(input, diagonal=0, *, out=None) -> Tensor + +Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices +:attr:`input`, the other elements of the result tensor :attr:`out` are set to 0. + +The upper triangular part of the matrix is defined as the elements on and +above the diagonal. + +The argument :attr:`diagonal` controls which diagonal to consider. If +:attr:`diagonal` = 0, all elements on and above the main diagonal are +retained. A positive value excludes just as many diagonals above the main +diagonal, and similarly a negative value includes just as many diagonals below +the main diagonal. The main diagonal are the set of indices +:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` where +:math:`d_{1}, d_{2}` are the dimensions of the matrix. +""" + + r""" +Args: + {input} + diagonal (int, optional): the diagonal to consider + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(3, 3) + >>> a + tensor([[ 0.2309, 0.5207, 2.0049], + [ 0.2072, -1.0680, 0.6602], + [ 0.3480, -0.5211, -0.4573]]) + >>> torch.triu(a) + tensor([[ 0.2309, 0.5207, 2.0049], + [ 0.0000, -1.0680, 0.6602], + [ 0.0000, 0.0000, -0.4573]]) + >>> torch.triu(a, diagonal=1) + tensor([[ 0.0000, 0.5207, 2.0049], + [ 0.0000, 0.0000, 0.6602], + [ 0.0000, 0.0000, 0.0000]]) + >>> torch.triu(a, diagonal=-1) + tensor([[ 0.2309, 0.5207, 2.0049], + [ 0.2072, -1.0680, 0.6602], + [ 0.0000, -0.5211, -0.4573]]) + + >>> b = torch.randn(4, 6) + >>> b + tensor([[ 0.5876, -0.0794, -1.8373, 0.6654, 0.2604, 1.5235], + [-0.2447, 0.9556, -1.2919, 1.3378, -0.1768, -1.0857], + [ 0.4333, 0.3146, 0.6576, -1.0432, 0.9348, -0.4410], + [-0.9888, 1.0679, -1.3337, -1.6556, 0.4798, 0.2830]]) + >>> torch.triu(b, diagonal=1) + tensor([[ 0.0000, -0.0794, -1.8373, 0.6654, 0.2604, 1.5235], + [ 0.0000, 0.0000, -1.2919, 1.3378, -0.1768, -1.0857], + [ 0.0000, 0.0000, 0.0000, -1.0432, 0.9348, -0.4410], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.4798, 0.2830]]) + >>> torch.triu(b, diagonal=-1) + tensor([[ 0.5876, -0.0794, -1.8373, 0.6654, 0.2604, 1.5235], + [-0.2447, 0.9556, -1.2919, 1.3378, -0.1768, -1.0857], + [ 0.0000, 0.3146, 0.6576, -1.0432, 0.9348, -0.4410], + [ 0.0000, 0.0000, -1.3337, -1.6556, 0.4798, 0.2830]]) +""".format( + **common_args + ), +) + +# docstr is split in two parts to avoid format mis-capturing :math: braces '{}' +# as common args. +add_docstr( + torch.triu_indices, + r""" +triu_indices(row, col, offset=0, *, dtype=torch.long, device='cpu', layout=torch.strided) -> Tensor + +Returns the indices of the upper triangular part of a :attr:`row` by +:attr:`col` matrix in a 2-by-N Tensor, where the first row contains row +coordinates of all indices and the second row contains column coordinates. +Indices are ordered based on rows and then columns. + +The upper triangular part of the matrix is defined as the elements on and +above the diagonal. + +The argument :attr:`offset` controls which diagonal to consider. If +:attr:`offset` = 0, all elements on and above the main diagonal are +retained. A positive value excludes just as many diagonals above the main +diagonal, and similarly a negative value includes just as many diagonals below +the main diagonal. The main diagonal are the set of indices +:math:`\lbrace (i, i) \rbrace` for :math:`i \in [0, \min\{d_{1}, d_{2}\} - 1]` +where :math:`d_{1}, d_{2}` are the dimensions of the matrix. + +.. note:: + When running on CUDA, ``row * col`` must be less than :math:`2^{59}` to + prevent overflow during calculation. +""" + + r""" +Args: + row (``int``): number of rows in the 2-D matrix. + col (``int``): number of columns in the 2-D matrix. + offset (``int``): diagonal offset from the main diagonal. + Default: if not provided, 0. + +Keyword args: + dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor. + Default: if ``None``, ``torch.long``. + {device} + layout (:class:`torch.layout`, optional): currently only support ``torch.strided``. + +Example:: + + >>> a = torch.triu_indices(3, 3) + >>> a + tensor([[0, 0, 0, 1, 1, 2], + [0, 1, 2, 1, 2, 2]]) + + >>> a = torch.triu_indices(4, 3, -1) + >>> a + tensor([[0, 0, 0, 1, 1, 1, 2, 2, 3], + [0, 1, 2, 0, 1, 2, 1, 2, 2]]) + + >>> a = torch.triu_indices(4, 3, 1) + >>> a + tensor([[0, 0, 1], + [1, 2, 2]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.true_divide, + r""" +true_divide(dividend, divisor, *, out) -> Tensor + +Alias for :func:`torch.div` with ``rounding_mode=None``. +""", +) + +add_docstr( + torch.trunc, + r""" +trunc(input, *, out=None) -> Tensor + +Returns a new tensor with the truncated integer values of +the elements of :attr:`input`. + +For integer inputs, follows the array-api convention of returning a +copy of the input tensor. + +Args: + {input} + +Keyword args: + {out} + +Example:: + + >>> a = torch.randn(4) + >>> a + tensor([ 3.4742, 0.5466, -0.8008, -0.9079]) + >>> torch.trunc(a) + tensor([ 3., 0., -0., -0.]) +""".format( + **common_args + ), +) + +add_docstr( + torch.fake_quantize_per_tensor_affine, + r""" +fake_quantize_per_tensor_affine(input, scale, zero_point, quant_min, quant_max) -> Tensor + +Returns a new tensor with the data in :attr:`input` fake quantized using :attr:`scale`, +:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`. + +.. math:: + \text{output} = ( + min( + \text{quant\_max}, + max( + \text{quant\_min}, + \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point} + ) + ) - \text{zero\_point} + ) \times \text{scale} + +Args: + input (Tensor): the input value(s), ``torch.float32`` tensor + scale (double scalar or ``float32`` Tensor): quantization scale + zero_point (int64 scalar or ``int32`` Tensor): quantization zero_point + quant_min (int64): lower bound of the quantized domain + quant_max (int64): upper bound of the quantized domain + +Returns: + Tensor: A newly fake_quantized ``torch.float32`` tensor + +Example:: + + >>> x = torch.randn(4) + >>> x + tensor([ 0.0552, 0.9730, 0.3973, -1.0780]) + >>> torch.fake_quantize_per_tensor_affine(x, 0.1, 0, 0, 255) + tensor([0.1000, 1.0000, 0.4000, 0.0000]) + >>> torch.fake_quantize_per_tensor_affine(x, torch.tensor(0.1), torch.tensor(0), 0, 255) + tensor([0.1000, 1.0000, 0.4000, 0.0000]) +""", +) + +add_docstr( + torch.fake_quantize_per_channel_affine, + r""" +fake_quantize_per_channel_affine(input, scale, zero_point, axis, quant_min, quant_max) -> Tensor + +Returns a new tensor with the data in :attr:`input` fake quantized per channel using :attr:`scale`, +:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`, across the channel specified by :attr:`axis`. + +.. math:: + \text{output} = ( + min( + \text{quant\_max}, + max( + \text{quant\_min}, + \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point} + ) + ) - \text{zero\_point} + ) \times \text{scale} + +Args: + input (Tensor): the input value(s), in ``torch.float32`` + scale (Tensor): quantization scale, per channel in ``torch.float32`` + zero_point (Tensor): quantization zero_point, per channel in ``torch.int32`` or ``torch.half`` or ``torch.float32`` + axis (int32): channel axis + quant_min (int64): lower bound of the quantized domain + quant_max (int64): upper bound of the quantized domain + +Returns: + Tensor: A newly fake_quantized per channel ``torch.float32`` tensor + +Example:: + + >>> x = torch.randn(2, 2, 2) + >>> x + tensor([[[-0.2525, -0.0466], + [ 0.3491, -0.2168]], + + [[-0.5906, 1.6258], + [ 0.6444, -0.0542]]]) + >>> scales = (torch.randn(2) + 1) * 0.05 + >>> scales + tensor([0.0475, 0.0486]) + >>> zero_points = torch.zeros(2).to(torch.int32) + >>> zero_points + tensor([0, 0]) + >>> torch.fake_quantize_per_channel_affine(x, scales, zero_points, 1, 0, 255) + tensor([[[0.0000, 0.0000], + [0.3405, 0.0000]], + + [[0.0000, 1.6134], + [0.6323, 0.0000]]]) +""", +) + +add_docstr( + torch.fix, + r""" +fix(input, *, out=None) -> Tensor + +Alias for :func:`torch.trunc` +""", +) + +add_docstr( + torch.unsqueeze, + r""" +unsqueeze(input, dim) -> Tensor + +Returns a new tensor with a dimension of size one inserted at the +specified position. + +The returned tensor shares the same underlying data with this tensor. + +A :attr:`dim` value within the range ``[-input.dim() - 1, input.dim() + 1)`` +can be used. Negative :attr:`dim` will correspond to :meth:`unsqueeze` +applied at :attr:`dim` = ``dim + input.dim() + 1``. + +Args: + {input} + dim (int): the index at which to insert the singleton dimension + +Example:: + + >>> x = torch.tensor([1, 2, 3, 4]) + >>> torch.unsqueeze(x, 0) + tensor([[ 1, 2, 3, 4]]) + >>> torch.unsqueeze(x, 1) + tensor([[ 1], + [ 2], + [ 3], + [ 4]]) +""".format( + **common_args + ), +) + +add_docstr( + torch.var, + r""" +var(input, dim=None, *, correction=1, keepdim=False, out=None) -> Tensor + +Calculates the variance over the dimensions specified by :attr:`dim`. :attr:`dim` +can be a single dimension, list of dimensions, or ``None`` to reduce over all +dimensions. + +The variance (:math:`\sigma^2`) is calculated as + +.. math:: \sigma^2 = \frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2 + +where :math:`x` is the sample set of elements, :math:`\bar{x}` is the +sample mean, :math:`N` is the number of samples and :math:`\delta N` is +the :attr:`correction`. +""" + + r""" + +{keepdim_details} + +Args: + {input} + {opt_dim} + +Keyword args: + correction (int): difference between the sample size and sample degrees of freedom. + Defaults to `Bessel's correction`_, ``correction=1``. + + .. versionchanged:: 2.0 + Previously this argument was called ``unbiased`` and was a boolean + with ``True`` corresponding to ``correction=1`` and ``False`` being + ``correction=0``. + {keepdim} + {out} + +Example: + + >>> a = torch.tensor( + ... [[ 0.2035, 1.2959, 1.8101, -0.4644], + ... [ 1.5027, -0.3270, 0.5905, 0.6538], + ... [-1.5745, 1.3330, -0.5596, -0.6548], + ... [ 0.1264, -0.5080, 1.6420, 0.1992]]) + >>> torch.var(a, dim=1, keepdim=True) + tensor([[1.0631], + [0.5590], + [1.4893], + [0.8258]]) + +.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.var_mean, + r""" +var_mean(input, dim=None, *, correction=1, keepdim=False, out=None) -> (Tensor, Tensor) + +Calculates the variance and mean over the dimensions specified by :attr:`dim`. +:attr:`dim` can be a single dimension, list of dimensions, or ``None`` to +reduce over all dimensions. + +The variance (:math:`\sigma^2`) is calculated as + +.. math:: \sigma^2 = \frac{1}{\max(0,~N - \delta N)}\sum_{i=0}^{N-1}(x_i-\bar{x})^2 + +where :math:`x` is the sample set of elements, :math:`\bar{x}` is the +sample mean, :math:`N` is the number of samples and :math:`\delta N` is +the :attr:`correction`. +""" + + r""" + +{keepdim_details} + +Args: + {input} + {opt_dim} + +Keyword args: + correction (int): difference between the sample size and sample degrees of freedom. + Defaults to `Bessel's correction`_, ``correction=1``. + + .. versionchanged:: 2.0 + Previously this argument was called ``unbiased`` and was a boolean + with ``True`` corresponding to ``correction=1`` and ``False`` being + ``correction=0``. + {keepdim} + {out} + +Returns: + A tuple (var, mean) containing the variance and mean. + +Example: + + >>> a = torch.tensor( + ... [[ 0.2035, 1.2959, 1.8101, -0.4644], + ... [ 1.5027, -0.3270, 0.5905, 0.6538], + ... [-1.5745, 1.3330, -0.5596, -0.6548], + ... [ 0.1264, -0.5080, 1.6420, 0.1992]]) + >>> torch.var_mean(a, dim=0, keepdim=True) + (tensor([[1.5926, 1.0056, 1.2005, 0.3646]]), + tensor([[ 0.0645, 0.4485, 0.8707, -0.0665]])) + +.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + +""".format( + **multi_dim_common + ), +) + +add_docstr( + torch.zeros, + r""" +zeros(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Returns a tensor filled with the scalar value `0`, with the shape defined +by the variable argument :attr:`size`. + +Args: + size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + +Keyword args: + {out} + {dtype} + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.zeros(2, 3) + tensor([[ 0., 0., 0.], + [ 0., 0., 0.]]) + + >>> torch.zeros(5) + tensor([ 0., 0., 0., 0., 0.]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.zeros_like, + r""" +zeros_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor + +Returns a tensor filled with the scalar value `0`, with the same size as +:attr:`input`. ``torch.zeros_like(input)`` is equivalent to +``torch.zeros(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``. + +.. warning:: + As of 0.4, this function does not support an :attr:`out` keyword. As an alternative, + the old ``torch.zeros_like(input, out=output)`` is equivalent to + ``torch.zeros(input.size(), out=output)``. + +Args: + {input} + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +Example:: + + >>> input = torch.empty(2, 3) + >>> torch.zeros_like(input) + tensor([[ 0., 0., 0.], + [ 0., 0., 0.]]) +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.empty, + """ +empty(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, pin_memory=False, \ +memory_format=torch.contiguous_format) -> Tensor + +Returns a tensor filled with uninitialized data. The shape of the tensor is +defined by the variable argument :attr:`size`. + +.. note:: + If :func:`torch.use_deterministic_algorithms()` and + :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to + ``True``, the output tensor is initialized to prevent any possible + nondeterministic behavior from using the data as an input to an operation. + Floating point and complex tensors are filled with NaN, and integer tensors + are filled with the maximum value. + +Args: + size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + +Keyword args: + {out} + {dtype} + {layout} + {device} + {requires_grad} + {pin_memory} + {memory_format} + +Example:: + + >>> torch.empty((2,3), dtype=torch.int64) + tensor([[ 9.4064e+13, 2.8000e+01, 9.3493e+13], + [ 7.5751e+18, 7.1428e+18, 7.5955e+18]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.empty_like, + r""" +empty_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) -> Tensor + +Returns an uninitialized tensor with the same size as :attr:`input`. +``torch.empty_like(input)`` is equivalent to +``torch.empty(input.size(), dtype=input.dtype, layout=input.layout, device=input.device)``. + +.. note:: + If :func:`torch.use_deterministic_algorithms()` and + :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to + ``True``, the output tensor is initialized to prevent any possible + nondeterministic behavior from using the data as an input to an operation. + Floating point and complex tensors are filled with NaN, and integer tensors + are filled with the maximum value. + +Args: + {input} + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} + +Example:: + + >>> a=torch.empty((2,3), dtype=torch.int32, device = 'cuda') + >>> torch.empty_like(a) + tensor([[0, 0, 0], + [0, 0, 0]], device='cuda:0', dtype=torch.int32) +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.empty_strided, + r""" +empty_strided(size, stride, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False) -> Tensor + +Creates a tensor with the specified :attr:`size` and :attr:`stride` and filled with undefined data. + +.. warning:: + If the constructed tensor is "overlapped" (with multiple indices referring to the same element + in memory) its behavior is undefined. + +.. note:: + If :func:`torch.use_deterministic_algorithms()` and + :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to + ``True``, the output tensor is initialized to prevent any possible + nondeterministic behavior from using the data as an input to an operation. + Floating point and complex tensors are filled with NaN, and integer tensors + are filled with the maximum value. + +Args: + size (tuple of int): the shape of the output tensor + stride (tuple of int): the strides of the output tensor + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {pin_memory} + +Example:: + + >>> a = torch.empty_strided((2, 3), (1, 2)) + >>> a + tensor([[8.9683e-44, 4.4842e-44, 5.1239e+07], + [0.0000e+00, 0.0000e+00, 3.0705e-41]]) + >>> a.stride() + (1, 2) + >>> a.size() + torch.Size([2, 3]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.empty_permuted, + r""" +empty_permuted(size, physical_layout, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False) -> Tensor + +Creates an uninitialized, non-overlapping and dense tensor with the +specified :attr:`size`, with :attr:`physical_layout` specifying how the +dimensions are physically laid out in memory (each logical dimension is listed +from outermost to innermost). :attr:`physical_layout` is a generalization +of NCHW/NHWC notation: if each dimension is assigned a number according to +what order they occur in size (N=0, C=1, H=2, W=3), then NCHW is ``(0, 1, 2, 3)`` +while NHWC is ``(0, 2, 3, 1)``. Equivalently, the strides of the output +tensor ``t`` are such that ``t.stride(physical_layout[i]) == contiguous_strides[i]`` +(notably, this function is *not* equivalent to ``torch.empty(size).permute(physical_layout)``). + +Unlike :func:`torch.empty_strided`, this is guaranteed to produce a dense +tensor with no overlaps. If possible, prefer using this function over +:func:`torch.empty_strided` or manual use of :func:`torch.as_strided`. + +.. note:: + If :func:`torch.use_deterministic_algorithms()` and + :attr:`torch.utils.deterministic.fill_uninitialized_memory` are both set to + ``True``, the output tensor is initialized to prevent any possible + nondeterministic behavior from using the data as an input to an operation. + Floating point and complex tensors are filled with NaN, and integer tensors + are filled with the maximum value. + +Args: + size (tuple of int): the shape of the output tensor + physical_layout (tuple of int): the ordering of dimensions physically in memory + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {pin_memory} + +Examples: + + >>> torch.empty((2, 3, 5, 7)).stride() + (105, 35, 7, 1) + >>> torch.empty_permuted((2, 3, 5, 7), (0, 1, 2, 3)).stride() + (105, 35, 7, 1) + >>> torch.empty((2, 3, 5, 7), memory_format=torch.channels_last).stride() + (105, 1, 21, 3) + >>> torch.empty_permuted((2, 3, 5, 7), (0, 2, 3, 1)).stride() + (105, 1, 21, 3) + >>> torch.empty_permuted((2, 3, 5, 7), (0, 2, 3, 1)).dim_order() + (0, 2, 3, 1) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.full, + r""" +full(size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor + +Creates a tensor of size :attr:`size` filled with :attr:`fill_value`. The +tensor's dtype is inferred from :attr:`fill_value`. + +Args: + size (int...): a list, tuple, or :class:`torch.Size` of integers defining the + shape of the output tensor. + fill_value (Scalar): the value to fill the output tensor with. + +Keyword args: + {out} + {dtype} + {layout} + {device} + {requires_grad} + +Example:: + + >>> torch.full((2, 3), 3.141592) + tensor([[ 3.1416, 3.1416, 3.1416], + [ 3.1416, 3.1416, 3.1416]]) +""".format( + **factory_common_args + ), +) + +add_docstr( + torch.full_like, + """ +full_like(input, fill_value, \\*, dtype=None, layout=torch.strided, device=None, requires_grad=False, \ +memory_format=torch.preserve_format) -> Tensor + +Returns a tensor with the same size as :attr:`input` filled with :attr:`fill_value`. +``torch.full_like(input, fill_value)`` is equivalent to +``torch.full(input.size(), fill_value, dtype=input.dtype, layout=input.layout, device=input.device)``. + +Args: + {input} + fill_value: the number to fill the output tensor with. + +Keyword args: + {dtype} + {layout} + {device} + {requires_grad} + {memory_format} +""".format( + **factory_like_common_args + ), +) + +add_docstr( + torch.det, + r""" +det(input) -> Tensor + +Alias for :func:`torch.linalg.det` +""", +) + +add_docstr( + torch.where, + r""" +where(condition, input, other, *, out=None) -> Tensor + +Return a tensor of elements selected from either :attr:`input` or :attr:`other`, depending on :attr:`condition`. + +The operation is defined as: + +.. math:: + \text{out}_i = \begin{cases} + \text{input}_i & \text{if } \text{condition}_i \\ + \text{other}_i & \text{otherwise} \\ + \end{cases} +""" + + r""" +.. note:: + The tensors :attr:`condition`, :attr:`input`, :attr:`other` must be :ref:`broadcastable `. + +Arguments: + condition (BoolTensor): When True (nonzero), yield input, otherwise yield other + input (Tensor or Scalar): value (if :attr:`input` is a scalar) or values selected at indices + where :attr:`condition` is ``True`` + other (Tensor or Scalar): value (if :attr:`other` is a scalar) or values selected at indices + where :attr:`condition` is ``False`` + +Keyword args: + {out} + +Returns: + Tensor: A tensor of shape equal to the broadcasted shape of :attr:`condition`, :attr:`input`, :attr:`other` + +Example:: + + >>> x = torch.randn(3, 2) + >>> y = torch.ones(3, 2) + >>> x + tensor([[-0.4620, 0.3139], + [ 0.3898, -0.7197], + [ 0.0478, -0.1657]]) + >>> torch.where(x > 0, 1.0, 0.0) + tensor([[0., 1.], + [1., 0.], + [1., 0.]]) + >>> torch.where(x > 0, x, y) + tensor([[ 1.0000, 0.3139], + [ 0.3898, 1.0000], + [ 0.0478, 1.0000]]) + >>> x = torch.randn(2, 2, dtype=torch.double) + >>> x + tensor([[ 1.0779, 0.0383], + [-0.8785, -1.1089]], dtype=torch.float64) + >>> torch.where(x > 0, x, 0.) + tensor([[1.0779, 0.0383], + [0.0000, 0.0000]], dtype=torch.float64) + +.. function:: where(condition) -> tuple of LongTensor + :noindex: + +``torch.where(condition)`` is identical to +``torch.nonzero(condition, as_tuple=True)``. + +.. note:: + See also :func:`torch.nonzero`. +""".format( + **common_args + ), +) + +add_docstr( + torch.logdet, + r""" +logdet(input) -> Tensor + +Calculates log determinant of a square matrix or batches of square matrices. + +It returns ``-inf`` if the input has a determinant of zero, and ``NaN`` if it has +a negative determinant. + +.. note:: + Backward through :meth:`logdet` internally uses SVD results when :attr:`input` + is not invertible. In this case, double backward through :meth:`logdet` will + be unstable in when :attr:`input` doesn't have distinct singular values. See + :func:`torch.linalg.svd` for details. + +.. seealso:: + + :func:`torch.linalg.slogdet` computes the sign (resp. angle) and natural logarithm of the + absolute value of the determinant of real-valued (resp. complex) square matrices. + +Arguments: + input (Tensor): the input tensor of size ``(*, n, n)`` where ``*`` is zero or more + batch dimensions. + +Example:: + + >>> A = torch.randn(3, 3) + >>> torch.det(A) + tensor(0.2611) + >>> torch.logdet(A) + tensor(-1.3430) + >>> A + tensor([[[ 0.9254, -0.6213], + [-0.5787, 1.6843]], + + [[ 0.3242, -0.9665], + [ 0.4539, -0.0887]], + + [[ 1.1336, -0.4025], + [-0.7089, 0.9032]]]) + >>> A.det() + tensor([1.1990, 0.4099, 0.7386]) + >>> A.det().log() + tensor([ 0.1815, -0.8917, -0.3031]) +""", +) + +add_docstr( + torch.slogdet, + r""" +slogdet(input) -> (Tensor, Tensor) + +Alias for :func:`torch.linalg.slogdet` +""", +) + +add_docstr( + torch.pinverse, + r""" +pinverse(input, rcond=1e-15) -> Tensor + +Alias for :func:`torch.linalg.pinv` +""", +) + +add_docstr( + torch.hann_window, + """ +hann_window(window_length, periodic=True, *, dtype=None, \ +layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" +Hann window function. + +.. math:: + w[n] = \frac{1}{2}\ \left[1 - \cos \left( \frac{2 \pi n}{N - 1} \right)\right] = + \sin^2 \left( \frac{\pi n}{N - 1} \right), + +where :math:`N` is the full window size. + +The input :attr:`window_length` is a positive integer controlling the +returned window size. :attr:`periodic` flag determines whether the returned +window trims off the last duplicate value from the symmetric window and is +ready to be used as a periodic window with functions like +:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in +above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have +``torch.hann_window(L, periodic=True)`` equal to +``torch.hann_window(L + 1, periodic=False)[:-1])``. + +.. note:: + If :attr:`window_length` :math:`=1`, the returned window contains a single value 1. +""" + + r""" +Arguments: + window_length (int): the size of returned window + periodic (bool, optional): If True, returns a window to be used as periodic + function. If False, return a symmetric window. + +Keyword args: + {dtype} Only floating point types are supported. + layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only + ``torch.strided`` (dense layout) is supported. + {device} + {requires_grad} + +Returns: + Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.hamming_window, + """ +hamming_window(window_length, periodic=True, alpha=0.54, beta=0.46, *, dtype=None, \ +layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" +Hamming window function. + +.. math:: + w[n] = \alpha - \beta\ \cos \left( \frac{2 \pi n}{N - 1} \right), + +where :math:`N` is the full window size. + +The input :attr:`window_length` is a positive integer controlling the +returned window size. :attr:`periodic` flag determines whether the returned +window trims off the last duplicate value from the symmetric window and is +ready to be used as a periodic window with functions like +:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in +above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have +``torch.hamming_window(L, periodic=True)`` equal to +``torch.hamming_window(L + 1, periodic=False)[:-1])``. + +.. note:: + If :attr:`window_length` :math:`=1`, the returned window contains a single value 1. + +.. note:: + This is a generalized version of :meth:`torch.hann_window`. +""" + + r""" +Arguments: + window_length (int): the size of returned window + periodic (bool, optional): If True, returns a window to be used as periodic + function. If False, return a symmetric window. + alpha (float, optional): The coefficient :math:`\alpha` in the equation above + beta (float, optional): The coefficient :math:`\beta` in the equation above + +Keyword args: + {dtype} Only floating point types are supported. + layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only + ``torch.strided`` (dense layout) is supported. + {device} + {requires_grad} + +Returns: + Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window. + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.bartlett_window, + """ +bartlett_window(window_length, periodic=True, *, dtype=None, \ +layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" +Bartlett window function. + +.. math:: + w[n] = 1 - \left| \frac{2n}{N-1} - 1 \right| = \begin{cases} + \frac{2n}{N - 1} & \text{if } 0 \leq n \leq \frac{N - 1}{2} \\ + 2 - \frac{2n}{N - 1} & \text{if } \frac{N - 1}{2} < n < N \\ + \end{cases}, + +where :math:`N` is the full window size. + +The input :attr:`window_length` is a positive integer controlling the +returned window size. :attr:`periodic` flag determines whether the returned +window trims off the last duplicate value from the symmetric window and is +ready to be used as a periodic window with functions like +:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in +above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have +``torch.bartlett_window(L, periodic=True)`` equal to +``torch.bartlett_window(L + 1, periodic=False)[:-1])``. + +.. note:: + If :attr:`window_length` :math:`=1`, the returned window contains a single value 1. +""" + + r""" +Arguments: + window_length (int): the size of returned window + periodic (bool, optional): If True, returns a window to be used as periodic + function. If False, return a symmetric window. + +Keyword args: + {dtype} Only floating point types are supported. + layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only + ``torch.strided`` (dense layout) is supported. + {device} + {requires_grad} + +Returns: + Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.blackman_window, + """ +blackman_window(window_length, periodic=True, *, dtype=None, \ +layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" +Blackman window function. + +.. math:: + w[n] = 0.42 - 0.5 \cos \left( \frac{2 \pi n}{N - 1} \right) + 0.08 \cos \left( \frac{4 \pi n}{N - 1} \right) + +where :math:`N` is the full window size. + +The input :attr:`window_length` is a positive integer controlling the +returned window size. :attr:`periodic` flag determines whether the returned +window trims off the last duplicate value from the symmetric window and is +ready to be used as a periodic window with functions like +:meth:`torch.stft`. Therefore, if :attr:`periodic` is true, the :math:`N` in +above formula is in fact :math:`\text{window\_length} + 1`. Also, we always have +``torch.blackman_window(L, periodic=True)`` equal to +``torch.blackman_window(L + 1, periodic=False)[:-1])``. + +.. note:: + If :attr:`window_length` :math:`=1`, the returned window contains a single value 1. +""" + + r""" +Arguments: + window_length (int): the size of returned window + periodic (bool, optional): If True, returns a window to be used as periodic + function. If False, return a symmetric window. + +Keyword args: + {dtype} Only floating point types are supported. + layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only + ``torch.strided`` (dense layout) is supported. + {device} + {requires_grad} + +Returns: + Tensor: A 1-D tensor of size :math:`(\text{{window\_length}},)` containing the window + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.kaiser_window, + """ +kaiser_window(window_length, periodic=True, beta=12.0, *, dtype=None, \ +layout=torch.strided, device=None, requires_grad=False) -> Tensor +""" + + r""" +Computes the Kaiser window with window length :attr:`window_length` and shape parameter :attr:`beta`. + +Let I_0 be the zeroth order modified Bessel function of the first kind (see :func:`torch.i0`) and +``N = L - 1`` if :attr:`periodic` is False and ``L`` if :attr:`periodic` is True, +where ``L`` is the :attr:`window_length`. This function computes: + +.. math:: + out_i = I_0 \left( \beta \sqrt{1 - \left( {\frac{i - N/2}{N/2}} \right) ^2 } \right) / I_0( \beta ) + +Calling ``torch.kaiser_window(L, B, periodic=True)`` is equivalent to calling +``torch.kaiser_window(L + 1, B, periodic=False)[:-1])``. +The :attr:`periodic` argument is intended as a helpful shorthand +to produce a periodic window as input to functions like :func:`torch.stft`. + +.. note:: + If :attr:`window_length` is one, then the returned window is a single element tensor containing a one. + +""" + + r""" +Args: + window_length (int): length of the window. + periodic (bool, optional): If True, returns a periodic window suitable for use in spectral analysis. + If False, returns a symmetric window suitable for use in filter design. + beta (float, optional): shape parameter for the window. + +Keyword args: + {dtype} + layout (:class:`torch.layout`, optional): the desired layout of returned window tensor. Only + ``torch.strided`` (dense layout) is supported. + {device} + {requires_grad} + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.vander, + """ +vander(x, N=None, increasing=False) -> Tensor +""" + + r""" +Generates a Vandermonde matrix. + +The columns of the output matrix are elementwise powers of the input vector :math:`x^{{(N-1)}}, x^{{(N-2)}}, ..., x^0`. +If increasing is True, the order of the columns is reversed :math:`x^0, x^1, ..., x^{{(N-1)}}`. Such a +matrix with a geometric progression in each row is named for Alexandre-Theophile Vandermonde. + +Arguments: + x (Tensor): 1-D input tensor. + N (int, optional): Number of columns in the output. If N is not specified, + a square array is returned :math:`(N = len(x))`. + increasing (bool, optional): Order of the powers of the columns. If True, + the powers increase from left to right, if False (the default) they are reversed. + +Returns: + Tensor: Vandermonde matrix. If increasing is False, the first column is :math:`x^{{(N-1)}}`, + the second :math:`x^{{(N-2)}}` and so forth. If increasing is True, the columns + are :math:`x^0, x^1, ..., x^{{(N-1)}}`. + +Example:: + + >>> x = torch.tensor([1, 2, 3, 5]) + >>> torch.vander(x) + tensor([[ 1, 1, 1, 1], + [ 8, 4, 2, 1], + [ 27, 9, 3, 1], + [125, 25, 5, 1]]) + >>> torch.vander(x, N=3) + tensor([[ 1, 1, 1], + [ 4, 2, 1], + [ 9, 3, 1], + [25, 5, 1]]) + >>> torch.vander(x, N=3, increasing=True) + tensor([[ 1, 1, 1], + [ 1, 2, 4], + [ 1, 3, 9], + [ 1, 5, 25]]) + +""".format( + **factory_common_args + ), +) + + +add_docstr( + torch.unbind, + r""" +unbind(input, dim=0) -> seq + +Removes a tensor dimension. + +Returns a tuple of all slices along a given dimension, already without it. + +Arguments: + input (Tensor): the tensor to unbind + dim (int): dimension to remove + +Example:: + + >>> torch.unbind(torch.tensor([[1, 2, 3], + >>> [4, 5, 6], + >>> [7, 8, 9]])) + (tensor([1, 2, 3]), tensor([4, 5, 6]), tensor([7, 8, 9])) +""", +) + + +add_docstr( + torch.combinations, + r""" +combinations(input, r=2, with_replacement=False) -> seq + +Compute combinations of length :math:`r` of the given tensor. The behavior is similar to +python's `itertools.combinations` when `with_replacement` is set to `False`, and +`itertools.combinations_with_replacement` when `with_replacement` is set to `True`. + +Arguments: + input (Tensor): 1D vector. + r (int, optional): number of elements to combine + with_replacement (bool, optional): whether to allow duplication in combination + +Returns: + Tensor: A tensor equivalent to converting all the input tensors into lists, do + `itertools.combinations` or `itertools.combinations_with_replacement` on these + lists, and finally convert the resulting list into tensor. + +Example:: + + >>> a = [1, 2, 3] + >>> list(itertools.combinations(a, r=2)) + [(1, 2), (1, 3), (2, 3)] + >>> list(itertools.combinations(a, r=3)) + [(1, 2, 3)] + >>> list(itertools.combinations_with_replacement(a, r=2)) + [(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)] + >>> tensor_a = torch.tensor(a) + >>> torch.combinations(tensor_a) + tensor([[1, 2], + [1, 3], + [2, 3]]) + >>> torch.combinations(tensor_a, r=3) + tensor([[1, 2, 3]]) + >>> torch.combinations(tensor_a, with_replacement=True) + tensor([[1, 1], + [1, 2], + [1, 3], + [2, 2], + [2, 3], + [3, 3]]) + +""", +) + +add_docstr( + torch.trapezoid, + r""" +trapezoid(y, x=None, *, dx=None, dim=-1) -> Tensor + +Computes the `trapezoidal rule `_ along +:attr:`dim`. By default the spacing between elements is assumed to be 1, but +:attr:`dx` can be used to specify a different constant spacing, and :attr:`x` can be +used to specify arbitrary spacing along :attr:`dim`. + + +Assuming :attr:`y` is a one-dimensional tensor with elements :math:`{y_0, y_1, ..., y_n}`, +the default computation is + +.. math:: + \begin{aligned} + \sum_{i = 1}^{n-1} \frac{1}{2} (y_i + y_{i-1}) + \end{aligned} + +When :attr:`dx` is specified the computation becomes + +.. math:: + \begin{aligned} + \sum_{i = 1}^{n-1} \frac{\Delta x}{2} (y_i + y_{i-1}) + \end{aligned} + +effectively multiplying the result by :attr:`dx`. When :attr:`x` is specified, +assuming :attr:`x` is also a one-dimensional tensor with +elements :math:`{x_0, x_1, ..., x_n}`, the computation becomes + +.. math:: + \begin{aligned} + \sum_{i = 1}^{n-1} \frac{(x_i - x_{i-1})}{2} (y_i + y_{i-1}) + \end{aligned} + +When :attr:`x` and :attr:`y` have the same size, the computation is as described above and no broadcasting is needed. +The broadcasting behavior of this function is as follows when their sizes are different. For both :attr:`x` +and :attr:`y`, the function computes the difference between consecutive elements along +dimension :attr:`dim`. This effectively creates two tensors, `x_diff` and `y_diff`, that have +the same shape as the original tensors except their lengths along the dimension :attr:`dim` is reduced by 1. +After that, those two tensors are broadcast together to compute final output as part of the trapezoidal rule. +See the examples below for details. + +.. note:: + The trapezoidal rule is a technique for approximating the definite integral of a function + by averaging its left and right Riemann sums. The approximation becomes more accurate as + the resolution of the partition increases. + +Arguments: + y (Tensor): Values to use when computing the trapezoidal rule. + x (Tensor): If specified, defines spacing between values as specified above. + +Keyword arguments: + dx (float): constant spacing between values. If neither :attr:`x` or :attr:`dx` + are specified then this defaults to 1. Effectively multiplies the result by its value. + dim (int): The dimension along which to compute the trapezoidal rule. + The last (inner-most) dimension by default. + +Examples:: + + >>> # Computes the trapezoidal rule in 1D, spacing is implicitly 1 + >>> y = torch.tensor([1, 5, 10]) + >>> torch.trapezoid(y) + tensor(10.5) + + >>> # Computes the same trapezoidal rule directly to verify + >>> (1 + 10 + 10) / 2 + 10.5 + + >>> # Computes the trapezoidal rule in 1D with constant spacing of 2 + >>> # NOTE: the result is the same as before, but multiplied by 2 + >>> torch.trapezoid(y, dx=2) + 21.0 + + >>> # Computes the trapezoidal rule in 1D with arbitrary spacing + >>> x = torch.tensor([1, 3, 6]) + >>> torch.trapezoid(y, x) + 28.5 + + >>> # Computes the same trapezoidal rule directly to verify + >>> ((3 - 1) * (1 + 5) + (6 - 3) * (5 + 10)) / 2 + 28.5 + + >>> # Computes the trapezoidal rule for each row of a 3x3 matrix + >>> y = torch.arange(9).reshape(3, 3) + tensor([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + >>> torch.trapezoid(y) + tensor([ 2., 8., 14.]) + + >>> # Computes the trapezoidal rule for each column of the matrix + >>> torch.trapezoid(y, dim=0) + tensor([ 6., 8., 10.]) + + >>> # Computes the trapezoidal rule for each row of a 3x3 ones matrix + >>> # with the same arbitrary spacing + >>> y = torch.ones(3, 3) + >>> x = torch.tensor([1, 3, 6]) + >>> torch.trapezoid(y, x) + array([5., 5., 5.]) + + >>> # Computes the trapezoidal rule for each row of a 3x3 ones matrix + >>> # with different arbitrary spacing per row + >>> y = torch.ones(3, 3) + >>> x = torch.tensor([[1, 2, 3], [1, 3, 5], [1, 4, 7]]) + >>> torch.trapezoid(y, x) + array([2., 4., 6.]) +""", +) + +add_docstr( + torch.trapz, + r""" +trapz(y, x, *, dim=-1) -> Tensor + +Alias for :func:`torch.trapezoid`. +""", +) + +add_docstr( + torch.cumulative_trapezoid, + r""" +cumulative_trapezoid(y, x=None, *, dx=None, dim=-1) -> Tensor + +Cumulatively computes the `trapezoidal rule `_ +along :attr:`dim`. By default the spacing between elements is assumed to be 1, but +:attr:`dx` can be used to specify a different constant spacing, and :attr:`x` can be +used to specify arbitrary spacing along :attr:`dim`. + +For more details, please read :func:`torch.trapezoid`. The difference between :func:`torch.trapezoid` +and this function is that, :func:`torch.trapezoid` returns a value for each integration, +where as this function returns a cumulative value for every spacing within the integration. This +is analogous to how `.sum` returns a value and `.cumsum` returns a cumulative sum. + +Arguments: + y (Tensor): Values to use when computing the trapezoidal rule. + x (Tensor): If specified, defines spacing between values as specified above. + +Keyword arguments: + dx (float): constant spacing between values. If neither :attr:`x` or :attr:`dx` + are specified then this defaults to 1. Effectively multiplies the result by its value. + dim (int): The dimension along which to compute the trapezoidal rule. + The last (inner-most) dimension by default. + +Examples:: + + >>> # Cumulatively computes the trapezoidal rule in 1D, spacing is implicitly 1. + >>> y = torch.tensor([1, 5, 10]) + >>> torch.cumulative_trapezoid(y) + tensor([3., 10.5]) + + >>> # Computes the same trapezoidal rule directly up to each element to verify + >>> (1 + 5) / 2 + 3.0 + >>> (1 + 10 + 10) / 2 + 10.5 + + >>> # Cumulatively computes the trapezoidal rule in 1D with constant spacing of 2 + >>> # NOTE: the result is the same as before, but multiplied by 2 + >>> torch.cumulative_trapezoid(y, dx=2) + tensor([6., 21.]) + + >>> # Cumulatively computes the trapezoidal rule in 1D with arbitrary spacing + >>> x = torch.tensor([1, 3, 6]) + >>> torch.cumulative_trapezoid(y, x) + tensor([6., 28.5]) + + >>> # Computes the same trapezoidal rule directly up to each element to verify + >>> ((3 - 1) * (1 + 5)) / 2 + 6.0 + >>> ((3 - 1) * (1 + 5) + (6 - 3) * (5 + 10)) / 2 + 28.5 + + >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 matrix + >>> y = torch.arange(9).reshape(3, 3) + tensor([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + >>> torch.cumulative_trapezoid(y) + tensor([[ 0.5, 2.], + [ 3.5, 8.], + [ 6.5, 14.]]) + + >>> # Cumulatively computes the trapezoidal rule for each column of the matrix + >>> torch.cumulative_trapezoid(y, dim=0) + tensor([[ 1.5, 2.5, 3.5], + [ 6.0, 8.0, 10.0]]) + + >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 ones matrix + >>> # with the same arbitrary spacing + >>> y = torch.ones(3, 3) + >>> x = torch.tensor([1, 3, 6]) + >>> torch.cumulative_trapezoid(y, x) + tensor([[2., 5.], + [2., 5.], + [2., 5.]]) + + >>> # Cumulatively computes the trapezoidal rule for each row of a 3x3 ones matrix + >>> # with different arbitrary spacing per row + >>> y = torch.ones(3, 3) + >>> x = torch.tensor([[1, 2, 3], [1, 3, 5], [1, 4, 7]]) + >>> torch.cumulative_trapezoid(y, x) + tensor([[1., 2.], + [2., 4.], + [3., 6.]]) +""", +) + +add_docstr( + torch.repeat_interleave, + r""" +repeat_interleave(input, repeats, dim=None, *, output_size=None) -> Tensor + +Repeat elements of a tensor. + +.. warning:: + + This is different from :meth:`torch.Tensor.repeat` but similar to ``numpy.repeat``. + +Args: + {input} + repeats (Tensor or int): The number of repetitions for each element. + repeats is broadcasted to fit the shape of the given axis. + dim (int, optional): The dimension along which to repeat values. + By default, use the flattened input array, and return a flat output + array. + +Keyword args: + output_size (int, optional): Total output size for the given axis + ( e.g. sum of repeats). If given, it will avoid stream synchronization + needed to calculate output shape of the tensor. + +Returns: + Tensor: Repeated tensor which has the same shape as input, except along the given axis. + +Example:: + + >>> x = torch.tensor([1, 2, 3]) + >>> x.repeat_interleave(2) + tensor([1, 1, 2, 2, 3, 3]) + >>> y = torch.tensor([[1, 2], [3, 4]]) + >>> torch.repeat_interleave(y, 2) + tensor([1, 1, 2, 2, 3, 3, 4, 4]) + >>> torch.repeat_interleave(y, 3, dim=1) + tensor([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 4, 4, 4]]) + >>> torch.repeat_interleave(y, torch.tensor([1, 2]), dim=0) + tensor([[1, 2], + [3, 4], + [3, 4]]) + >>> torch.repeat_interleave(y, torch.tensor([1, 2]), dim=0, output_size=3) + tensor([[1, 2], + [3, 4], + [3, 4]]) + +If the `repeats` is `tensor([n1, n2, n3, ...])`, then the output will be +`tensor([0, 0, ..., 1, 1, ..., 2, 2, ..., ...])` where `0` appears `n1` times, +`1` appears `n2` times, `2` appears `n3` times, etc. + +.. function:: repeat_interleave(repeats, *) -> Tensor + :noindex: + +Repeats 0 repeats[0] times, 1 repeats[1] times, 2 repeats[2] times, etc. + +Args: + repeats (Tensor): The number of repetitions for each element. + +Returns: + Tensor: Repeated tensor of size `sum(repeats)`. + +Example:: + + >>> torch.repeat_interleave(torch.tensor([1, 2, 3])) + tensor([0, 1, 1, 2, 2, 2]) + +""".format( + **common_args + ), +) + +add_docstr( + torch.tile, + r""" +tile(input, dims) -> Tensor + +Constructs a tensor by repeating the elements of :attr:`input`. +The :attr:`dims` argument specifies the number of repetitions +in each dimension. + +If :attr:`dims` specifies fewer dimensions than :attr:`input` has, then +ones are prepended to :attr:`dims` until all dimensions are specified. +For example, if :attr:`input` has shape (8, 6, 4, 2) and :attr:`dims` +is (2, 2), then :attr:`dims` is treated as (1, 1, 2, 2). + +Analogously, if :attr:`input` has fewer dimensions than :attr:`dims` +specifies, then :attr:`input` is treated as if it were unsqueezed at +dimension zero until it has as many dimensions as :attr:`dims` specifies. +For example, if :attr:`input` has shape (4, 2) and :attr:`dims` +is (3, 3, 2, 2), then :attr:`input` is treated as if it had the +shape (1, 1, 4, 2). + +.. note:: + + This function is similar to NumPy's tile function. + +Args: + input (Tensor): the tensor whose elements to repeat. + dims (tuple): the number of repetitions per dimension. + +Example:: + + >>> x = torch.tensor([1, 2, 3]) + >>> x.tile((2,)) + tensor([1, 2, 3, 1, 2, 3]) + >>> y = torch.tensor([[1, 2], [3, 4]]) + >>> torch.tile(y, (2, 2)) + tensor([[1, 2, 1, 2], + [3, 4, 3, 4], + [1, 2, 1, 2], + [3, 4, 3, 4]]) +""", +) + +add_docstr( + torch.quantize_per_tensor, + r""" +quantize_per_tensor(input, scale, zero_point, dtype) -> Tensor + +Converts a float tensor to a quantized tensor with given scale and zero point. + +Arguments: + input (Tensor): float tensor or list of tensors to quantize + scale (float or Tensor): scale to apply in quantization formula + zero_point (int or Tensor): offset in integer value that maps to float zero + dtype (:class:`torch.dtype`): the desired data type of returned tensor. + Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8``, ``torch.qint32`` + +Returns: + Tensor: A newly quantized tensor or list of quantized tensors. + +Example:: + + >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), 0.1, 10, torch.quint8) + tensor([-1., 0., 1., 2.], size=(4,), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=10) + >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), 0.1, 10, torch.quint8).int_repr() + tensor([ 0, 10, 20, 30], dtype=torch.uint8) + >>> torch.quantize_per_tensor([torch.tensor([-1.0, 0.0]), torch.tensor([-2.0, 2.0])], + >>> torch.tensor([0.1, 0.2]), torch.tensor([10, 20]), torch.quint8) + (tensor([-1., 0.], size=(2,), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=10), + tensor([-2., 2.], size=(2,), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.2, zero_point=20)) + >>> torch.quantize_per_tensor(torch.tensor([-1.0, 0.0, 1.0, 2.0]), torch.tensor(0.1), torch.tensor(10), torch.quint8) + tensor([-1., 0., 1., 2.], size=(4,), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.10, zero_point=10) +""", +) + +add_docstr( + torch.quantize_per_tensor_dynamic, + r""" +quantize_per_tensor_dynamic(input, dtype, reduce_range) -> Tensor + +Converts a float tensor to a quantized tensor with scale and zero_point calculated +dynamically based on the input. + +Arguments: + input (Tensor): float tensor or list of tensors to quantize + dtype (:class:`torch.dtype`): the desired data type of returned tensor. + Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8`` + reduce_range (bool): a flag to indicate whether to reduce the range of quantized + data by 1 bit, it's required to avoid instruction overflow for some hardwares + +Returns: + Tensor: A newly (dynamically) quantized tensor + +Example:: + + >>> t = torch.quantize_per_tensor_dynamic(torch.tensor([-1.0, 0.0, 1.0, 2.0]), torch.quint8, False) + >>> print(t) + tensor([-1., 0., 1., 2.], size=(4,), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.011764705882352941, + zero_point=85) + >>> t.int_repr() + tensor([ 0, 85, 170, 255], dtype=torch.uint8) +""", +) + +add_docstr( + torch.quantize_per_channel, + r""" +quantize_per_channel(input, scales, zero_points, axis, dtype) -> Tensor + +Converts a float tensor to a per-channel quantized tensor with given scales and zero points. + +Arguments: + input (Tensor): float tensor to quantize + scales (Tensor): float 1D tensor of scales to use, size should match ``input.size(axis)`` + zero_points (int): integer 1D tensor of offset to use, size should match ``input.size(axis)`` + axis (int): dimension on which apply per-channel quantization + dtype (:class:`torch.dtype`): the desired data type of returned tensor. + Has to be one of the quantized dtypes: ``torch.quint8``, ``torch.qint8``, ``torch.qint32`` + +Returns: + Tensor: A newly quantized tensor + +Example:: + + >>> x = torch.tensor([[-1.0, 0.0], [1.0, 2.0]]) + >>> torch.quantize_per_channel(x, torch.tensor([0.1, 0.01]), torch.tensor([10, 0]), 0, torch.quint8) + tensor([[-1., 0.], + [ 1., 2.]], size=(2, 2), dtype=torch.quint8, + quantization_scheme=torch.per_channel_affine, + scale=tensor([0.1000, 0.0100], dtype=torch.float64), + zero_point=tensor([10, 0]), axis=0) + >>> torch.quantize_per_channel(x, torch.tensor([0.1, 0.01]), torch.tensor([10, 0]), 0, torch.quint8).int_repr() + tensor([[ 0, 10], + [100, 200]], dtype=torch.uint8) +""", +) + + +add_docstr( + torch.quantized_batch_norm, + r""" +quantized_batch_norm(input, weight=None, bias=None, mean, var, eps, output_scale, output_zero_point) -> Tensor + +Applies batch normalization on a 4D (NCHW) quantized tensor. + +.. math:: + + y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + +Arguments: + input (Tensor): quantized tensor + weight (Tensor): float tensor that corresponds to the gamma, size C + bias (Tensor): float tensor that corresponds to the beta, size C + mean (Tensor): float mean value in batch normalization, size C + var (Tensor): float tensor for variance, size C + eps (float): a value added to the denominator for numerical stability. + output_scale (float): output quantized tensor scale + output_zero_point (int): output quantized tensor zero_point + +Returns: + Tensor: A quantized tensor with batch normalization applied. + +Example:: + + >>> qx = torch.quantize_per_tensor(torch.rand(2, 2, 2, 2), 1.5, 3, torch.quint8) + >>> torch.quantized_batch_norm(qx, torch.ones(2), torch.zeros(2), torch.rand(2), torch.rand(2), 0.00001, 0.2, 2) + tensor([[[[-0.2000, -0.2000], + [ 1.6000, -0.2000]], + + [[-0.4000, -0.4000], + [-0.4000, 0.6000]]], + + + [[[-0.2000, -0.2000], + [-0.2000, -0.2000]], + + [[ 0.6000, -0.4000], + [ 0.6000, -0.4000]]]], size=(2, 2, 2, 2), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=0.2, zero_point=2) +""", +) + + +add_docstr( + torch.quantized_max_pool1d, + r""" +quantized_max_pool1d(input, kernel_size, stride=[], padding=0, dilation=1, ceil_mode=False) -> Tensor + +Applies a 1D max pooling over an input quantized tensor composed of several input planes. + +Arguments: + input (Tensor): quantized tensor + kernel_size (list of int): the size of the sliding window + stride (``list of int``, optional): the stride of the sliding window + padding (``list of int``, optional): padding to be added on both sides, must be >= 0 and <= kernel_size / 2 + dilation (``list of int``, optional): The stride between elements within a sliding window, must be > 0. Default 1 + ceil_mode (bool, optional): If True, will use ceil instead of floor to compute the output shape. + Defaults to False. + + +Returns: + Tensor: A quantized tensor with max_pool1d applied. + +Example:: + + >>> qx = torch.quantize_per_tensor(torch.rand(2, 2), 1.5, 3, torch.quint8) + >>> torch.quantized_max_pool1d(qx, [2]) + tensor([[0.0000], + [1.5000]], size=(2, 1), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=1.5, zero_point=3) +""", +) + + +add_docstr( + torch.quantized_max_pool2d, + r""" +quantized_max_pool2d(input, kernel_size, stride=[], padding=0, dilation=1, ceil_mode=False) -> Tensor + +Applies a 2D max pooling over an input quantized tensor composed of several input planes. + +Arguments: + input (Tensor): quantized tensor + kernel_size (``list of int``): the size of the sliding window + stride (``list of int``, optional): the stride of the sliding window + padding (``list of int``, optional): padding to be added on both sides, must be >= 0 and <= kernel_size / 2 + dilation (``list of int``, optional): The stride between elements within a sliding window, must be > 0. Default 1 + ceil_mode (bool, optional): If True, will use ceil instead of floor to compute the output shape. + Defaults to False. + + +Returns: + Tensor: A quantized tensor with max_pool2d applied. + +Example:: + + >>> qx = torch.quantize_per_tensor(torch.rand(2, 2, 2, 2), 1.5, 3, torch.quint8) + >>> torch.quantized_max_pool2d(qx, [2,2]) + tensor([[[[1.5000]], + + [[1.5000]]], + + + [[[0.0000]], + + [[0.0000]]]], size=(2, 2, 1, 1), dtype=torch.quint8, + quantization_scheme=torch.per_tensor_affine, scale=1.5, zero_point=3) +""", +) + + +add_docstr( + torch.Generator, + r""" +Generator(device='cpu') -> Generator + +Creates and returns a generator object that manages the state of the algorithm which +produces pseudo random numbers. Used as a keyword argument in many :ref:`inplace-random-sampling` +functions. + +Arguments: + device (:class:`torch.device`, optional): the desired device for the generator. + +Returns: + Generator: An torch.Generator object. + +Example:: + + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA) + >>> g_cpu = torch.Generator() + >>> g_cuda = torch.Generator(device='cuda') +""", +) + + +add_docstr( + torch.Generator.set_state, + r""" +Generator.set_state(new_state) -> void + +Sets the Generator state. + +Arguments: + new_state (torch.ByteTensor): The desired state. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu_other = torch.Generator() + >>> g_cpu.set_state(g_cpu_other.get_state()) +""", +) + + +add_docstr( + torch.Generator.get_state, + r""" +Generator.get_state() -> Tensor + +Returns the Generator state as a ``torch.ByteTensor``. + +Returns: + Tensor: A ``torch.ByteTensor`` which contains all the necessary bits + to restore a Generator to a specific point in time. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu.get_state() +""", +) + + +add_docstr( + torch.Generator.manual_seed, + r""" +Generator.manual_seed(seed) -> Generator + +Sets the seed for generating random numbers. Returns a `torch.Generator` object. Any 32-bit integer is a valid seed. + +Arguments: + seed (int): The desired seed. Value must be within the inclusive range + `[-0x8000_0000_0000_0000, 0xffff_ffff_ffff_ffff]`. Otherwise, a RuntimeError + is raised. Negative inputs are remapped to positive values with the formula + `0xffff_ffff_ffff_ffff + seed`. + +Returns: + Generator: An torch.Generator object. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu.manual_seed(2147483647) +""", +) + + +add_docstr( + torch.Generator.initial_seed, + r""" +Generator.initial_seed() -> int + +Returns the initial seed for generating random numbers. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu.initial_seed() + 2147483647 +""", +) + + +add_docstr( + torch.Generator.seed, + r""" +Generator.seed() -> int + +Gets a non-deterministic random number from std::random_device or the current +time and uses it to seed a Generator. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu.seed() + 1516516984916 +""", +) + + +add_docstr( + torch.Generator.device, + r""" +Generator.device -> device + +Gets the current device of the generator. + +Example:: + + >>> g_cpu = torch.Generator() + >>> g_cpu.device + device(type='cpu') +""", +) + +add_docstr( + torch._assert_async, + r""" +_assert_async(tensor) -> void + +Asynchronously assert that the contents of tensor are nonzero. For CPU tensors, +this is equivalent to ``assert tensor`` or ``assert tensor.is_nonzero()``; for +CUDA tensors, we DO NOT synchronize and you may only find out the assertion +failed at a later CUDA kernel launch. Asynchronous assertion can be helpful for +testing invariants in CUDA tensors without giving up performance. This function +is NOT intended to be used for regular error checking, as it will trash your CUDA +context if the assert fails (forcing you to restart your PyTorch process.) + +Args: + tensor (Tensor): a one element tensor to test to see if it is nonzero. Zero + elements (including False for boolean tensors) cause an assertion failure + to be raised. +""", +) + +add_docstr( + torch.searchsorted, + r""" +searchsorted(sorted_sequence, values, *, out_int32=False, right=False, side=None, out=None, sorter=None) -> Tensor + +Find the indices from the *innermost* dimension of :attr:`sorted_sequence` such that, if the +corresponding values in :attr:`values` were inserted before the indices, when sorted, the order +of the corresponding *innermost* dimension within :attr:`sorted_sequence` would be preserved. +Return a new tensor with the same size as :attr:`values`. More formally, +the returned index satisfies the following rules: + +.. list-table:: + :widths: 12 10 78 + :header-rows: 1 + + * - :attr:`sorted_sequence` + - :attr:`right` + - *returned index satisfies* + * - 1-D + - False + - ``sorted_sequence[i-1] < values[m][n]...[l][x] <= sorted_sequence[i]`` + * - 1-D + - True + - ``sorted_sequence[i-1] <= values[m][n]...[l][x] < sorted_sequence[i]`` + * - N-D + - False + - ``sorted_sequence[m][n]...[l][i-1] < values[m][n]...[l][x] <= sorted_sequence[m][n]...[l][i]`` + * - N-D + - True + - ``sorted_sequence[m][n]...[l][i-1] <= values[m][n]...[l][x] < sorted_sequence[m][n]...[l][i]`` + +Args: + sorted_sequence (Tensor): N-D or 1-D tensor, containing monotonically increasing sequence on the *innermost* + dimension unless :attr:`sorter` is provided, in which case the sequence does not + need to be sorted + values (Tensor or Scalar): N-D tensor or a Scalar containing the search value(s). + +Keyword args: + out_int32 (bool, optional): indicate the output data type. torch.int32 if True, torch.int64 otherwise. + Default value is False, i.e. default output data type is torch.int64. + right (bool, optional): if False, return the first suitable location that is found. If True, return the + last such index. If no suitable index found, return 0 for non-numerical value + (eg. nan, inf) or the size of *innermost* dimension within :attr:`sorted_sequence` + (one pass the last index of the *innermost* dimension). In other words, if False, + gets the lower bound index for each value in :attr:`values` on the corresponding + *innermost* dimension of the :attr:`sorted_sequence`. If True, gets the upper + bound index instead. Default value is False. :attr:`side` does the same and is + preferred. It will error if :attr:`side` is set to "left" while this is True. + side (str, optional): the same as :attr:`right` but preferred. "left" corresponds to False for :attr:`right` + and "right" corresponds to True for :attr:`right`. It will error if this is set to + "left" while :attr:`right` is True. Default value is None. + out (Tensor, optional): the output tensor, must be the same size as :attr:`values` if provided. + sorter (LongTensor, optional): if provided, a tensor matching the shape of the unsorted + :attr:`sorted_sequence` containing a sequence of indices that sort it in the + ascending order on the innermost dimension + + +Example:: + + >>> sorted_sequence = torch.tensor([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]]) + >>> sorted_sequence + tensor([[ 1, 3, 5, 7, 9], + [ 2, 4, 6, 8, 10]]) + >>> values = torch.tensor([[3, 6, 9], [3, 6, 9]]) + >>> values + tensor([[3, 6, 9], + [3, 6, 9]]) + >>> torch.searchsorted(sorted_sequence, values) + tensor([[1, 3, 4], + [1, 2, 4]]) + >>> torch.searchsorted(sorted_sequence, values, side='right') + tensor([[2, 3, 5], + [1, 3, 4]]) + + >>> sorted_sequence_1d = torch.tensor([1, 3, 5, 7, 9]) + >>> sorted_sequence_1d + tensor([1, 3, 5, 7, 9]) + >>> torch.searchsorted(sorted_sequence_1d, values) + tensor([[1, 3, 4], + [1, 3, 4]]) +""", +) + +add_docstr( + torch.bucketize, + r""" +bucketize(input, boundaries, *, out_int32=False, right=False, out=None) -> Tensor + +Returns the indices of the buckets to which each value in the :attr:`input` belongs, where the +boundaries of the buckets are set by :attr:`boundaries`. Return a new tensor with the same size +as :attr:`input`. If :attr:`right` is False (default), then the left boundary is open. Note that +this behavior is opposite the behavior of +`numpy.digitize `_. +More formally, the returned index satisfies the following rules: + +.. list-table:: + :widths: 15 85 + :header-rows: 1 + + * - :attr:`right` + - *returned index satisfies* + * - False + - ``boundaries[i-1] < input[m][n]...[l][x] <= boundaries[i]`` + * - True + - ``boundaries[i-1] <= input[m][n]...[l][x] < boundaries[i]`` + +Args: + input (Tensor or Scalar): N-D tensor or a Scalar containing the search value(s). + boundaries (Tensor): 1-D tensor, must contain a strictly increasing sequence, or the return value is undefined. + +Keyword args: + out_int32 (bool, optional): indicate the output data type. torch.int32 if True, torch.int64 otherwise. + Default value is False, i.e. default output data type is torch.int64. + right (bool, optional): if False, return the first suitable location that is found. If True, return the + last such index. If no suitable index found, return 0 for non-numerical value + (eg. nan, inf) or the size of :attr:`boundaries` (one pass the last index). + In other words, if False, gets the lower bound index for each value in :attr:`input` + from :attr:`boundaries`. If True, gets the upper bound index instead. + Default value is False. + out (Tensor, optional): the output tensor, must be the same size as :attr:`input` if provided. + + +Example:: + + >>> boundaries = torch.tensor([1, 3, 5, 7, 9]) + >>> boundaries + tensor([1, 3, 5, 7, 9]) + >>> v = torch.tensor([[3, 6, 9], [3, 6, 9]]) + >>> v + tensor([[3, 6, 9], + [3, 6, 9]]) + >>> torch.bucketize(v, boundaries) + tensor([[1, 3, 4], + [1, 3, 4]]) + >>> torch.bucketize(v, boundaries, right=True) + tensor([[2, 3, 5], + [2, 3, 5]]) +""", +) + +add_docstr( + torch.view_as_real_copy, + r""" +Performs the same operation as :func:`torch.view_as_real`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.view_as_complex_copy, + r""" +Performs the same operation as :func:`torch.view_as_complex`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.as_strided_copy, + r""" +Performs the same operation as :func:`torch.as_strided`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.diagonal_copy, + r""" +Performs the same operation as :func:`torch.diagonal`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.expand_copy, + r""" +Performs the same operation as :func:`torch.expand`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.permute_copy, + r""" +Performs the same operation as :func:`torch.permute`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.select_copy, + r""" +Performs the same operation as :func:`torch.select`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.detach_copy, + r""" +Performs the same operation as :func:`torch.detach`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.slice_copy, + r""" +Performs the same operation as :func:`torch.slice`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.split_copy, + r""" +Performs the same operation as :func:`torch.split`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.split_with_sizes_copy, + r""" +Performs the same operation as :func:`torch.split_with_sizes`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.squeeze_copy, + r""" +Performs the same operation as :func:`torch.squeeze`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.t_copy, + r""" +Performs the same operation as :func:`torch.t`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.transpose_copy, + r""" +Performs the same operation as :func:`torch.transpose`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.unsqueeze_copy, + r""" +Performs the same operation as :func:`torch.unsqueeze`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.indices_copy, + r""" +Performs the same operation as :func:`torch.indices`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.values_copy, + r""" +Performs the same operation as :func:`torch.values`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.crow_indices_copy, + r""" +Performs the same operation as :func:`torch.crow_indices`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.col_indices_copy, + r""" +Performs the same operation as :func:`torch.col_indices`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.unbind_copy, + r""" +Performs the same operation as :func:`torch.unbind`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.view_copy, + r""" +Performs the same operation as :func:`torch.view`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.unfold_copy, + r""" +Performs the same operation as :func:`torch.unfold`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +add_docstr( + torch.alias_copy, + r""" +Performs the same operation as :func:`torch.alias`, but all output tensors +are freshly created instead of aliasing the input. +""", +) + +for unary_base_func_name in ( + "exp", + "sqrt", + "abs", + "acos", + "asin", + "atan", + "ceil", + "cos", + "cosh", + "erf", + "erfc", + "expm1", + "floor", + "log", + "log10", + "log1p", + "log2", + "neg", + "tan", + "tanh", + "sin", + "sinh", + "round", + "lgamma", + "frac", + "reciprocal", + "sigmoid", + "trunc", + "zero", +): + unary_foreach_func_name = f"_foreach_{unary_base_func_name}" + if hasattr(torch, unary_foreach_func_name): + add_docstr( + getattr(torch, unary_foreach_func_name), + rf""" +{unary_foreach_func_name}(self: List[Tensor]) -> List[Tensor] + +Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list. + """, + ) + unary_inplace_foreach_func_name = f"{unary_foreach_func_name}_" + if hasattr(torch, unary_inplace_foreach_func_name): + add_docstr( + getattr(torch, unary_inplace_foreach_func_name), + rf""" +{unary_inplace_foreach_func_name}(self: List[Tensor]) -> None + +Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list. + """, + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62fd410c23eac9297ebd28d0438e77ea0846e1e2 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eef02d728c877202fd7c1ab28055223abdaaaa5e Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d273ec77be768c084576a22b9267fa7e3d6e043 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce571862b4275063d49d60a259014cb4efcffd4d --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py @@ -0,0 +1,6 @@ +import torch +from .linear_relu import LinearReLU + +__all__ = [ + 'LinearReLU', +] diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc55f075a928d33af5591aa94ebbc4336954184a Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54cc19e3c808f806749d59eecafdfc1189e2c7c3 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d79bdbfe83209f18b17cc8c7b245f322871d6c0 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py @@ -0,0 +1 @@ +from .modules import * # noqa: F403 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdd2800e5f21c288be20caf2261428b4309701b4 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5555796e825db3580b37a068e91991100572e951 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..776e0d8e79a400fed6364b9a67bd711a6cf582bc Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d79bdbfe83209f18b17cc8c7b245f322871d6c0 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py @@ -0,0 +1 @@ +from .modules import * # noqa: F403 diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..acbefee3866bbdc2e8fdf9f40edd0d469053470c Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2fa82055e39c506efba2ed15460cfe5049839abd Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d68659c3aba5ffbb8050e492a30d798e8302efa Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..2c57d1ae9bc51b6270279d81ee8a46cf80148bea --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py @@ -0,0 +1,411 @@ +import numbers +from typing import Optional, Tuple +import warnings + +import torch +from torch import Tensor + +""" +We will recreate all the RNN modules as we require the modules to be decomposed +into its building blocks to be able to observe. +""" + +__all__ = [ + "LSTMCell", + "LSTM" +] + +class LSTMCell(torch.nn.Module): + r"""A quantizable long short-term memory (LSTM) cell. + + For the description and the argument types, please, refer to :class:`~torch.nn.LSTMCell` + + Examples:: + + >>> import torch.ao.nn.quantizable as nnqa + >>> rnn = nnqa.LSTMCell(10, 20) + >>> input = torch.randn(6, 10) + >>> hx = torch.randn(3, 20) + >>> cx = torch.randn(3, 20) + >>> output = [] + >>> for i in range(6): + ... hx, cx = rnn(input[i], (hx, cx)) + ... output.append(hx) + """ + _FLOAT_MODULE = torch.nn.LSTMCell + + def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True, + device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__() + self.input_size = input_dim + self.hidden_size = hidden_dim + self.bias = bias + + self.igates = torch.nn.Linear(input_dim, 4 * hidden_dim, bias=bias, **factory_kwargs) + self.hgates = torch.nn.Linear(hidden_dim, 4 * hidden_dim, bias=bias, **factory_kwargs) + self.gates = torch.ao.nn.quantized.FloatFunctional() + + self.input_gate = torch.nn.Sigmoid() + self.forget_gate = torch.nn.Sigmoid() + self.cell_gate = torch.nn.Tanh() + self.output_gate = torch.nn.Sigmoid() + + self.fgate_cx = torch.ao.nn.quantized.FloatFunctional() + self.igate_cgate = torch.ao.nn.quantized.FloatFunctional() + self.fgate_cx_igate_cgate = torch.ao.nn.quantized.FloatFunctional() + + self.ogate_cy = torch.ao.nn.quantized.FloatFunctional() + + self.initial_hidden_state_qparams: Tuple[float, int] = (1.0, 0) + self.initial_cell_state_qparams: Tuple[float, int] = (1.0, 0) + self.hidden_state_dtype: torch.dtype = torch.quint8 + self.cell_state_dtype: torch.dtype = torch.quint8 + + def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]: + if hidden is None or hidden[0] is None or hidden[1] is None: + hidden = self.initialize_hidden(x.shape[0], x.is_quantized) + hx, cx = hidden + + igates = self.igates(x) + hgates = self.hgates(hx) + gates = self.gates.add(igates, hgates) + + input_gate, forget_gate, cell_gate, out_gate = gates.chunk(4, 1) + + input_gate = self.input_gate(input_gate) + forget_gate = self.forget_gate(forget_gate) + cell_gate = self.cell_gate(cell_gate) + out_gate = self.output_gate(out_gate) + + fgate_cx = self.fgate_cx.mul(forget_gate, cx) + igate_cgate = self.igate_cgate.mul(input_gate, cell_gate) + fgate_cx_igate_cgate = self.fgate_cx_igate_cgate.add(fgate_cx, igate_cgate) + cy = fgate_cx_igate_cgate + + # TODO: make this tanh a member of the module so its qparams can be configured + tanh_cy = torch.tanh(cy) + hy = self.ogate_cy.mul(out_gate, tanh_cy) + return hy, cy + + def initialize_hidden(self, batch_size: int, is_quantized: bool = False) -> Tuple[Tensor, Tensor]: + h, c = torch.zeros((batch_size, self.hidden_size)), torch.zeros((batch_size, self.hidden_size)) + if is_quantized: + (h_scale, h_zp) = self.initial_hidden_state_qparams + (c_scale, c_zp) = self.initial_cell_state_qparams + h = torch.quantize_per_tensor(h, scale=h_scale, zero_point=h_zp, dtype=self.hidden_state_dtype) + c = torch.quantize_per_tensor(c, scale=c_scale, zero_point=c_zp, dtype=self.cell_state_dtype) + return h, c + + def _get_name(self): + return 'QuantizableLSTMCell' + + @classmethod + def from_params(cls, wi, wh, bi=None, bh=None): + """Uses the weights and biases to create a new LSTM cell. + + Args: + wi, wh: Weights for the input and hidden layers + bi, bh: Biases for the input and hidden layers + """ + assert (bi is None) == (bh is None) # Either both None or both have values + input_size = wi.shape[1] + hidden_size = wh.shape[1] + cell = cls(input_dim=input_size, hidden_dim=hidden_size, + bias=(bi is not None)) + cell.igates.weight = torch.nn.Parameter(wi) + if bi is not None: + cell.igates.bias = torch.nn.Parameter(bi) + cell.hgates.weight = torch.nn.Parameter(wh) + if bh is not None: + cell.hgates.bias = torch.nn.Parameter(bh) + return cell + + @classmethod + def from_float(cls, other): + assert type(other) == cls._FLOAT_MODULE + assert hasattr(other, 'qconfig'), "The float module must have 'qconfig'" + observed = cls.from_params(other.weight_ih, other.weight_hh, + other.bias_ih, other.bias_hh) + observed.qconfig = other.qconfig + observed.igates.qconfig = other.qconfig + observed.hgates.qconfig = other.qconfig + return observed + + +class _LSTMSingleLayer(torch.nn.Module): + r"""A single one-directional LSTM layer. + + The difference between a layer and a cell is that the layer can process a + sequence, while the cell only expects an instantaneous value. + """ + def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True, + device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__() + self.cell = LSTMCell(input_dim, hidden_dim, bias=bias, **factory_kwargs) + + def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None): + result = [] + seq_len = x.shape[0] + for i in range(seq_len): + hidden = self.cell(x[i], hidden) + result.append(hidden[0]) # type: ignore[index] + result_tensor = torch.stack(result, 0) + return result_tensor, hidden + + @classmethod + def from_params(cls, *args, **kwargs): + cell = LSTMCell.from_params(*args, **kwargs) + layer = cls(cell.input_size, cell.hidden_size, cell.bias) + layer.cell = cell + return layer + + +class _LSTMLayer(torch.nn.Module): + r"""A single bi-directional LSTM layer.""" + def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True, + batch_first: bool = False, bidirectional: bool = False, + device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__() + self.batch_first = batch_first + self.bidirectional = bidirectional + self.layer_fw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs) + if self.bidirectional: + self.layer_bw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs) + + def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None): + if self.batch_first: + x = x.transpose(0, 1) + if hidden is None: + hx_fw, cx_fw = (None, None) + else: + hx_fw, cx_fw = hidden + hidden_bw: Optional[Tuple[Tensor, Tensor]] = None + if self.bidirectional: + if hx_fw is None: + hx_bw = None + else: + hx_bw = hx_fw[1] + hx_fw = hx_fw[0] + if cx_fw is None: + cx_bw = None + else: + cx_bw = cx_fw[1] + cx_fw = cx_fw[0] + if hx_bw is not None and cx_bw is not None: + hidden_bw = hx_bw, cx_bw + if hx_fw is None and cx_fw is None: + hidden_fw = None + else: + hidden_fw = torch.jit._unwrap_optional(hx_fw), torch.jit._unwrap_optional(cx_fw) + result_fw, hidden_fw = self.layer_fw(x, hidden_fw) + + if hasattr(self, 'layer_bw') and self.bidirectional: + x_reversed = x.flip(0) + result_bw, hidden_bw = self.layer_bw(x_reversed, hidden_bw) + result_bw = result_bw.flip(0) + + result = torch.cat([result_fw, result_bw], result_fw.dim() - 1) + if hidden_fw is None and hidden_bw is None: + h = None + c = None + elif hidden_fw is None: + (h, c) = torch.jit._unwrap_optional(hidden_bw) + elif hidden_bw is None: + (h, c) = torch.jit._unwrap_optional(hidden_fw) + else: + h = torch.stack([hidden_fw[0], hidden_bw[0]], 0) # type: ignore[list-item] + c = torch.stack([hidden_fw[1], hidden_bw[1]], 0) # type: ignore[list-item] + else: + result = result_fw + h, c = torch.jit._unwrap_optional(hidden_fw) # type: ignore[assignment] + + if self.batch_first: + result.transpose_(0, 1) + + return result, (h, c) + + @classmethod + def from_float(cls, other, layer_idx=0, qconfig=None, **kwargs): + r""" + There is no FP equivalent of this class. This function is here just to + mimic the behavior of the `prepare` within the `torch.ao.quantization` + flow. + """ + assert hasattr(other, 'qconfig') or (qconfig is not None) + + input_size = kwargs.get('input_size', other.input_size) + hidden_size = kwargs.get('hidden_size', other.hidden_size) + bias = kwargs.get('bias', other.bias) + batch_first = kwargs.get('batch_first', other.batch_first) + bidirectional = kwargs.get('bidirectional', other.bidirectional) + + layer = cls(input_size, hidden_size, bias, batch_first, bidirectional) + layer.qconfig = getattr(other, 'qconfig', qconfig) + wi = getattr(other, f'weight_ih_l{layer_idx}') + wh = getattr(other, f'weight_hh_l{layer_idx}') + bi = getattr(other, f'bias_ih_l{layer_idx}', None) + bh = getattr(other, f'bias_hh_l{layer_idx}', None) + + layer.layer_fw = _LSTMSingleLayer.from_params(wi, wh, bi, bh) + + if other.bidirectional: + wi = getattr(other, f'weight_ih_l{layer_idx}_reverse') + wh = getattr(other, f'weight_hh_l{layer_idx}_reverse') + bi = getattr(other, f'bias_ih_l{layer_idx}_reverse', None) + bh = getattr(other, f'bias_hh_l{layer_idx}_reverse', None) + layer.layer_bw = _LSTMSingleLayer.from_params(wi, wh, bi, bh) + return layer + + +class LSTM(torch.nn.Module): + r"""A quantizable long short-term memory (LSTM). + + For the description and the argument types, please, refer to :class:`~torch.nn.LSTM` + + Attributes: + layers : instances of the `_LSTMLayer` + + .. note:: + To access the weights and biases, you need to access them per layer. + See examples below. + + Examples:: + + >>> import torch.ao.nn.quantizable as nnqa + >>> rnn = nnqa.LSTM(10, 20, 2) + >>> input = torch.randn(5, 3, 10) + >>> h0 = torch.randn(2, 3, 20) + >>> c0 = torch.randn(2, 3, 20) + >>> output, (hn, cn) = rnn(input, (h0, c0)) + >>> # To get the weights: + >>> # xdoctest: +SKIP + >>> print(rnn.layers[0].weight_ih) + tensor([[...]]) + >>> print(rnn.layers[0].weight_hh) + AssertionError: There is no reverse path in the non-bidirectional layer + """ + _FLOAT_MODULE = torch.nn.LSTM + + def __init__(self, input_size: int, hidden_size: int, + num_layers: int = 1, bias: bool = True, + batch_first: bool = False, dropout: float = 0., + bidirectional: bool = False, + device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.num_layers = num_layers + self.bias = bias + self.batch_first = batch_first + self.dropout = float(dropout) + self.bidirectional = bidirectional + self.training = False # Default to eval mode. If we want to train, we will explicitly set to training. + num_directions = 2 if bidirectional else 1 + + if not isinstance(dropout, numbers.Number) or not 0 <= dropout <= 1 or \ + isinstance(dropout, bool): + raise ValueError("dropout should be a number in range [0, 1] " + "representing the probability of an element being " + "zeroed") + if dropout > 0: + warnings.warn("dropout option for quantizable LSTM is ignored. " + "If you are training, please, use nn.LSTM version " + "followed by `prepare` step.") + if num_layers == 1: + warnings.warn("dropout option adds dropout after all but last " + "recurrent layer, so non-zero dropout expects " + f"num_layers greater than 1, but got dropout={dropout} " + f"and num_layers={num_layers}") + + layers = [_LSTMLayer(self.input_size, self.hidden_size, + self.bias, batch_first=False, + bidirectional=self.bidirectional, **factory_kwargs)] + for layer in range(1, num_layers): + layers.append(_LSTMLayer(self.hidden_size, self.hidden_size, + self.bias, batch_first=False, + bidirectional=self.bidirectional, + **factory_kwargs)) + self.layers = torch.nn.ModuleList(layers) + + def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None): + if self.batch_first: + x = x.transpose(0, 1) + + max_batch_size = x.size(1) + num_directions = 2 if self.bidirectional else 1 + if hidden is None: + zeros = torch.zeros(num_directions, max_batch_size, + self.hidden_size, dtype=torch.float, + device=x.device) + zeros.squeeze_(0) + if x.is_quantized: + zeros = torch.quantize_per_tensor(zeros, scale=1.0, + zero_point=0, dtype=x.dtype) + hxcx = [(zeros, zeros) for _ in range(self.num_layers)] + else: + hidden_non_opt = torch.jit._unwrap_optional(hidden) + if isinstance(hidden_non_opt[0], Tensor): + hx = hidden_non_opt[0].reshape(self.num_layers, num_directions, + max_batch_size, + self.hidden_size) + cx = hidden_non_opt[1].reshape(self.num_layers, num_directions, + max_batch_size, + self.hidden_size) + hxcx = [(hx[idx].squeeze(0), cx[idx].squeeze(0)) for idx in range(self.num_layers)] + else: + hxcx = hidden_non_opt + + hx_list = [] + cx_list = [] + for idx, layer in enumerate(self.layers): + x, (h, c) = layer(x, hxcx[idx]) + hx_list.append(torch.jit._unwrap_optional(h)) + cx_list.append(torch.jit._unwrap_optional(c)) + hx_tensor = torch.stack(hx_list) + cx_tensor = torch.stack(cx_list) + + # We are creating another dimension for bidirectional case + # need to collapse it + hx_tensor = hx_tensor.reshape(-1, hx_tensor.shape[-2], hx_tensor.shape[-1]) + cx_tensor = cx_tensor.reshape(-1, cx_tensor.shape[-2], cx_tensor.shape[-1]) + + if self.batch_first: + x = x.transpose(0, 1) + + return x, (hx_tensor, cx_tensor) + + def _get_name(self): + return 'QuantizableLSTM' + + @classmethod + def from_float(cls, other, qconfig=None): + assert isinstance(other, cls._FLOAT_MODULE) + assert (hasattr(other, 'qconfig') or qconfig) + observed = cls(other.input_size, other.hidden_size, other.num_layers, + other.bias, other.batch_first, other.dropout, + other.bidirectional) + observed.qconfig = getattr(other, 'qconfig', qconfig) + for idx in range(other.num_layers): + observed.layers[idx] = _LSTMLayer.from_float(other, idx, qconfig, + batch_first=False) + + # Prepare the model + if other.training: + observed.train() + observed = torch.ao.quantization.prepare_qat(observed, inplace=True) + else: + observed.eval() + observed = torch.ao.quantization.prepare(observed, inplace=True) + return observed + + @classmethod + def from_observed(cls, other): + # The whole flow is float -> observed -> quantized + # This class does float -> observed only + raise NotImplementedError("It looks like you are trying to convert a " + "non-quantizable LSTM module. Please, see " + "the examples on quantizable LSTMs.") diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..53cf965c2e48af87b4542b9414e4e3767ed85e92 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py @@ -0,0 +1,38 @@ +from . import functional +from .modules import * # noqa: F403 +from .modules import MaxPool2d + +__all__ = [ + 'BatchNorm2d', + 'BatchNorm3d', + 'Conv1d', + 'Conv2d', + 'Conv3d', + 'ConvTranspose1d', + 'ConvTranspose2d', + 'ConvTranspose3d', + 'DeQuantize', + 'ELU', + 'Embedding', + 'EmbeddingBag', + 'GroupNorm', + 'Hardswish', + 'InstanceNorm1d', + 'InstanceNorm2d', + 'InstanceNorm3d', + 'LayerNorm', + 'LeakyReLU', + 'Linear', + 'LSTM', + 'MultiheadAttention', + 'Quantize', + 'ReLU6', + 'Sigmoid', + 'Softmax', + 'Dropout', + 'PReLU', + # Wrapper modules + 'FloatFunctional', + 'FXFloatFunctional', + 'QFunctional', +] diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45ca7141c08f1c1c58447dc22ee61e1f0e0976a5 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a7a97e0a8da831d63d25d6f02edf77cb85b280a7 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py @@ -0,0 +1,19 @@ + +from .linear import Linear +from .rnn import LSTM, GRU, LSTMCell, RNNCell, GRUCell +from .conv import Conv1d, Conv2d, Conv3d, ConvTranspose1d, ConvTranspose2d, ConvTranspose3d + +__all__ = [ + 'Linear', + 'LSTM', + 'GRU', + 'LSTMCell', + 'RNNCell', + 'GRUCell', + 'Conv1d', + 'Conv2d', + 'Conv3d', + 'ConvTranspose1d', + 'ConvTranspose2d', + 'ConvTranspose3d', +] diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d626cdf296756b301ffa7bf4ae4715d0cae65a9 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..f1af77964136556d209605665f77ed51c3a0d44e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py @@ -0,0 +1,399 @@ +r"""Dynamically quantized convolution modules.""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torch import Tensor +from torch._ops import ops +from torch.nn.common_types import _size_1_t +from torch.nn.modules.utils import _single, _pair, _triple +from torch.ao.nn.quantized.modules.conv import _reverse_repeat_padding +import torch.ao.nn.quantized as nnq +import warnings + +__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d'] + + +class Conv1d(nnq.Conv1d): + r"""A dynamically quantized conv module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.Conv1d` and :class:`~torch.ao.nn.quantized.dynamic.Conv1d` and + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + + See :class:`~torch.nn.Conv1d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> m = nn.quantized.dynamic.Conv1d(16, 33, 3, stride=2) + >>> input = torch.randn(20, 16, 100) + >>> output = m(input) + + """ + + _FLOAT_MODULE = nn.Conv1d + _NNIQAT_CONV_BN_MODULE = None # type: ignore[assignment] + _NNI_CONV_RELU_MODULE = None # type: ignore[assignment] + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = 'zeros', + device=None, + dtype=None, + reduce_range=True): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + factory_kwargs = {'device': device, 'dtype': dtype} + kernel_size = _single(kernel_size) + stride = _single(stride) + padding = padding if isinstance(padding, str) else _single(padding) + dilation = _single(dilation) + + super().__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConv1d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 3: + raise ValueError("Input shape must be `(N, C, L)`!") + if self.padding_mode != 'zeros': + # Padding in Conv1d is stored as (p, p), need to get (p,) + _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding[:1]) + input = F.pad(input, _reversed_padding_repeated_twice, + mode=self.padding_mode) + return ops.quantized.conv1d_dynamic(input, self._packed_params, reduce_range) + + +class Conv2d(nnq.Conv2d): + r"""A dynamically quantized conv module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.Conv2d` and :class:`~torch.ao.nn.quantized.dynamic.Conv2d` and + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + + See :class:`~torch.nn.Conv2d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> # With square kernels and equal stride + >>> m = nn.quantized.dynamic.Conv2d(16, 33, 3, stride=2) + >>> # non-square kernels and unequal stride and with padding + >>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + >>> # non-square kernels and unequal stride and with padding and dilation + >>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)) + >>> input = torch.randn(20, 16, 50, 100) + >>> output = m(input) + + """ + _FLOAT_MODULE = nn.Conv2d + _NNIQAT_CONV_BN_MODULE = None # type: ignore[assignment] + _NNI_CONV_RELU_MODULE = None # type: ignore[assignment] + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, + padding_mode='zeros', device=None, dtype=None): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + factory_kwargs = {'device': device, 'dtype': dtype} + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + + super().__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConv2d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 4: + raise ValueError("Input shape must be `(N, C, H, W)`!") + if self.padding_mode != 'zeros': + _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding) + input = F.pad(input, _reversed_padding_repeated_twice, + mode=self.padding_mode) + return ops.quantized.conv2d_dynamic( + input, self._packed_params, reduce_range) + + +class Conv3d(nnq.Conv3d): + r"""A dynamically quantized conv module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.Conv3d` and :class:`~torch.ao.nn.quantized.dynamic.Conv3d` and + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + + See :class:`~torch.nn.Conv3d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> # With square kernels and equal stride + >>> m = nn.quantized.dynamic.Conv3d(16, 33, 3, stride=2) + >>> # non-square kernels and unequal stride and with padding + >>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2)) + >>> # non-square kernels and unequal stride and with padding and dilation + >>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2), dilation=(1, 2, 2)) + >>> input = torch.randn(20, 16, 56, 56, 56) + >>> output = m(input) + + """ + _FLOAT_MODULE = nn.Conv3d + _NNIQAT_CONV_BN_MODULE = None # type: ignore[assignment] + _NNI_CONV_RELU_MODULE = None # type: ignore[assignment] + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, + padding_mode='zeros', device=None, dtype=None): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + assert padding_mode != 'reflect', "Conv3d does not support reflection padding" + factory_kwargs = {'device': device, 'dtype': dtype} + kernel_size = _triple(kernel_size) + stride = _triple(stride) + padding = _triple(padding) + dilation = _triple(dilation) + super()._init( + in_channels, out_channels, kernel_size, stride, padding, dilation, + False, _triple(0), groups, bias, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConv3d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 5: + raise ValueError("Input shape must be `(N, C, D, H, W)`!") + if self.padding_mode != 'zeros': + _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding) + input = F.pad(input, _reversed_padding_repeated_twice, + mode=self.padding_mode) + return ops.quantized.conv3d_dynamic( + input, self._packed_params, reduce_range) + + +class ConvTranspose1d(nnq.ConvTranspose1d): + r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.ConvTranspose1d`. + + For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv1d` + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + See :class:`~torch.nn.ConvTranspose1d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> # With square kernels and equal stride + >>> m = nndq.ConvTranspose1d(16, 33, 3, stride=2) + >>> # non-square kernels and unequal stride and with padding + >>> m = nndq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + >>> output = m(input) + >>> # exact output size can be also specified as an argument + >>> downsample = nndq.Conv1d(16, 16, 3, stride=2, padding=1) + >>> upsample = nndq.ConvTranspose1d(16, 16, 3, stride=2, padding=1) + >>> h = downsample(input) + >>> h.size() + torch.Size([1, 16, 6]) + >>> output = upsample(h, output_size=input.size()) + >>> output.size() + torch.Size([1, 16, 12]) + """ + + _FLOAT_MODULE = nn.ConvTranspose1d + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, + dilation=1, padding_mode='zeros', device=None, dtype=None): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__( + in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConvTranspose1d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 3: + raise ValueError("Input shape must be `(N, C, L)`!") + return torch.ops.quantized.conv_transpose1d_dynamic( + input, self._packed_params, reduce_range) + + +class ConvTranspose2d(nnq.ConvTranspose2d): + r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.ConvTranspose2d`. + + For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv2d` + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + See :class:`~torch.nn.ConvTranspose2d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> # With square kernels and equal stride + >>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2) + >>> # non-square kernels and unequal stride and with padding + >>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + >>> output = m(input) + >>> # exact output size can be also specified as an argument + >>> downsample = nnq.Conv2d(16, 16, 3, stride=2, padding=1) + >>> upsample = nnq.ConvTranspose2d(16, 16, 3, stride=2, padding=1) + >>> h = downsample(input) + >>> h.size() + torch.Size([1, 16, 6, 6]) + >>> output = upsample(h, output_size=input.size()) + >>> output.size() + torch.Size([1, 16, 12, 12]) + """ + + _FLOAT_MODULE = nn.ConvTranspose2d + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, + dilation=1, padding_mode='zeros', device=None, dtype=None): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__( + in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConvTranspose2d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 4: + raise ValueError("Input shape must be `(N, C, H, W)`!") + return ops.quantized.conv_transpose2d_dynamic( + input, self._packed_params, reduce_range) + + +class ConvTranspose3d(nnq.ConvTranspose3d): + r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs. + + For details on input arguments, parameters, and implementation see + :class:`~torch.nn.ConvTranspose3d`. + + For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv3d` + + Attributes: + weight (Tensor): packed tensor derived from the learnable weight + parameter. + scale (Tensor): scalar for the output scale + zero_point (Tensor): scalar for the output zero point + See :class:`~torch.nn.ConvTranspose3d` for other attributes. + + Examples:: + + >>> # xdoctest: +SKIP + >>> # With cubic kernels and equal stride + >>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2) + >>> # non-cubic kernels and unequal stride and with padding + >>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2)) + >>> output = m(input) + >>> # exact output size can be also specified as an argument + >>> downsample = nnq.Conv3d(16, 16, 3, stride=2, padding=1) + >>> upsample = nnq.ConvTranspose3d(16, 16, 3, stride=2, padding=1) + >>> h = downsample(input) + >>> h.size() + torch.Size([1, 16, 6, 6, 6]) + >>> output = upsample(h, output_size=input.size()) + >>> output.size() + torch.Size([1, 16, 12, 12, 12]) + """ + + _FLOAT_MODULE = nn.ConvTranspose3d + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, + dilation=1, padding_mode='zeros', device=None, dtype=None): + warnings.warn( + "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format( + self._get_name() + ) + ) + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__( + in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, **factory_kwargs) + + def _get_name(self): + return 'DynamicQuantizedConvTranspose3d' + + def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor: + # Temporarily using len(shape) instead of ndim due to JIT issue + # https://github.com/pytorch/pytorch/issues/23890 + if len(input.shape) != 5: + raise ValueError("Input shape must be `(N, C, T, H, W)`!") + return ops.quantized.conv_transpose3d_dynamic( + input, self._packed_params, reduce_range) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py new file mode 100644 index 0000000000000000000000000000000000000000..a8a366e57f53cd022fe6e12e20b76f76afdd4726 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py @@ -0,0 +1,132 @@ +import torch +import torch.ao.nn.quantized as nnq +from torch.ao.nn.quantized.modules.utils import _quantize_weight +import torch.ao.nn.intrinsic as nni + +__all__ = [ + "Linear", +] + + +class Linear(nnq.Linear): + r""" + A dynamic quantized linear module with floating point tensor as inputs and outputs. + We adopt the same interface as `torch.nn.Linear`, please see + https://pytorch.org/docs/stable/nn.html#torch.nn.Linear for documentation. + + Similar to :class:`torch.nn.Linear`, attributes will be randomly + initialized at module creation time and will be overwritten later + + Attributes: + weight (Tensor): the non-learnable quantized weights of the module which are of + shape :math:`(\text{out\_features}, \text{in\_features})`. + bias (Tensor): the non-learnable floating point bias of the module of shape + :math:`(\text{out\_features})`. If :attr:`bias` is ``True``, + the values are initialized to zero. + + Examples:: + + >>> # xdoctest: +SKIP + >>> m = nn.quantized.dynamic.Linear(20, 30) + >>> input = torch.randn(128, 20) + >>> output = m(input) + >>> print(output.size()) + torch.Size([128, 30]) + """ + # version used in this class is different from the parent class nnq.Linear + _version = 4 + + def __init__(self, in_features, out_features, bias_=True, dtype=torch.qint8): + super().__init__(in_features, out_features, bias_, dtype=dtype) + # We don't muck around with buffers or attributes or anything here + # to keep the module simple. *everything* is simply a Python attribute. + # Serialization logic is explicitly handled in the below serialization and + # deserialization modules + self.version = 4 + + def forward(self, x): + # Note that we can handle self.bias == None case. + if self._packed_params.dtype == torch.qint8: + if self.version is None or self.version < 4: + Y = torch.ops.quantized.linear_dynamic( + x, self._packed_params._packed_params) + else: + Y = torch.ops.quantized.linear_dynamic( + x, self._packed_params._packed_params, reduce_range=True) + elif self._packed_params.dtype == torch.float16: + Y = torch.ops.quantized.linear_dynamic_fp16( + x, self._packed_params._packed_params) + else: + raise RuntimeError('Unsupported dtype on dynamic quantized linear!') + return Y.to(x.dtype) + + def _get_name(self): + return 'DynamicQuantizedLinear' + + def extra_repr(self): + extra_repr_str = 'in_features={}, out_features={}, dtype={}'.format( + self.in_features, self.out_features, self._packed_params.dtype + ) + if self._packed_params.dtype == torch.qint8: + extra_repr_str += f', qscheme={self.weight().qscheme()}' + return extra_repr_str + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + self.version = version + super()._load_from_state_dict(state_dict, prefix, local_metadata, False, + missing_keys, unexpected_keys, error_msgs) + + @classmethod + def from_float(cls, mod): + r"""Create a dynamic quantized module from a float module or qparams_dict + + Args: + mod (Module): a float module, either produced by torch.ao.quantization + utilities or provided by the user + """ + float_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, + torch.ao.nn.intrinsic.modules.fused.LinearReLU, torch.ao.nn.qat.dynamic.Linear] + + assert type(mod) in float_modules, \ + 'nn.quantized.dynamic.Linear.from_float only works for one of' + \ + str([float_mod.__name__ for float_mod in float_modules]) + assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined' + if type(mod) == nni.LinearReLU: + mod = mod[0] + if mod.qconfig is not None and mod.qconfig.weight is not None: + weight_observer = mod.qconfig.weight() + else: + # We have the circular import issues if we import the qconfig in the beginning of this file: + # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the + # import until we need it. + from torch.ao.quantization.qconfig import default_dynamic_qconfig + weight_observer = default_dynamic_qconfig.weight() + dtype = weight_observer.dtype + assert dtype in [torch.qint8, torch.float16], "The only supported dtypes for " \ + f"dynamic quantized linear are qint8 and float16 got: {dtype}" + weight_observer(mod.weight) + if dtype == torch.qint8: + qweight = _quantize_weight(mod.weight.float(), weight_observer) + elif dtype == torch.float16: + qweight = mod.weight.float() + else: + raise RuntimeError('Unsupported dtype specified for dynamic quantized Linear!') + qlinear = cls(mod.in_features, mod.out_features, dtype=dtype) + qlinear.set_weight_bias(qweight, mod.bias) + return qlinear + + @classmethod + def from_reference(cls, ref_qlinear): + """ Create a (fbgemm/qnnpack) dynamic quantized module from a reference quantized + module + Args: + ref_qlinear (Module): a reference quantized module, either produced by + torch.ao.quantization functions or provided by the user + """ + qlinear = cls(ref_qlinear.in_features, ref_qlinear.out_features, dtype=ref_qlinear.weight_dtype) + qweight = ref_qlinear.get_quantized_weight() + bias = ref_qlinear.bias + qlinear.set_weight_bias(qweight, bias) + return qlinear diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..02badab6796401e317bb0e75741e501a5b3ca1eb --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py @@ -0,0 +1,1096 @@ +import numbers +import warnings + +import torch +import torch.nn as nn +from torch import Tensor # noqa: F401 +from torch._jit_internal import Tuple, Optional, List, Union, Dict # noqa: F401 +from torch.nn.utils.rnn import PackedSequence +from torch.ao.nn.quantized.modules.utils import _quantize_weight + +__all__ = ['pack_weight_bias', 'PackedParameter', 'RNNBase', 'LSTM', 'GRU', 'RNNCellBase', 'RNNCell', 'LSTMCell', + 'GRUCell', "apply_permutation"] + + +def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor: + return tensor.index_select(dim, permutation) + + +def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor: + warnings.warn("apply_permutation is deprecated, please use tensor.index_select(dim, permutation) instead") + return _apply_permutation(tensor, permutation, dim) + + +def pack_weight_bias(qweight, bias, dtype): + + if dtype == torch.qint8: + # for each layer, for each direction we need to quantize and pack + # weights and pack parameters in this order: + # + # w_ih, w_hh + packed_weight = \ + torch.ops.quantized.linear_prepack(qweight, bias) + + return packed_weight + else: + # for each layer, for each direction we need to quantize and pack + # weights and pack parameters in this order: + # + # packed_ih, packed_hh, b_ih, b_hh + packed_weight = torch.ops.quantized.linear_prepack_fp16( + qweight, bias) + + return packed_weight + + +class PackedParameter(torch.nn.Module): + def __init__(self, param): + super().__init__() + self.param = param + + def _save_to_state_dict(self, destination, prefix, keep_vars): + super()._save_to_state_dict(destination, prefix, keep_vars) + destination[prefix + 'param'] = self.param + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + self.param = state_dict[prefix + 'param'] + super()._load_from_state_dict(state_dict, prefix, local_metadata, False, + missing_keys, unexpected_keys, error_msgs) + + +class RNNBase(torch.nn.Module): + + _FLOAT_MODULE = nn.RNNBase + + _version = 2 + + def __init__(self, mode, input_size, hidden_size, + num_layers=1, bias=True, batch_first=False, + dropout=0., bidirectional=False, dtype=torch.qint8): + super().__init__() + + self.mode = mode + self.input_size = input_size + self.hidden_size = hidden_size + self.num_layers = num_layers + self.bias = bias + self.batch_first = batch_first + self.dropout = float(dropout) + self.bidirectional = bidirectional + self.dtype = dtype + self.version = 2 + self.training = False + num_directions = 2 if bidirectional else 1 + + # "type: ignore" is required since ints and Numbers are not fully comparable + # https://github.com/python/mypy/issues/8566 + if not isinstance(dropout, numbers.Number) \ + or not 0 <= dropout <= 1 or isinstance(dropout, bool): # type: ignore[operator] + raise ValueError("dropout should be a number in range [0, 1] " + "representing the probability of an element being " + "zeroed") + if dropout > 0 and num_layers == 1: # type: ignore[operator] + warnings.warn("dropout option adds dropout after all but last " + "recurrent layer, so non-zero dropout expects " + f"num_layers greater than 1, but got dropout={dropout} and " + f"num_layers={num_layers}") + + if mode == 'LSTM': + gate_size = 4 * hidden_size + elif mode == 'GRU': + gate_size = 3 * hidden_size + else: + raise ValueError("Unrecognized RNN mode: " + mode) + + _all_weight_values = [] + for layer in range(num_layers): + for direction in range(num_directions): + layer_input_size = input_size if layer == 0 else hidden_size * num_directions + + w_ih = torch.randn(gate_size, layer_input_size).to(torch.float) + w_hh = torch.randn(gate_size, hidden_size).to(torch.float) + b_ih = torch.randn(gate_size).to(torch.float) + b_hh = torch.randn(gate_size).to(torch.float) + if dtype == torch.qint8: + w_ih = torch.quantize_per_tensor(w_ih, scale=0.1, zero_point=0, dtype=torch.qint8) + w_hh = torch.quantize_per_tensor(w_hh, scale=0.1, zero_point=0, dtype=torch.qint8) + packed_ih = \ + torch.ops.quantized.linear_prepack(w_ih, b_ih) + packed_hh = \ + torch.ops.quantized.linear_prepack(w_hh, b_hh) + if self.version is None or self.version < 2: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, b_ih, b_hh) + else: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, b_ih, b_hh, True) + else: + packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih) + packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh) + cell_params = torch.ops.quantized.make_quantized_cell_params_fp16( + packed_ih, packed_hh) + + _all_weight_values.append(PackedParameter(cell_params)) + self._all_weight_values = torch.nn.ModuleList(_all_weight_values) + + def _get_name(self): + return 'DynamicQuantizedRNN' + + def extra_repr(self): + s = '{input_size}, {hidden_size}' + if self.num_layers != 1: + s += ', num_layers={num_layers}' + if self.bias is not True: + s += ', bias={bias}' + if self.batch_first is not False: + s += ', batch_first={batch_first}' + if self.dropout != 0: + s += ', dropout={dropout}' + if self.bidirectional is not False: + s += ', bidirectional={bidirectional}' + return s.format(**self.__dict__) + + def __repr__(self): + # We don't want to show `ModuleList` children, hence custom + # `__repr__`. This is the same as nn.Module.__repr__, except the check + # for the `PackedParameter` and `nn.ModuleList`. + # You should still override `extra_repr` to add more info. + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + if isinstance(module, (PackedParameter, nn.ModuleList)): + continue + mod_str = repr(module) + mod_str = nn.modules.module._addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + + main_str = self._get_name() + '(' + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + + main_str += ')' + return main_str + + def check_input(self, input: Tensor, batch_sizes: Optional[Tensor]) -> None: + expected_input_dim = 2 if batch_sizes is not None else 3 + if input.dim() != expected_input_dim: + raise RuntimeError( + f'input must have {expected_input_dim} dimensions, got {input.dim()}') + if self.input_size != input.size(-1): + raise RuntimeError( + f'input.size(-1) must be equal to input_size. Expected {self.input_size}, got {input.size(-1)}') + + def get_expected_hidden_size(self, input: Tensor, batch_sizes: Optional[Tensor]) -> Tuple[int, int, int]: + if batch_sizes is not None: + mini_batch = int(batch_sizes[0]) + else: + mini_batch = input.size(0) if self.batch_first else input.size(1) + num_directions = 2 if self.bidirectional else 1 + expected_hidden_size = (self.num_layers * num_directions, + mini_batch, self.hidden_size) + return expected_hidden_size + + def check_hidden_size( + self, hx: Tensor, expected_hidden_size: Tuple[int, int, int], + msg: str = 'Expected hidden size {}, got {}' + ) -> None: + if hx.size() != expected_hidden_size: + raise RuntimeError(msg.format( + expected_hidden_size, list(hx.size()))) + + def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None: + self.check_input(input, batch_sizes) + expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes) + self.check_hidden_size(hidden, expected_hidden_size, + msg='Expected hidden size {}, got {}') + + def permute_hidden(self, hx: Tensor, permutation: Optional[Tensor]) -> Tensor: + if permutation is None: + return hx + return _apply_permutation(hx, permutation) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + self.version = version + super()._load_from_state_dict(state_dict, prefix, local_metadata, False, + missing_keys, unexpected_keys, error_msgs) + + def set_weight_bias(self, weight_bias_dict): + + def weight_bias_name(ihhh, layer, suffix): + weight_name = f"weight_{ihhh}_l{layer}{suffix}" + bias_name = f"bias_{ihhh}_l{layer}{suffix}" + return weight_name, bias_name + + num_directions = 2 if self.bidirectional else 1 + # TODO: dedup with __init__ of RNNBase + _all_weight_values = [] + for layer in range(self.num_layers): + for direction in range(num_directions): + suffix = "_reverse" if direction == 1 else "" + w_ih_name, b_ih_name = weight_bias_name("ih", layer, suffix) + w_hh_name, b_hh_name = weight_bias_name("hh", layer, suffix) + w_ih = weight_bias_dict[w_ih_name] + b_ih = weight_bias_dict[b_ih_name] + w_hh = weight_bias_dict[w_hh_name] + b_hh = weight_bias_dict[b_hh_name] + if w_ih.dtype == torch.qint8: + packed_ih = torch.ops.quantized.linear_prepack(w_ih, b_ih) + packed_hh = torch.ops.quantized.linear_prepack(w_hh, b_hh) + if self.version is None or self.version < 2: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, b_ih, b_hh) + else: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, b_ih, b_hh, True) + else: + packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih) + packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh) + cell_params = torch.ops.quantized.make_quantized_cell_params_fp16( + packed_ih, packed_hh) + + _all_weight_values.append(PackedParameter(cell_params)) + self._all_weight_values = torch.nn.ModuleList(_all_weight_values) + + @classmethod + def from_float(cls, mod): + assert type(mod) in {torch.nn.LSTM, + torch.nn.GRU}, 'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM and nn.GRU' + assert hasattr( + mod, + 'qconfig' + ), 'Input float module must have qconfig defined' + + if mod.qconfig is not None and mod.qconfig.weight is not None: + weight_observer_method = mod.qconfig.weight + else: + # We have the circular import issues if we import the qconfig in the beginning of this file: + # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the + # import until we need it. + from torch.ao.quantization.qconfig import default_dynamic_qconfig + weight_observer_method = default_dynamic_qconfig.weight + + dtype = weight_observer_method().dtype + supported_scalar_types = [torch.qint8, torch.float16] + if dtype not in supported_scalar_types: + raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}') + # RNNBase can be either LSTM or GRU + qRNNBase: Union[LSTM, GRU] + if mod.mode == 'LSTM': + qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers, + mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype) + elif mod.mode == 'GRU': + qRNNBase = GRU(mod.input_size, mod.hidden_size, mod.num_layers, + mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype) + else: + raise NotImplementedError('Only LSTM/GRU is supported for QuantizedRNN for now') + + num_directions = 2 if mod.bidirectional else 1 + + assert mod.bias + + _all_weight_values = [] + for layer in range(qRNNBase.num_layers): + for direction in range(num_directions): + suffix = '_reverse' if direction == 1 else '' + + def retrieve_weight_bias(ihhh): + weight_name = f'weight_{ihhh}_l{layer}{suffix}' + bias_name = f'bias_{ihhh}_l{layer}{suffix}' + weight = getattr(mod, weight_name) + bias = getattr(mod, bias_name) + return weight, bias + + weight_ih, bias_ih = retrieve_weight_bias('ih') + weight_hh, bias_hh = retrieve_weight_bias('hh') + + if dtype == torch.qint8: + def quantize_and_pack(w, b): + weight_observer = weight_observer_method() + weight_observer(w) + qweight = _quantize_weight(w.float(), weight_observer) + packed_weight = \ + torch.ops.quantized.linear_prepack(qweight, b) + return packed_weight + packed_ih = quantize_and_pack(weight_ih, bias_ih) + packed_hh = quantize_and_pack(weight_hh, bias_hh) + if qRNNBase.version is None or qRNNBase.version < 2: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, bias_ih, bias_hh) + else: + cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic( + packed_ih, packed_hh, bias_ih, bias_hh, True) + + elif dtype == torch.float16: + packed_ih = torch.ops.quantized.linear_prepack_fp16( + weight_ih.float(), bias_ih) + packed_hh = torch.ops.quantized.linear_prepack_fp16( + weight_hh.float(), bias_hh) + + cell_params = torch.ops.quantized.make_quantized_cell_params_fp16( + packed_ih, packed_hh) + else: + raise RuntimeError('Unsupported dtype specified for dynamic quantized LSTM!') + + _all_weight_values.append(PackedParameter(cell_params)) + qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values) + + return qRNNBase + + def _weight_bias(self): + # Returns a dict of weights and biases + weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}} + count = 0 + num_directions = 2 if self.bidirectional else 1 + for layer in range(self.num_layers): + for direction in range(num_directions): + suffix = '_reverse' if direction == 1 else '' + key_name1 = f'weight_ih_l{layer}{suffix}' + key_name2 = f'weight_hh_l{layer}{suffix}' + # packed weights are part of torchbind class, CellParamsSerializationType + # Within the packed weight class, the weight and bias are accessible as Tensors + packed_weight_bias = self._all_weight_values[count].param.__getstate__()[0][4] + weight_bias_dict['weight'][key_name1] = packed_weight_bias[0].__getstate__()[0][0] + weight_bias_dict['weight'][key_name2] = packed_weight_bias[1].__getstate__()[0][0] + key_name1 = f'bias_ih_l{layer}{suffix}' + key_name2 = f'bias_hh_l{layer}{suffix}' + weight_bias_dict['bias'][key_name1] = packed_weight_bias[0].__getstate__()[0][1] + weight_bias_dict['bias'][key_name2] = packed_weight_bias[1].__getstate__()[0][1] + count = count + 1 + return weight_bias_dict + + def get_weight(self): + return self._weight_bias()['weight'] + + def get_bias(self): + return self._weight_bias()['bias'] + + +class LSTM(RNNBase): + r""" + A dynamic quantized LSTM module with floating point tensor as inputs and outputs. + We adopt the same interface as `torch.nn.LSTM`, please see + https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for documentation. + + Examples:: + + >>> # xdoctest: +SKIP + >>> rnn = nn.LSTM(10, 20, 2) + >>> input = torch.randn(5, 3, 10) + >>> h0 = torch.randn(2, 3, 20) + >>> c0 = torch.randn(2, 3, 20) + >>> output, (hn, cn) = rnn(input, (h0, c0)) + """ + _FLOAT_MODULE = nn.LSTM + + __overloads__ = {'forward': ['forward_packed', 'forward_tensor']} + + def __init__(self, *args, **kwargs): + super().__init__('LSTM', *args, **kwargs) + + def _get_name(self): + return 'DynamicQuantizedLSTM' + + def forward_impl( + self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]], + batch_sizes: Optional[Tensor], max_batch_size: int, + sorted_indices: Optional[Tensor] + ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]: + if hx is None: + num_directions = 2 if self.bidirectional else 1 + zeros = torch.zeros(self.num_layers * num_directions, + max_batch_size, self.hidden_size, + dtype=input.dtype, device=input.device) + hx = (zeros, zeros) + else: + # Each batch of the hidden state should match the input sequence that + # the user believes he/she is passing in. + hx = self.permute_hidden(hx, sorted_indices) + + self.check_forward_args(input, hx, batch_sizes) + + _all_params = ([m.param for m in self._all_weight_values]) + if batch_sizes is None: + result = torch.quantized_lstm(input, hx, _all_params, self.bias, self.num_layers, + float(self.dropout), self.training, self.bidirectional, + self.batch_first, dtype=self.dtype, use_dynamic=True) + else: + result = torch.quantized_lstm(input, batch_sizes, hx, _all_params, self.bias, + self.num_layers, float(self.dropout), self.training, + self.bidirectional, dtype=self.dtype, use_dynamic=True) + output = result[0] + hidden = result[1:] + + return output, hidden + + @torch.jit.export + def forward_tensor( + self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None + ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]: + batch_sizes = None + max_batch_size = input.size(0) if self.batch_first else input.size(1) + sorted_indices = None + unsorted_indices = None + + output, hidden = self.forward_impl( + input, hx, batch_sizes, max_batch_size, sorted_indices) + + return output, self.permute_hidden(hidden, unsorted_indices) + + @torch.jit.export + def forward_packed( + self, input: PackedSequence, hx: Optional[Tuple[Tensor, Tensor]] = None + ) -> Tuple[PackedSequence, Tuple[Tensor, Tensor]]: + input_, batch_sizes, sorted_indices, unsorted_indices = input + max_batch_size = int(batch_sizes[0]) + + output_, hidden = self.forward_impl( + input_, hx, batch_sizes, max_batch_size, sorted_indices + ) + + output = PackedSequence(output_, batch_sizes, + sorted_indices, unsorted_indices) + return output, self.permute_hidden(hidden, unsorted_indices) + + # "type: ignore" is required due to issue #43072 + def permute_hidden( # type: ignore[override] + self, hx: Tuple[Tensor, Tensor], permutation: Optional[Tensor] + ) -> Tuple[Tensor, Tensor]: + if permutation is None: + return hx + return _apply_permutation(hx[0], permutation), _apply_permutation(hx[1], permutation) + + # "type: ignore" is required due to issue #43072 + def check_forward_args( # type: ignore[override] + self, input: Tensor, hidden: Tuple[Tensor, Tensor], batch_sizes: Optional[Tensor] + ) -> None: + self.check_input(input, batch_sizes) + expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes) + + self.check_hidden_size(hidden[0], expected_hidden_size, + 'Expected hidden[0] size {}, got {}') + self.check_hidden_size(hidden[1], expected_hidden_size, + 'Expected hidden[1] size {}, got {}') + + @torch.jit.ignore + def forward(self, input, hx=None): + if isinstance(input, PackedSequence): + return self.forward_packed(input, hx) + else: + return self.forward_tensor(input, hx) + + @classmethod + def from_float(cls, mod): + return super().from_float(mod) + + @classmethod + def from_reference(cls, ref_mod): + assert hasattr(ref_mod, "weight_ih_l0_dtype"), "We are assuming weight_ih_l0 " + "exists in LSTM, may need to relax the assumption to support the use case" + qmod = cls( + ref_mod.input_size, + ref_mod.hidden_size, + ref_mod.num_layers, + ref_mod.bias, + ref_mod.batch_first, + ref_mod.dropout, + ref_mod.bidirectional, + # assuming there is layer 0, which should be OK + ref_mod.weight_ih_l0_dtype, + ) + qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict()) + return qmod + + +class GRU(RNNBase): + r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence. + + + For each element in the input sequence, each layer computes the following + function: + + .. math:: + \begin{array}{ll} + r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ + z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\ + h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)} + \end{array} + + where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the input + at time `t`, :math:`h_{(t-1)}` is the hidden state of the layer + at time `t-1` or the initial hidden state at time `0`, and :math:`r_t`, + :math:`z_t`, :math:`n_t` are the reset, update, and new gates, respectively. + :math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product. + + In a multilayer GRU, the input :math:`x^{(l)}_t` of the :math:`l` -th layer + (:math:`l >= 2`) is the hidden state :math:`h^{(l-1)}_t` of the previous layer multiplied by + dropout :math:`\delta^{(l-1)}_t` where each :math:`\delta^{(l-1)}_t` is a Bernoulli random + variable which is :math:`0` with probability :attr:`dropout`. + + Args: + input_size: The number of expected features in the input `x` + hidden_size: The number of features in the hidden state `h` + num_layers: Number of recurrent layers. E.g., setting ``num_layers=2`` + would mean stacking two GRUs together to form a `stacked GRU`, + with the second GRU taking in outputs of the first GRU and + computing the final results. Default: 1 + bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. + Default: ``True`` + batch_first: If ``True``, then the input and output tensors are provided + as (batch, seq, feature). Default: ``False`` + dropout: If non-zero, introduces a `Dropout` layer on the outputs of each + GRU layer except the last layer, with dropout probability equal to + :attr:`dropout`. Default: 0 + bidirectional: If ``True``, becomes a bidirectional GRU. Default: ``False`` + + Inputs: input, h_0 + - **input** of shape `(seq_len, batch, input_size)`: tensor containing the features + of the input sequence. The input can also be a packed variable length + sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence` + for details. + - **h_0** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor + containing the initial hidden state for each element in the batch. + Defaults to zero if not provided. If the RNN is bidirectional, + num_directions should be 2, else it should be 1. + + Outputs: output, h_n + - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor + containing the output features h_t from the last layer of the GRU, + for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been + given as the input, the output will also be a packed sequence. + For the unpacked case, the directions can be separated + using ``output.view(seq_len, batch, num_directions, hidden_size)``, + with forward and backward being direction `0` and `1` respectively. + + Similarly, the directions can be separated in the packed case. + - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor + containing the hidden state for `t = seq_len` + + Like *output*, the layers can be separated using + ``h_n.view(num_layers, num_directions, batch, hidden_size)``. + + Shape: + - Input1: :math:`(L, N, H_{in})` tensor containing input features where + :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length. + - Input2: :math:`(S, N, H_{out})` tensor + containing the initial hidden state for each element in the batch. + :math:`H_{out}=\text{hidden\_size}` + Defaults to zero if not provided. where :math:`S=\text{num\_layers} * \text{num\_directions}` + If the RNN is bidirectional, num_directions should be 2, else it should be 1. + - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}` + - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state + for each element in the batch + + Attributes: + weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer + (W_ir|W_iz|W_in), of shape `(3*hidden_size, input_size)` for `k = 0`. + Otherwise, the shape is `(3*hidden_size, num_directions * hidden_size)` + weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer + (W_hr|W_hz|W_hn), of shape `(3*hidden_size, hidden_size)` + bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer + (b_ir|b_iz|b_in), of shape `(3*hidden_size)` + bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer + (b_hr|b_hz|b_hn), of shape `(3*hidden_size)` + + .. note:: + All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` + where :math:`k = \frac{1}{\text{hidden\_size}}` + + .. note:: + The calculation of new gate :math:`n_t` subtly differs from the original paper and other frameworks. + In the original implementation, the Hadamard product :math:`(\odot)` between :math:`r_t` and the + previous hidden state :math:`h_{(t-1)}` is done before the multiplication with the weight matrix + `W` and addition of bias: + + .. math:: + \begin{aligned} + n_t = \tanh(W_{in} x_t + b_{in} + W_{hn} ( r_t \odot h_{(t-1)} ) + b_{hn}) + \end{aligned} + + This is in contrast to PyTorch implementation, which is done after :math:`W_{hn} h_{(t-1)}` + + .. math:: + \begin{aligned} + n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) + \end{aligned} + + This implementation differs on purpose for efficiency. + + .. include:: ../cudnn_persistent_rnn.rst + + Examples:: + + >>> # xdoctest: +SKIP + >>> rnn = nn.GRU(10, 20, 2) + >>> input = torch.randn(5, 3, 10) + >>> h0 = torch.randn(2, 3, 20) + >>> output, hn = rnn(input, h0) + """ + _FLOAT_MODULE = nn.GRU + + __overloads__ = {'forward': ['forward_packed', 'forward_tensor']} + + def __init__(self, *args, **kwargs): + super().__init__('GRU', *args, **kwargs) + + def _get_name(self): + return 'DynamicQuantizedGRU' + + def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None: + self.check_input(input, batch_sizes) + expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes) + + self.check_hidden_size(hidden, expected_hidden_size, + 'Expected hidden size {}, got {}') + + def forward_impl( + self, input: Tensor, hx: Optional[Tensor], + batch_sizes: Optional[Tensor], max_batch_size: int, + sorted_indices: Optional[Tensor] + ) -> Tuple[Tensor, Tensor]: + if hx is None: + num_directions = 2 if self.bidirectional else 1 + zeros = torch.zeros(self.num_layers * num_directions, + max_batch_size, self.hidden_size, + dtype=input.dtype, device=input.device) + hx = zeros + else: + # Each batch of the hidden state should match the input sequence that + # the user believes he/she is passing in. + hx = self.permute_hidden(hx, sorted_indices) + + self.check_forward_args(input, hx, batch_sizes) + + _all_params = ([m.param for m in self._all_weight_values]) + if batch_sizes is None: + result = torch.quantized_gru(input, + hx, + _all_params, + self.bias, + self.num_layers, + self.dropout, + self.training, + self.bidirectional, + self.batch_first) + else: + result = torch.quantized_gru(input, + batch_sizes, + hx, + _all_params, + self.bias, + self.num_layers, + self.dropout, + self.training, + self.bidirectional) + output = result[0] + hidden = result[1] + + return output, hidden + + + @torch.jit.export + def forward_tensor( + self, input: Tensor, hx: Optional[Tensor] = None + ) -> Tuple[Tensor, Tensor]: + batch_sizes = None + max_batch_size = input.size(0) if self.batch_first else input.size(1) + sorted_indices = None + unsorted_indices = None + + output, hidden = self.forward_impl( + input, hx, batch_sizes, max_batch_size, sorted_indices) + + return output, self.permute_hidden(hidden, unsorted_indices) + + @torch.jit.export + def forward_packed( + self, input: PackedSequence, hx: Optional[Tensor] = None + ) -> Tuple[PackedSequence, Tensor]: + input_, batch_sizes, sorted_indices, unsorted_indices = input + max_batch_size = int(batch_sizes[0]) + output_, hidden = self.forward_impl( + input_, hx, batch_sizes, max_batch_size, sorted_indices + ) + + output = PackedSequence(output_, batch_sizes, + sorted_indices, unsorted_indices) + return output, self.permute_hidden(hidden, unsorted_indices) + + def permute_hidden( + self, hx: Tensor, permutation: Optional[Tensor] + ) -> Tensor: + if permutation is None: + return hx + return _apply_permutation(hx, permutation) + + @torch.jit.ignore + def forward(self, input, hx=None): + if isinstance(input, PackedSequence): + return self.forward_packed(input, hx) + else: + return self.forward_tensor(input, hx) + + @classmethod + def from_float(cls, mod): + return super().from_float(mod) + + @classmethod + def from_reference(cls, ref_mod): + assert hasattr(ref_mod, "weight_ih_l0_dtype"), "We are assuming weight_ih_l0 " + "exists in LSTM, may need to relax the assumption to support the use case" + qmod = cls( + ref_mod.input_size, + ref_mod.hidden_size, + ref_mod.num_layers, + ref_mod.bias, + ref_mod.batch_first, + ref_mod.dropout, + ref_mod.bidirectional, + # assuming there is layer 0, which should be OK + ref_mod.weight_ih_l0_dtype, + ) + qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict()) + return qmod + +class RNNCellBase(torch.nn.Module): + # _FLOAT_MODULE = nn.CellRNNBase + __constants__ = ['input_size', 'hidden_size', 'bias'] + + def __init__(self, input_size, hidden_size, bias=True, num_chunks=4, dtype=torch.qint8): + super().__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.bias = bias + self.weight_dtype = dtype + if bias: + self.bias_ih = torch.randn(num_chunks * hidden_size).to(dtype=torch.float) + self.bias_hh = torch.randn(num_chunks * hidden_size).to(dtype=torch.float) + else: + self.register_parameter('bias_ih', None) + self.register_parameter('bias_hh', None) + + weight_ih = torch.randn(num_chunks * hidden_size, input_size).to(torch.float) + weight_hh = torch.randn(num_chunks * hidden_size, hidden_size).to(torch.float) + if dtype == torch.qint8: + weight_ih = torch.quantize_per_tensor(weight_ih, scale=1, zero_point=0, dtype=torch.qint8) + weight_hh = torch.quantize_per_tensor(weight_hh, scale=1, zero_point=0, dtype=torch.qint8) + + if dtype == torch.qint8: + # for each layer, for each direction we need to quantize and pack + # weights and pack parameters in this order: + # + # w_ih, w_hh + packed_weight_ih = \ + torch.ops.quantized.linear_prepack(weight_ih, self.bias_ih) + packed_weight_hh = \ + torch.ops.quantized.linear_prepack(weight_hh, self.bias_hh) + else: + # for each layer, for each direction we need to quantize and pack + # weights and pack parameters in this order: + # + # packed_ih, packed_hh, b_ih, b_hh + packed_weight_ih = torch.ops.quantized.linear_prepack_fp16( + weight_ih, self.bias_ih) + packed_weight_hh = torch.ops.quantized.linear_prepack_fp16( + weight_hh, self.bias_hh) + + self._packed_weight_ih = packed_weight_ih + self._packed_weight_hh = packed_weight_hh + + def _get_name(self): + return 'DynamicQuantizedRNNBase' + + def extra_repr(self): + s = '{input_size}, {hidden_size}' + if 'bias' in self.__dict__ and self.bias is not True: + s += ', bias={bias}' + if 'nonlinearity' in self.__dict__ and self.nonlinearity != "tanh": + s += ', nonlinearity={nonlinearity}' + return s.format(**self.__dict__) + + def check_forward_input(self, input): + if input.size(1) != self.input_size: + raise RuntimeError( + f"input has inconsistent input_size: got {input.size(1)}, expected {self.input_size}") + + def check_forward_hidden(self, input: Tensor, hx: Tensor, hidden_label: str = '') -> None: + if input.size(0) != hx.size(0): + raise RuntimeError( + f"Input batch size {input.size(0)} doesn't match hidden{hidden_label} batch size {hx.size(0)}") + + if hx.size(1) != self.hidden_size: + raise RuntimeError( + f"hidden{hidden_label} has inconsistent hidden_size: got {hx.size(1)}, expected {self.hidden_size}") + + @classmethod + def from_float(cls, mod): + assert type(mod) in {torch.nn.LSTMCell, + torch.nn.GRUCell, + torch.nn.RNNCell}, 'nn.quantized.dynamic.RNNCellBase.from_float \ + only works for nn.LSTMCell, nn.GRUCell and nn.RNNCell' + assert hasattr( + mod, 'qconfig'), 'Input float module must have qconfig defined' + + if mod.qconfig is not None and mod.qconfig.weight is not None: + weight_observer_method = mod.qconfig.weight + else: + # We have the circular import issues if we import the qconfig in the beginning of this file: + # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the + # import until we need it. + from torch.ao.quantization.qconfig import default_dynamic_qconfig + weight_observer_method = default_dynamic_qconfig.weight + + dtype = weight_observer_method().dtype + supported_scalar_types = [torch.qint8, torch.float16] + if dtype not in supported_scalar_types: + raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}') + + qRNNCellBase: Union[LSTMCell, GRUCell, RNNCell] + + if type(mod) == torch.nn.LSTMCell: + qRNNCellBase = LSTMCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype) + elif type(mod) == torch.nn.GRUCell: + qRNNCellBase = GRUCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype) + elif type(mod) == torch.nn.RNNCell: + qRNNCellBase = RNNCell(mod.input_size, mod.hidden_size, bias=mod.bias, nonlinearity=mod.nonlinearity, dtype=dtype) + else: + raise NotImplementedError('Only LSTMCell, GRUCell and RNNCell \ + are supported for QuantizedRNN for now') + + assert mod.bias + + def _observe_and_quantize_weight(weight): + if dtype == torch.qint8: + weight_observer = weight_observer_method() + weight_observer(weight) + qweight = _quantize_weight(weight.float(), weight_observer) + return qweight + else: + return weight.float() + + qRNNCellBase._packed_weight_ih = pack_weight_bias(_observe_and_quantize_weight(mod.weight_ih), mod.bias_ih, dtype) + qRNNCellBase._packed_weight_hh = pack_weight_bias(_observe_and_quantize_weight(mod.weight_hh), mod.bias_hh, dtype) + return qRNNCellBase + + @classmethod + def from_reference(cls, ref_mod): + assert hasattr(ref_mod, "weight_ih_dtype"), "We are assuming weight_ih " + "exists in reference module, may need to relax the assumption to support the use case" + if hasattr(ref_mod, "nonlinearity"): + qmod = cls( + ref_mod.input_size, + ref_mod.hidden_size, + ref_mod.bias, + ref_mod.nonlinearity, + dtype=ref_mod.weight_ih_dtype + ) + else: + qmod = cls( + ref_mod.input_size, + ref_mod.hidden_size, + ref_mod.bias, + dtype=ref_mod.weight_ih_dtype + ) + weight_bias_dict = { + "weight": { + "weight_ih": ref_mod.get_quantized_weight_ih(), + "weight_hh": ref_mod.get_quantized_weight_hh(), + }, + "bias": { + "bias_ih": ref_mod.bias_ih, + "bias_hh": ref_mod.bias_hh, + } + } + qmod.set_weight_bias(weight_bias_dict) + return qmod + + def _weight_bias(self): + # Returns a dict of weights and biases + weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}} + w1, b1 = self._packed_weight_ih.__getstate__()[0] + w2, b2 = self._packed_weight_hh.__getstate__()[0] + # TODO: these can be simplified to one level? e.g. using weight_ih as key + # directly + weight_bias_dict['weight']['weight_ih'] = w1 + weight_bias_dict['weight']['weight_hh'] = w2 + weight_bias_dict['bias']['bias_ih'] = b1 + weight_bias_dict['bias']['bias_hh'] = b2 + return weight_bias_dict + + def get_weight(self): + return self._weight_bias()['weight'] + + def get_bias(self): + return self._weight_bias()['bias'] + + def set_weight_bias(self, weight_bias_dict): + # TODO: these can be simplified to one level? e.g. using weight_ih as key + # directly + self._packed_weight_ih = pack_weight_bias( + weight_bias_dict["weight"]["weight_ih"], + weight_bias_dict["bias"]["bias_ih"], + self.weight_dtype) + self._packed_weight_hh = pack_weight_bias( + weight_bias_dict["weight"]["weight_hh"], + weight_bias_dict["bias"]["bias_hh"], + self.weight_dtype) + + def _save_to_state_dict(self, destination, prefix, keep_vars): + super()._save_to_state_dict(destination, prefix, keep_vars) + destination[prefix + '_packed_weight_ih'] = self._packed_weight_ih + destination[prefix + '_packed_weight_hh'] = self._packed_weight_hh + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + self._packed_weight_ih = state_dict.pop(prefix + '_packed_weight_ih') + self._packed_weight_hh = state_dict.pop(prefix + '_packed_weight_hh') + super()._load_from_state_dict(state_dict, prefix, local_metadata, False, + missing_keys, unexpected_keys, error_msgs) + + +class RNNCell(RNNCellBase): + r"""An Elman RNN cell with tanh or ReLU non-linearity. + A dynamic quantized RNNCell module with floating point tensor as inputs and outputs. + Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.RNNCell`, + please see https://pytorch.org/docs/stable/nn.html#torch.nn.RNNCell for documentation. + + Examples:: + + >>> # xdoctest: +SKIP + >>> rnn = nn.RNNCell(10, 20) + >>> input = torch.randn(6, 3, 10) + >>> hx = torch.randn(3, 20) + >>> output = [] + >>> for i in range(6): + ... hx = rnn(input[i], hx) + ... output.append(hx) + """ + __constants__ = ['input_size', 'hidden_size', 'bias', 'nonlinearity'] + + def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh", dtype=torch.qint8): + super().__init__(input_size, hidden_size, bias, num_chunks=1, dtype=dtype) + self.nonlinearity = nonlinearity + + def _get_name(self): + return 'DynamicQuantizedRNNCell' + + def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor: + self.check_forward_input(input) + if hx is None: + hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device) + self.check_forward_hidden(input, hx, '') + if self.nonlinearity == "tanh": + ret = torch.ops.quantized.quantized_rnn_tanh_cell_dynamic( + input, hx, + self._packed_weight_ih, self._packed_weight_hh, + self.bias_ih, self.bias_hh) + elif self.nonlinearity == "relu": + ret = torch.ops.quantized.quantized_rnn_relu_cell_dynamic( + input, hx, + self._packed_weight_ih, self._packed_weight_hh, + self.bias_ih, self.bias_hh) + else: + ret = input # TODO: remove when jit supports exception flow + raise RuntimeError( + f"Unknown nonlinearity: {self.nonlinearity}") + return ret + + @classmethod + def from_float(cls, mod): + return super().from_float(mod) + + +class LSTMCell(RNNCellBase): + r"""A long short-term memory (LSTM) cell. + + A dynamic quantized LSTMCell module with floating point tensor as inputs and outputs. + Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.LSTMCell`, + please see https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell for documentation. + + Examples:: + + >>> # xdoctest: +SKIP + >>> rnn = nn.LSTMCell(10, 20) + >>> input = torch.randn(6, 3, 10) + >>> hx = torch.randn(3, 20) + >>> cx = torch.randn(3, 20) + >>> output = [] + >>> for i in range(6): + ... hx, cx = rnn(input[i], (hx, cx)) + ... output.append(hx) + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, num_chunks=4, **kwargs) # type: ignore[misc] + + def _get_name(self): + return 'DynamicQuantizedLSTMCell' + + def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]: + self.check_forward_input(input) + if hx is None: + zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device) + hx = (zeros, zeros) + self.check_forward_hidden(input, hx[0], '[0]') + self.check_forward_hidden(input, hx[1], '[1]') + return torch.ops.quantized.quantized_lstm_cell_dynamic( + input, hx, + self._packed_weight_ih, self._packed_weight_hh, + self.bias_ih, self.bias_hh) + + @classmethod + def from_float(cls, mod): + return super().from_float(mod) + + +class GRUCell(RNNCellBase): + r"""A gated recurrent unit (GRU) cell + + A dynamic quantized GRUCell module with floating point tensor as inputs and outputs. + Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.GRUCell`, + please see https://pytorch.org/docs/stable/nn.html#torch.nn.GRUCell for documentation. + + Examples:: + + >>> # xdoctest: +SKIP + >>> rnn = nn.GRUCell(10, 20) + >>> input = torch.randn(6, 3, 10) + >>> hx = torch.randn(3, 20) + >>> output = [] + >>> for i in range(6): + ... hx = rnn(input[i], hx) + ... output.append(hx) + """ + + def __init__(self, input_size, hidden_size, bias=True, dtype=torch.qint8): + super().__init__(input_size, hidden_size, bias, num_chunks=3, dtype=dtype) + + def _get_name(self): + return 'DynamicQuantizedGRUCell' + + def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor: + self.check_forward_input(input) + if hx is None: + hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device) + self.check_forward_hidden(input, hx, '') + return torch.ops.quantized.quantized_gru_cell_dynamic( + input, hx, + self._packed_weight_ih, self._packed_weight_hh, + self.bias_ih, self.bias_hh, + ) + + @classmethod + def from_float(cls, mod): + return super().from_float(mod) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..72218184fcfacc20aba8c5359d38999d13493664 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/functional.py @@ -0,0 +1,644 @@ +r""" Functional interface (quantized).""" +from typing import List, Optional +import warnings + +import torch +from torch import Tensor +from torch.nn.modules.utils import _pair, _triple +from torch.jit.annotations import BroadcastingList2 + +from .modules.utils import _pair_from_first + +# Although some of the functions and docstrings are mirrored from the torch.nn, +# we want to have them here for future changes. + +__all__ = [ + "avg_pool2d", + "avg_pool3d", + "adaptive_avg_pool2d", + "adaptive_avg_pool3d", + "conv1d", + "conv2d", + "conv3d", + "interpolate", + "linear", + "max_pool1d", + "max_pool2d", + "celu", + "leaky_relu", + "hardtanh", + "hardswish", + "threshold", + "elu", + "hardsigmoid", + "clamp", + "upsample", + "upsample_bilinear", + "upsample_nearest", +] + +def avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, + count_include_pad=True, divisor_override=None): + r""" + Applies 2D average-pooling operation in :math:`kH \times kW` regions by step size + :math:`sH \times sW` steps. The number of output features is equal to the number of + input planes. + + .. note:: The input quantization parameters propagate to the output. + + See :class:`~torch.ao.nn.quantized.AvgPool2d` for details and output shape. + + Args: + input: quantized input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` + kernel_size: size of the pooling region. Can be a single number or a + tuple `(kH, kW)` + stride: stride of the pooling operation. Can be a single number or a + tuple `(sH, sW)`. Default: :attr:`kernel_size` + padding: implicit zero paddings on both sides of the input. Can be a + single number or a tuple `(padH, padW)`. Default: 0 + ceil_mode: when True, will use `ceil` instead of `floor` in the formula + to compute the output shape. Default: ``False`` + count_include_pad: when True, will include the zero-padding in the + averaging calculation. Default: ``True`` + divisor_override: if specified, it will be used as divisor, otherwise + size of the pooling region will be used. Default: None + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.avg_pool2d' must be quantized!") + return torch.nn.functional.avg_pool2d(input, kernel_size, stride, padding, + ceil_mode, count_include_pad, + divisor_override) + +def avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, + count_include_pad=True, divisor_override=None): + r""" + Applies 3D average-pooling operation in :math:`kD \ times kH \times kW` regions by step size + :math:`sD \times sH \times sW` steps. The number of output features is equal to the number of + input planes. + + .. note:: The input quantization parameters propagate to the output. + + Args: + input: quantized input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` + kernel_size: size of the pooling region. Can be a single number or a + tuple `(kD, kH, kW)` + stride: stride of the pooling operation. Can be a single number or a + tuple `(sD, sH, sW)`. Default: :attr:`kernel_size` + padding: implicit zero paddings on both sides of the input. Can be a + single number or a tuple `(padD, padH, padW)`. Default: 0 + ceil_mode: when True, will use `ceil` instead of `floor` in the formula + to compute the output shape. Default: ``False`` + count_include_pad: when True, will include the zero-padding in the + averaging calculation. Default: ``True`` + divisor_override: if specified, it will be used as divisor, otherwise + size of the pooling region will be used. Default: None + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.avg_pool3d' must be quantized!") + return torch.nn.functional.avg_pool3d(input, kernel_size, stride, padding, + ceil_mode, count_include_pad, + divisor_override) + +def adaptive_avg_pool2d(input: Tensor, output_size: BroadcastingList2[int]) -> Tensor: + r""" + Applies a 2D adaptive average pooling over a quantized input signal composed + of several quantized input planes. + + .. note:: The input quantization parameters propagate to the output. + + See :class:`~torch.ao.nn.quantized.AdaptiveAvgPool2d` for details and output shape. + + Args: + output_size: the target output size (single integer or + double-integer tuple) + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.functional.adaptive_avg_pool2d' must be quantized!") + return torch.nn.functional.adaptive_avg_pool2d(input, output_size) + +def adaptive_avg_pool3d(input: Tensor, output_size: BroadcastingList2[int]) -> Tensor: + r""" + Applies a 3D adaptive average pooling over a quantized input signal composed + of several quantized input planes. + + .. note:: The input quantization parameters propagate to the output. + + See :class:`~torch.ao.nn.quantized.AdaptiveAvgPool3d` for details and output shape. + + Args: + output_size: the target output size (single integer or + double-integer tuple) + """ + if not input.is_quantized: + raise ValueError( + "Input to 'quantized.functional.adaptive_avg_pool3d' must be quantized!") + return torch.nn.functional.adaptive_avg_pool3d(input, output_size) + +def conv1d(input, weight, bias, + stride=1, padding=0, dilation=1, groups=1, + padding_mode='zeros', + scale=1.0, zero_point=0, + dtype=torch.quint8): + r""" + Applies a 1D convolution over a quantized 1D input composed of several input + planes. + + See :class:`~torch.ao.nn.quantized.Conv1d` for details and output shape. + + Args: + input: quantized input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)` + weight: quantized filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , iW)` + bias: **non-quantized** bias tensor of shape :math:`(\text{out\_channels})`. The tensor type must be `torch.float`. + stride: the stride of the convolving kernel. Can be a single number or a + tuple `(sW,)`. Default: 1 + padding: implicit paddings on both sides of the input. Can be a + single number or a tuple `(padW,)`. Default: 0 + dilation: the spacing between kernel elements. Can be a single number or + a tuple `(dW,)`. Default: 1 + groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the + number of groups. Default: 1 + padding_mode: the padding mode to use. Only "zeros" is supported for quantized convolution at the moment. Default: "zeros" + scale: quantization scale for the output. Default: 1.0 + zero_point: quantization zero_point for the output. Default: 0 + dtype: quantization data type to use. Default: ``torch.quint8`` + + Examples:: + + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE) + >>> from torch.ao.nn.quantized import functional as qF + >>> filters = torch.randn(33, 16, 3, dtype=torch.float) + >>> inputs = torch.randn(20, 16, 50, dtype=torch.float) + >>> bias = torch.randn(33, dtype=torch.float) + >>> + >>> scale, zero_point = 1.0, 0 + >>> dtype_inputs = torch.quint8 + >>> dtype_filters = torch.qint8 + >>> + >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters) + >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs) + >>> qF.conv1d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point) + """ # noqa: E501 + if padding_mode != 'zeros': + raise NotImplementedError("Only zero-padding is supported!") + if input.dtype != torch.quint8: + raise NotImplementedError("Only torch.quint8 is supported for activation tensor!") + if weight.dtype != torch.qint8: + raise NotImplementedError("Only torch.qint8 is supported for weight tensor!") + if input.ndim != 3: + raise ValueError("Input shape must be `(N, C, L)`!") + stride = _pair_from_first(stride) + padding = _pair_from_first(padding) + dilation = _pair_from_first(dilation) + + packed_params = torch.ops.quantized.conv1d_prepack( + weight, bias, stride, padding, dilation, groups) + return torch.ops.quantized.conv1d(input, packed_params, scale, zero_point) + +def conv2d(input, weight, bias, + stride=1, padding=0, dilation=1, groups=1, + padding_mode='zeros', + scale=1.0, zero_point=0, + dtype=torch.quint8): + r""" + Applies a 2D convolution over a quantized 2D input composed of several input + planes. + + See :class:`~torch.ao.nn.quantized.Conv2d` for details and output shape. + + Args: + input: quantized input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` + weight: quantized filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)` + bias: **non-quantized** bias tensor of shape :math:`(\text{out\_channels})`. The tensor type must be `torch.float`. + stride: the stride of the convolving kernel. Can be a single number or a + tuple `(sH, sW)`. Default: 1 + padding: implicit paddings on both sides of the input. Can be a + single number or a tuple `(padH, padW)`. Default: 0 + dilation: the spacing between kernel elements. Can be a single number or + a tuple `(dH, dW)`. Default: 1 + groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the + number of groups. Default: 1 + padding_mode: the padding mode to use. Only "zeros" is supported for quantized convolution at the moment. Default: "zeros" + scale: quantization scale for the output. Default: 1.0 + zero_point: quantization zero_point for the output. Default: 0 + dtype: quantization data type to use. Default: ``torch.quint8`` + + Examples:: + + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE) + >>> from torch.ao.nn.quantized import functional as qF + >>> filters = torch.randn(8, 4, 3, 3, dtype=torch.float) + >>> inputs = torch.randn(1, 4, 5, 5, dtype=torch.float) + >>> bias = torch.randn(8, dtype=torch.float) + >>> + >>> scale, zero_point = 1.0, 0 + >>> dtype_inputs = torch.quint8 + >>> dtype_filters = torch.qint8 + >>> + >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters) + >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs) + >>> qF.conv2d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point) + """ # noqa: E501 + if padding_mode != 'zeros': + raise NotImplementedError("Only zero-padding is supported!") + if input.dtype != torch.quint8: + raise NotImplementedError("Only torch.quint8 is supported for activation tensor!") + if weight.dtype != torch.qint8: + raise NotImplementedError("Only torch.qint8 is supported for weight tensor!") + if input.ndim != 4: + raise ValueError("Input shape must be `(N, C, H, W)`!") + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + + packed_params = torch.ops.quantized.conv2d_prepack( + weight, bias, stride, padding, dilation, groups) + return torch.ops.quantized.conv2d(input, packed_params, scale, zero_point) + +def conv3d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, + padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8): + r""" + Applies a 3D convolution over a quantized 3D input composed of several input + planes. + + See :class:`~torch.ao.nn.quantized.Conv3d` for details and output shape. + + Args: + input: quantized input tensor of shape + :math:`(\text{minibatch} , \text{in\_channels} , iD , iH , iW)` + weight: quantized filters of shape + :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kD , kH , kW)` + bias: **non-quantized** bias tensor of shape + :math:`(\text{out\_channels})`. The tensor type must be `torch.float`. + stride: the stride of the convolving kernel. Can be a single number or a + tuple `(sD, sH, sW)`. Default: 1 + padding: implicit paddings on both sides of the input. Can be a + single number or a tuple `(padD, padH, padW)`. Default: 0 + dilation: the spacing between kernel elements. Can be a single number or + a tuple `(dD, dH, dW)`. Default: 1 + groups: split input into groups, :math:`\text{in\_channels}` should be + divisible by the number of groups. Default: 1 + padding_mode: the padding mode to use. Only "zeros" is supported for + quantized convolution at the moment. Default: "zeros" + scale: quantization scale for the output. Default: 1.0 + zero_point: quantization zero_point for the output. Default: 0 + dtype: quantization data type to use. Default: ``torch.quint8`` + + Examples:: + + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE) + >>> from torch.ao.nn.quantized import functional as qF + >>> filters = torch.randn(8, 4, 3, 3, 3, dtype=torch.float) + >>> inputs = torch.randn(1, 4, 5, 5, 5, dtype=torch.float) + >>> bias = torch.randn(8, dtype=torch.float) + >>> + >>> scale, zero_point = 1.0, 0 + >>> dtype_inputs = torch.quint8 + >>> dtype_filters = torch.qint8 + >>> + >>> q_filters = torch.quantize_per_tensor(filters, scale, zero_point, dtype_filters) + >>> q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, dtype_inputs) + >>> qF.conv3d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point) + """ # noqa: E501 + if padding_mode != 'zeros': + raise NotImplementedError("Only zero-padding is supported!") + if input.dtype != torch.quint8: + raise NotImplementedError("Only torch.quint8 is supported for activation tensor!") + if weight.dtype != torch.qint8: + raise NotImplementedError("Only torch.qint8 is supported for weight tensor!") + if input.ndim != 5: + raise ValueError("Input shape must be `(N, C, D, H, W)`!") + stride = _triple(stride) + padding = _triple(padding) + dilation = _triple(dilation) + + packed_params = torch.ops.quantized.conv3d_prepack( + weight, bias, stride, padding, dilation, groups) + return torch.ops.quantized.conv3d(input, packed_params, scale, zero_point) + +def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None): + r"""Down/up samples the input to either the given :attr:`size` or the given + :attr:`scale_factor` + + See :func:`torch.nn.functional.interpolate` for implementation details. + + The input dimensions are interpreted in the form: + `mini-batch x channels x [optional depth] x [optional height] x width`. + + .. note:: The input quantization parameters propagate to the output. + + .. note:: Only 2D/3D input is supported for quantized inputs + + .. note:: Only the following modes are supported for the quantized inputs: + + - `bilinear` + - `nearest` + + Args: + input (Tensor): the input tensor + size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]): + output spatial size. + scale_factor (float or Tuple[float]): multiplier for spatial size. Has to match input size if it is a tuple. + mode (str): algorithm used for upsampling: + ``'nearest'`` | ``'bilinear'`` + align_corners (bool, optional): Geometrically, we consider the pixels of the + input and output as squares rather than points. + If set to ``True``, the input and output tensors are aligned by the + center points of their corner pixels, preserving the values at the corner pixels. + If set to ``False``, the input and output tensors are aligned by the corner + points of their corner pixels, and the interpolation uses edge value padding + for out-of-boundary values, making this operation *independent* of input size + when :attr:`scale_factor` is kept the same. This only has an effect when :attr:`mode` + is ``'bilinear'``. + Default: ``False`` + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.interpolate' must be quantized!") + return torch.nn.functional.interpolate(input, size, scale_factor, mode, + align_corners) + +def linear( + input: Tensor, weight: Tensor, bias: Optional[Tensor] = None, + scale: Optional[float] = None, zero_point: Optional[int] = None +) -> Tensor: + r""" + Applies a linear transformation to the incoming quantized data: + :math:`y = xA^T + b`. + See :class:`~torch.ao.nn.quantized.Linear` + + .. note:: + + Current implementation packs weights on every call, which has penalty on performance. + If you want to avoid the overhead, use :class:`~torch.ao.nn.quantized.Linear`. + + Args: + input (Tensor): Quantized input of type `torch.quint8` + weight (Tensor): Quantized weight of type `torch.qint8` + bias (Tensor): None or fp32 bias of type `torch.float` + scale (double): output scale. If None, derived from the input scale + zero_point (long): output zero point. If None, derived from the input zero_point + + Shape: + - Input: :math:`(N, *, in\_features)` where `*` means any number of + additional dimensions + - Weight: :math:`(out\_features, in\_features)` + - Bias: :math:`(out\_features)` + - Output: :math:`(N, *, out\_features)` + """ + if scale is None: + scale = input.q_scale() + if zero_point is None: + zero_point = input.q_zero_point() + _packed_params = torch.ops.quantized.linear_prepack(weight, bias) + return torch.ops.quantized.linear(input, _packed_params, scale, zero_point) + +def max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, + ceil_mode=False, return_indices=False): + r"""Applies a 1D max pooling over a quantized input signal composed of + several quantized input planes. + + .. note:: The input quantization parameters are propagated to the output. + + See :class:`~torch.ao.nn.quantized.MaxPool1d` for details. + """ + if return_indices: + raise NotImplementedError("return_indices is not yet implemented!") + if stride is None: + stride = torch.jit.annotate(List[int], []) + return torch.nn.functional.max_pool1d(input, kernel_size, stride, padding, + dilation, ceil_mode=ceil_mode, return_indices=return_indices) + +def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, + ceil_mode=False, return_indices=False): + r"""Applies a 2D max pooling over a quantized input signal composed of + several quantized input planes. + + .. note:: The input quantization parameters are propagated to the output. + + See :class:`~torch.ao.nn.quantized.MaxPool2d` for details. + """ + if return_indices: + raise NotImplementedError("return_indices is not yet implemented!") + if stride is None: + stride = torch.jit.annotate(List[int], []) + return torch.nn.functional.max_pool2d(input, kernel_size, stride, padding, + dilation, ceil_mode=ceil_mode, return_indices=return_indices) + +def celu(input: Tensor, scale: float, zero_point: int, alpha: float = 1.) -> Tensor: + r"""celu(input, scale, zero_point, alpha=1.) -> Tensor + + Applies the quantized CELU function element-wise. + + .. math:: + \text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x / \alpha) - 1)) + + Args: + input: quantized input + alpha: the :math:`\alpha` value for the CELU formulation. Default: 1.0 + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.celu' must be quantized!") + return torch.ops.quantized.celu(input, scale, zero_point, alpha) + + +def leaky_relu(input: Tensor, negative_slope: float = 0.01, inplace: bool = False, + scale: Optional[float] = None, zero_point: Optional[int] = None): + r""" + Quantized version of the. + leaky_relu(input, negative_slope=0.01, inplace=False, scale, zero_point) -> Tensor + + Applies element-wise, + :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)` + + Args: + input: Quantized input + negative_slope: The slope of the negative input + inplace: Inplace modification of the input tensor + scale, zero_point: Scale and zero point of the output tensor. + + See :class:`~torch.nn.LeakyReLU` for more details. + """ + if scale is not None and zero_point is not None: + assert not inplace, "Cannot rescale with `inplace`" + output = torch._empty_affine_quantized( + input.shape, scale=scale, zero_point=int(zero_point), dtype=input.dtype) + torch._C._nn.leaky_relu(input, negative_slope, out=output) + return output + if inplace: + result = torch._C._nn.leaky_relu_(input, negative_slope) + else: + result = torch._C._nn.leaky_relu(input, negative_slope) + return result + +def hardtanh(input: Tensor, min_val: float = -1., max_val: float = 1., inplace: bool = False) -> Tensor: + r"""This is the quantized version of :func:`~torch.nn.functional.hardtanh`. + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.hardtanh' must be quantized!") + if inplace: + return torch._C._nn.hardtanh_(input, min_val, max_val) + return torch._C._nn.hardtanh(input, min_val, max_val) + +def hardswish(input: Tensor, scale: float, zero_point: int) -> Tensor: + r"""This is the quantized version of :func:`~torch.nn.functional.hardswish`. + + Args: + input: quantized input + scale: quantization scale of the output tensor + zero_point: quantization zero point of the output tensor + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.hardswish' must be quantized!") + return torch._ops.ops.quantized.hardswish(input, scale, zero_point) + +def threshold(input: Tensor, threshold: float, value: float) -> Tensor: + r"""Applies the quantized version of the threshold function element-wise: + + .. math:: + x = \begin{cases} + x & \text{if~} x > \text{threshold} \\ + \text{value} & \text{otherwise} + \end{cases} + + See :class:`~torch.nn.Threshold` for more details. + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.threshold' must be quantized!") + if threshold is None: + raise ValueError("Input to 'threshold' must be specified!") + if value is None: + raise ValueError("Input to 'value' must be specified!") + return torch._ops.ops.quantized.threshold(input, threshold, value) + +def elu(input: Tensor, scale: float, zero_point: int, alpha: float = 1.) -> Tensor: + r"""This is the quantized version of :func:`~torch.nn.functional.elu`. + + Args: + input: quantized input + scale: quantization scale of the output tensor + zero_point: quantization zero point of the output tensor + alpha: the alpha constant + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.elu' must be quantized!") + return torch.ops.quantized.elu(input, scale, zero_point, alpha) + +def hardsigmoid(input: Tensor, inplace: bool = False) -> Tensor: + r"""This is the quantized version of :func:`~torch.nn.functional.hardsigmoid`. + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.hardsigmoid' must be quantized!") + if inplace: + return torch._C._nn.hardsigmoid_(input) # type: ignore[attr-defined] + return torch._C._nn.hardsigmoid(input) + +def clamp(input: Tensor, min_: float, max_: float) -> Tensor: + r"""float(input, min\_, max\_) -> Tensor + + Applies the clamp function element-wise. + See :class:`~torch.ao.nn.quantized.clamp` for more details. + + Args: + input: quantized input + min_: minimum value for clamping + max_: maximum value for clamping + """ + if not input.is_quantized: + raise ValueError("Input to 'quantized.clamp' must be quantized!") + return torch.clamp(input, min_, max_) + +def upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None): + r"""Upsamples the input to either the given :attr:`size` or the given + :attr:`scale_factor` + + .. warning:: + This function is deprecated in favor of + :func:`torch.ao.nn.quantized.functional.interpolate`. + This is equivalent with ``nn.quantized.functional.interpolate(...)``. + + See :func:`torch.nn.functional.interpolate` for implementation details. + + The input dimensions are interpreted in the form: + `mini-batch x channels x [optional depth] x [optional height] x width`. + + .. note:: The input quantization parameters propagate to the output. + + .. note:: Only 2D input is supported for quantized inputs + + .. note:: Only the following modes are supported for the quantized inputs: + + - `bilinear` + - `nearest` + + Args: + input (Tensor): quantized input tensor + size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]): + output spatial size. + scale_factor (float or Tuple[float]): multiplier for spatial size. Has to be an integer. + mode (str): algorithm used for upsampling: + ``'nearest'`` | ``'bilinear'`` + align_corners (bool, optional): Geometrically, we consider the pixels of the + input and output as squares rather than points. + If set to ``True``, the input and output tensors are aligned by the + center points of their corner pixels, preserving the values at the corner pixels. + If set to ``False``, the input and output tensors are aligned by the corner + points of their corner pixels, and the interpolation uses edge value padding + for out-of-boundary values, making this operation *independent* of input size + when :attr:`scale_factor` is kept the same. This only has an effect when :attr:`mode` + is ``'bilinear'``. + Default: ``False`` + + .. warning:: + With ``align_corners = True``, the linearly interpolating modes + (`bilinear`) don't proportionally align the + output and input pixels, and thus the output values can depend on the + input size. This was the default behavior for these modes up to version + 0.3.1. Since then, the default behavior is ``align_corners = False``. + See :class:`~torch.nn.Upsample` for concrete examples on how this + affects the outputs. + """ + warnings.warn("nn.quantized.functional.upsample is deprecated. Use nn.quantized.functional.interpolate instead.") + return interpolate(input, size, scale_factor, mode, align_corners) + +def upsample_bilinear(input, size=None, scale_factor=None): + r"""Upsamples the input, using bilinear upsampling. + + .. warning:: + This function is deprecated in favor of + :func:`torch.ao.nn.quantized.functional.interpolate`. + This is equivalent with + ``nn.quantized.functional.interpolate(..., mode='bilinear', align_corners=True)``. + + .. note:: The input quantization parameters propagate to the output. + + .. note:: Only 2D inputs are supported + + Args: + input (Tensor): quantized input + size (int or Tuple[int, int]): output spatial size. + scale_factor (int or Tuple[int, int]): multiplier for spatial size + """ + # DeprecationWarning is ignored by default + warnings.warn("nn.quantized.functional.upsample_bilinear is deprecated. Use nn.quantized.functional.interpolate instead.") + return interpolate(input, size, scale_factor, mode='bilinear', align_corners=True) + +def upsample_nearest(input, size=None, scale_factor=None): + r"""Upsamples the input, using nearest neighbours' pixel values. + + .. warning:: + This function is deprecated in favor of + :func:`torch.ao.nn.quantized.functional.interpolate`. + This is equivalent with ``nn.quantized.functional.interpolate(..., mode='nearest')``. + + .. note:: The input quantization parameters propagate to the output. + + .. note:: Only 2D inputs are supported + + Args: + input (Tensor): quantized input + size (int or Tuple[int, int] or Tuple[int, int, int]): output spatial + size. + scale_factor (int): multiplier for spatial size. Has to be an integer. + """ + # DeprecationWarning is ignored by default + warnings.warn("nn.quantized.functional.upsample_nearest is deprecated. Use nn.quantized.functional.interpolate instead.") + return interpolate(input, size, scale_factor, mode='nearest') diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..668f765fe3ef0ae082e94a30639fe343034ca306 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py @@ -0,0 +1,131 @@ +import torch + +# The quantized modules use `torch.nn` and `torch.ao.nn.quantizable` +# packages. However, the `quantizable` package uses "lazy imports" +# to avoid circular dependency. +# Hence we need to include it here to make sure it is resolved before +# they are used in the modules. +import torch.ao.nn.quantizable + +from torch.nn.modules.pooling import MaxPool2d + +from .activation import ReLU6, Hardswish, ELU, LeakyReLU, Sigmoid, Softmax, MultiheadAttention, PReLU +from .dropout import Dropout +from .batchnorm import BatchNorm2d, BatchNorm3d +from .normalization import LayerNorm, GroupNorm, InstanceNorm1d, \ + InstanceNorm2d, InstanceNorm3d +from .conv import Conv1d, Conv2d, Conv3d +from .conv import ConvTranspose1d, ConvTranspose2d, ConvTranspose3d +from .linear import Linear +from .embedding_ops import Embedding, EmbeddingBag +from .rnn import LSTM + +from .functional_modules import FloatFunctional, FXFloatFunctional, QFunctional + +__all__ = [ + 'BatchNorm2d', + 'BatchNorm3d', + 'Conv1d', + 'Conv2d', + 'Conv3d', + 'ConvTranspose1d', + 'ConvTranspose2d', + 'ConvTranspose3d', + 'DeQuantize', + 'ELU', + 'Embedding', + 'EmbeddingBag', + 'GroupNorm', + 'Hardswish', + 'InstanceNorm1d', + 'InstanceNorm2d', + 'InstanceNorm3d', + 'LayerNorm', + 'LeakyReLU', + 'Linear', + 'LSTM', + 'MultiheadAttention', + 'Quantize', + 'ReLU6', + 'Sigmoid', + 'Softmax', + 'Dropout', + 'PReLU', + # Wrapper modules + 'FloatFunctional', + 'FXFloatFunctional', + 'QFunctional', +] + +class Quantize(torch.nn.Module): + r"""Quantizes an incoming tensor + + Args: + `scale`: scale of the output Quantized Tensor + `zero_point`: zero_point of output Quantized Tensor + `dtype`: data type of output Quantized Tensor + `factory_kwargs`: Dictionary of kwargs used for configuring initialization + of internal buffers. Currently, `device` and `dtype` are supported. + Example: `factory_kwargs={'device': 'cuda', 'dtype': torch.float64}` + will initialize internal buffers as type `torch.float64` on the current CUDA device. + Note that `dtype` only applies to floating-point buffers. + + Examples:: + >>> t = torch.tensor([[1., -1.], [1., -1.]]) + >>> scale, zero_point, dtype = 1.0, 2, torch.qint8 + >>> qm = Quantize(scale, zero_point, dtype) + >>> # xdoctest: +SKIP + >>> qt = qm(t) + >>> print(qt) + tensor([[ 1., -1.], + [ 1., -1.]], size=(2, 2), dtype=torch.qint8, scale=1.0, zero_point=2) + """ + + scale: torch.Tensor + zero_point: torch.Tensor + + def __init__(self, scale, zero_point, dtype, factory_kwargs=None): + factory_kwargs = torch.nn.factory_kwargs(factory_kwargs) + super().__init__() + self.register_buffer('scale', torch.tensor([scale], **factory_kwargs)) + self.register_buffer('zero_point', + torch.tensor([zero_point], dtype=torch.long, + **{k: v for k, v in factory_kwargs.items() if k != 'dtype'})) + self.dtype = dtype + + def forward(self, X): + return torch.quantize_per_tensor(X, float(self.scale), + int(self.zero_point), self.dtype) + + @staticmethod + def from_float(mod): + assert hasattr(mod, 'activation_post_process') + scale, zero_point = mod.activation_post_process.calculate_qparams() + return Quantize(scale.float().item(), zero_point.long().item(), mod.activation_post_process.dtype) + + def extra_repr(self): + return f'scale={self.scale}, zero_point={self.zero_point}, dtype={self.dtype}' + + +class DeQuantize(torch.nn.Module): + r"""Dequantizes an incoming tensor + + Examples:: + >>> input = torch.tensor([[1., -1.], [1., -1.]]) + >>> scale, zero_point, dtype = 1.0, 2, torch.qint8 + >>> qm = Quantize(scale, zero_point, dtype) + >>> # xdoctest: +SKIP + >>> quantized_input = qm(input) + >>> dqm = DeQuantize() + >>> dequantized = dqm(quantized_input) + >>> print(dequantized) + tensor([[ 1., -1.], + [ 1., -1.]], dtype=torch.float32) + """ + + def forward(self, Xq): + return Xq.dequantize() + + @staticmethod + def from_float(mod): + return DeQuantize() diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d24d45c2978de8132db9e805ad19ad0b721fe23 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/activation.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c4389a60d9b08fa264765ead9ad3f2932e141be9 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/embedding_ops.py @@ -0,0 +1,295 @@ +import torch +import torch.nn as nn +from torch import Tensor # noqa: F401 +from torch._jit_internal import Optional, List # noqa: F401 + +from .utils import _hide_packed_params_repr +from .utils import _quantize_weight + +__all__ = ['EmbeddingPackedParams', 'Embedding', 'EmbeddingBag'] + +class EmbeddingPackedParams(torch.nn.Module): + _version = 1 + + def __init__(self, num_embeddings, embedding_dim, dtype=torch.quint8): + super().__init__() + self.dtype = dtype + if self.dtype in [torch.quint8, torch.quint4x2]: + scales = torch.ones(num_embeddings, dtype=torch.float) + zero_points = torch.zeros(num_embeddings, dtype=torch.float) + wq = torch._empty_per_channel_affine_quantized([num_embeddings, embedding_dim], scales=scales, + zero_points=zero_points, + axis=0, dtype=self.dtype) + self.set_weight(wq) + else: + raise NotImplementedError(f'Unsupported dtype on quantized embedding! Supports quint8 and quint4x2. Got dtype: {dtype}') + + @torch.jit.export + def set_weight(self, weight: torch.Tensor) -> None: + if self.dtype in [torch.quint8, torch.quint4x2]: + self._packed_weight = torch.ops.quantized.embedding_bag_prepack(weight) + else: + raise NotImplementedError('Unsupported dtype for quantized embedding prepack! Supports quint8 and quint4x2.') + + + @torch.jit.export + def _weight(self): + if self.dtype in [torch.quint8, torch.quint4x2]: + return torch.ops.quantized.embedding_bag_unpack(self._packed_weight) + else: + raise NotImplementedError('Unsupported dtype for quantized embedding unpack! Supports quint8 and quint4x2.') + + def forward(self, x): + return x + + # Version 1 + # self + # |--- _packed_weight : Tensor representing weight of EmbeddingPackedParamsBase + # |--- dtype : torch.dtype + + def _save_to_state_dict(self, destination, prefix, keep_vars): + super()._save_to_state_dict(destination, prefix, keep_vars) + destination[prefix + 'dtype'] = self.dtype + destination[prefix + '_packed_weight'] = self._weight() + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + self.dtype = state_dict[prefix + 'dtype'] + state_dict.pop(prefix + 'dtype') + + weight = state_dict[prefix + '_packed_weight'] + state_dict.pop(prefix + '_packed_weight') + self.set_weight(weight) + + super()._load_from_state_dict(state_dict, prefix, local_metadata, False, + missing_keys, unexpected_keys, error_msgs) + + def __repr__(self): + return self._weight().__repr__() + +class Embedding(torch.nn.Module): + r""" + A quantized Embedding module with quantized packed weights as inputs. + We adopt the same interface as `torch.nn.Embedding`, please see + https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding for documentation. + + Similar to :class:`~torch.nn.Embedding`, attributes will be randomly + initialized at module creation time and will be overwritten later + + Attributes: + weight (Tensor): the non-learnable quantized weights of the module of + shape :math:`(\text{num\_embeddings}, \text{embedding\_dim})`. + + Examples:: + >>> m = nn.quantized.Embedding(num_embeddings=10, embedding_dim=12) + >>> indices = torch.tensor([9, 6, 5, 7, 8, 8, 9, 2, 8]) + >>> output = m(indices) + >>> print(output.size()) + torch.Size([9, 12]) + + """ + _version = 1 + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False, + sparse: bool = False, _weight: Optional[Tensor] = None, dtype=torch.quint8) -> None: + super().__init__() + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + self.dtype = dtype + + if _weight is None: + scales = torch.ones(num_embeddings, dtype=torch.float) + zero_points = torch.zeros(num_embeddings, dtype=torch.float) + qweight = torch._empty_per_channel_affine_quantized([num_embeddings, embedding_dim], + scales=scales, zero_points=zero_points, + axis=0, dtype=torch.quint8) + else: + assert list(_weight.shape) == [num_embeddings, embedding_dim], \ + 'Shape of weight does not match num_embeddings and embedding_dim' + qweight = _weight + + self._packed_params = EmbeddingPackedParams(num_embeddings, embedding_dim, dtype) + self._packed_params.set_weight(qweight) + + def forward(self, indices: Tensor) -> Tensor: + if self.dtype == torch.quint4x2: + return torch.ops.quantized.embedding_4bit(self._packed_params._packed_weight, indices) + else: + return torch.ops.quantized.embedding_byte(self._packed_params._packed_weight, indices) + + def _get_name(self): + return 'QuantizedEmbedding' + + def __repr__(self): + return _hide_packed_params_repr(self, EmbeddingPackedParams) + + def extra_repr(self): + extra_repr_str = 'num_embeddings={}, embedding_dim={}, dtype={}, qscheme={}'.format( + self.num_embeddings, self.embedding_dim, self._packed_params.dtype, self.weight().qscheme() + ) + + return extra_repr_str + + def set_weight(self, w: torch.Tensor) -> None: + self._packed_params.set_weight(w) + + def weight(self): + return self._packed_params._weight() + + @classmethod + def from_float(cls, mod): + r"""Create a quantized embedding module from a float module + + Args: + mod (Module): a float module, either produced by torch.ao.quantization + utilities or provided by user + """ + if hasattr(mod, 'weight_fake_quant'): + assert type(mod) == torch.ao.nn.qat.Embedding, 'nnq.' + cls.__name__ + '.from_float ' + \ + 'with fake quant only works for ' + torch.ao.nn.qat.Embedding.__name__ + weight_observer = mod.weight_fake_quant + activation_post_process = mod.activation_post_process + else: + assert type(mod) == nn.Embedding, 'nnq.' + cls.__name__ + '.from_float only works for ' + \ + nn.Embedding.__name__ + assert hasattr(mod, 'qconfig'), 'Embedding input float module must have qconfig defined' + from torch.ao.quantization import float_qparams_weight_only_qconfig + if mod.qconfig is not None and mod.qconfig.weight is not None: # type: ignore[union-attr] + weight_observer = mod.qconfig.weight() # type: ignore[union-attr, operator] + else: + weight_observer = float_qparams_weight_only_qconfig.weight() + + dtype = weight_observer.dtype + is_float_qparams_qconfig = weight_observer.qscheme == torch.per_channel_affine_float_qparams + assert is_float_qparams_qconfig, \ + 'Embedding quantization is only supported with float_qparams_weight_only_qconfig.' + + assert dtype == torch.quint8 or dtype == torch.quint4x2, \ + f'The only supported dtype for nnq.Embedding is torch.quint8 and torch.quint4x2, got {dtype}' + + # Run the observer to calculate qparams. + weight_observer(mod.weight) + qweight = _quantize_weight(mod.weight.float(), weight_observer) + + # Create quantized Embedding module and pass in the quantized weight + qembedding = Embedding(mod.num_embeddings, mod.embedding_dim) + qembedding.set_weight(qweight) + return qembedding + + @classmethod + def from_reference(cls, ref_embedding): + qembedding = cls( + ref_embedding.num_embeddings, + ref_embedding.embedding_dim, + ref_embedding.padding_idx, + ref_embedding.max_norm, + ref_embedding.norm_type, + ref_embedding.scale_grad_by_freq, + ref_embedding.sparse, + ref_embedding.get_quantized_weight(), + ref_embedding.weight_dtype, + ) + return qembedding + +class EmbeddingBag(Embedding): + r""" + A quantized EmbeddingBag module with quantized packed weights as inputs. + We adopt the same interface as `torch.nn.EmbeddingBag`, please see + https://pytorch.org/docs/stable/nn.html#torch.nn.EmbeddingBag for documentation. + + Similar to :class:`~torch.nn.EmbeddingBag`, attributes will be randomly + initialized at module creation time and will be overwritten later + + Attributes: + weight (Tensor): the non-learnable quantized weights of the module of + shape :math:`(\text{num\_embeddings}, \text{embedding\_dim})`. + + Examples:: + >>> m = nn.quantized.EmbeddingBag(num_embeddings=10, embedding_dim=12, include_last_offset=True, mode='sum') + >>> indices = torch.tensor([9, 6, 5, 7, 8, 8, 9, 2, 8, 6, 6, 9, 1, 6, 8, 8, 3, 2, 3, 6, 3, 6, 5, 7, 0, 8, 4, 6, 5, 8, 2, 3]) + >>> offsets = torch.tensor([0, 19, 20, 28, 28, 32]) + >>> output = m(indices, offsets) + >>> print(output.size()) + torch.Size([5, 12]) + + """ + _version = 1 + + def __init__(self, num_embeddings: int, embedding_dim: int, + max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False, + mode: str = 'sum', sparse: bool = False, _weight: Optional[Tensor] = None, + include_last_offset: bool = False, dtype=torch.quint8) -> None: + super().__init__(num_embeddings, embedding_dim, _weight=_weight, dtype=dtype) + + self.mode = mode + self.pruned_weights = False + self.include_last_offset = include_last_offset + self.dtype = dtype + + def forward(self, indices: Tensor, offsets: Optional[Tensor] = None, per_sample_weights: Optional[Tensor] = None, + compressed_indices_mapping: Optional[Tensor] = None) -> Tensor: + if self.dtype == torch.quint4x2: + return torch.ops.quantized.embedding_bag_4bit(self._packed_params._packed_weight, indices, offsets, False, 0, + self.pruned_weights, per_sample_weights, compressed_indices_mapping, + self.include_last_offset) + else: + return torch.ops.quantized.embedding_bag_byte(self._packed_params._packed_weight, indices, offsets, False, 0, + self.pruned_weights, per_sample_weights, compressed_indices_mapping, + self.include_last_offset) + + def _get_name(self): + return 'QuantizedEmbeddingBag' + + @classmethod + def from_float(cls, mod): + r"""Create a quantized embedding_bag module from a float module + + Args: + mod (Module): a float module, either produced by torch.ao.quantization + utilities or provided by user + """ + if hasattr(mod, 'weight_fake_quant'): + weight_observer = mod.weight_fake_quant + else: + assert type(mod) == nn.EmbeddingBag, 'nnq.' + cls.__name__ + '.from_float only works for ' + \ + nn.EmbeddingBag.__name__ + assert hasattr(mod, 'qconfig'), 'EmbeddingBag input float module must have qconfig defined' + from torch.ao.quantization.qconfig import float_qparams_weight_only_qconfig + if mod.qconfig is not None and mod.qconfig.weight is not None: # type: ignore[union-attr] + weight_observer = mod.qconfig.weight() # type: ignore[union-attr, operator] + else: + weight_observer = float_qparams_weight_only_qconfig.weight() + + dtype = weight_observer.dtype + is_float_qparams_qconfig = weight_observer.qscheme == torch.per_channel_affine_float_qparams + assert is_float_qparams_qconfig, \ + 'EmbeddingBag quantization is only supported with float_qparams_weight_only_qconfig.' + + assert dtype == torch.quint8 or dtype == torch.quint4x2, \ + f'The only supported dtype for nnq.EmbeddingBag is torch.quint8 and torch.quint4x2, got {dtype}' + + # Run the observer to calculate qparams. + weight_observer(mod.weight) + qweight = _quantize_weight(mod.weight.float(), weight_observer) + + # Create quantized EmbeddingBag module and pass in the quantized weight + qembedding_bag = EmbeddingBag(mod.num_embeddings, mod.embedding_dim, dtype=dtype) + qembedding_bag.set_weight(qweight) + return qembedding_bag + + @classmethod + def from_reference(cls, ref_embedding_bag): + qembedding_bag = cls( + ref_embedding_bag.num_embeddings, + ref_embedding_bag.embedding_dim, + ref_embedding_bag.max_norm, + ref_embedding_bag.norm_type, + ref_embedding_bag.scale_grad_by_freq, + ref_embedding_bag.mode, + ref_embedding_bag.sparse, + ref_embedding_bag.get_quantized_weight(), + ref_embedding_bag.include_last_offset, + ref_embedding_bag.weight_dtype, + ) + return qembedding_bag diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..f798a241e3242a74d9d022e6f06b140f65bb5b99 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/normalization.py @@ -0,0 +1,199 @@ +import torch + +__all__ = ['LayerNorm', 'GroupNorm', 'InstanceNorm1d', 'InstanceNorm2d', 'InstanceNorm3d'] + +class LayerNorm(torch.nn.LayerNorm): + r"""This is the quantized version of :class:`~torch.nn.LayerNorm`. + + Additional args: + * **scale** - quantization scale of the output, type: double. + * **zero_point** - quantization zero point of the output, type: long. + + """ + + def __init__(self, normalized_shape, weight, bias, scale, zero_point, eps=1e-5, + elementwise_affine=True, device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__(normalized_shape, eps=eps, elementwise_affine=elementwise_affine, + **factory_kwargs) + self.weight = weight + self.bias = bias + self.register_buffer('scale', torch.tensor(scale, **factory_kwargs)) + self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs)) + + def forward(self, input): + return torch.ops.quantized.layer_norm( + input, self.normalized_shape, weight=self.weight, bias=self.bias, + eps=self.eps, output_scale=self.scale, output_zero_point=self.zero_point) + + def _get_name(self): + return 'QuantizedLayerNorm' + + @classmethod + def from_float(cls, mod): + scale, zero_point = mod.activation_post_process.calculate_qparams() + new_mod = cls( + mod.normalized_shape, mod.weight, mod.bias, float(scale), + int(zero_point), mod.eps, mod.elementwise_affine) + return new_mod + + @classmethod + def from_reference(cls, mod, scale, zero_point): + return cls( + mod.normalized_shape, mod.weight, mod.bias, float(scale), + int(zero_point), mod.eps, mod.elementwise_affine) + +class GroupNorm(torch.nn.GroupNorm): + r"""This is the quantized version of :class:`~torch.nn.GroupNorm`. + + Additional args: + * **scale** - quantization scale of the output, type: double. + * **zero_point** - quantization zero point of the output, type: long. + + """ + __constants__ = ['num_groups', 'num_channels', 'eps', 'affine'] + + def __init__(self, num_groups, num_channels, weight, bias, scale, zero_point, eps=1e-5, + affine=True, device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__(num_groups, num_channels, eps, affine, **factory_kwargs) + self.weight = weight + self.bias = bias + self.register_buffer('scale', torch.tensor(scale, **factory_kwargs)) + self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs)) + + def forward(self, input): + return torch.ops.quantized.group_norm( + input, self.num_groups, self.weight, self.bias, self.eps, self.scale, + self.zero_point) + + def _get_name(self): + return 'QuantizedGroupNorm' + + @classmethod + def from_float(cls, mod): + scale, zero_point = mod.activation_post_process.calculate_qparams() + new_mod = cls( + mod.num_groups, mod.num_channels, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + return new_mod + +class InstanceNorm1d(torch.nn.InstanceNorm1d): + r"""This is the quantized version of :class:`~torch.nn.InstanceNorm1d`. + + Additional args: + * **scale** - quantization scale of the output, type: double. + * **zero_point** - quantization zero point of the output, type: long. + + """ + def __init__(self, num_features, weight, bias, scale, zero_point, + eps=1e-5, momentum=0.1, affine=False, + track_running_stats=False, device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs) + self.weight = weight + self.bias = bias + self.register_buffer('scale', torch.tensor(scale, **factory_kwargs)) + self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs)) + + def forward(self, input): + return torch.ops.quantized.instance_norm( + input, self.weight, self.bias, self.eps, self.scale, + self.zero_point) + + def _get_name(self): + return 'QuantizedInstanceNorm1d' + + @classmethod + def from_float(cls, mod): + scale, zero_point = mod.activation_post_process.calculate_qparams() + new_mod = cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + return new_mod + + @classmethod + def from_reference(cls, mod, scale, zero_point): + return cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + +class InstanceNorm2d(torch.nn.InstanceNorm2d): + r"""This is the quantized version of :class:`~torch.nn.InstanceNorm2d`. + + Additional args: + * **scale** - quantization scale of the output, type: double. + * **zero_point** - quantization zero point of the output, type: long. + + """ + def __init__(self, num_features, weight, bias, scale, zero_point, + eps=1e-5, momentum=0.1, affine=False, + track_running_stats=False, device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs) + self.weight = weight + self.bias = bias + self.register_buffer('scale', torch.tensor(scale, **factory_kwargs)) + self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs)) + + def forward(self, input): + return torch.ops.quantized.instance_norm( + input, self.weight, self.bias, self.eps, self.scale, + self.zero_point) + + def _get_name(self): + return 'QuantizedInstanceNorm2d' + + @classmethod + def from_float(cls, mod): + scale, zero_point = mod.activation_post_process.calculate_qparams() + new_mod = cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + return new_mod + + @classmethod + def from_reference(cls, mod, scale, zero_point): + return cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + +class InstanceNorm3d(torch.nn.InstanceNorm3d): + r"""This is the quantized version of :class:`~torch.nn.InstanceNorm3d`. + + Additional args: + * **scale** - quantization scale of the output, type: double. + * **zero_point** - quantization zero point of the output, type: long. + + """ + def __init__(self, num_features, weight, bias, scale, zero_point, + eps=1e-5, momentum=0.1, affine=False, + track_running_stats=False, device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__(num_features, eps, momentum, affine, track_running_stats, **factory_kwargs) + self.weight = weight + self.bias = bias + self.register_buffer('scale', torch.tensor(scale, **factory_kwargs)) + self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs)) + + def forward(self, input): + return torch.ops.quantized.instance_norm( + input, self.weight, self.bias, self.eps, self.scale, + self.zero_point) + + def _get_name(self): + return 'QuantizedInstanceNorm3d' + + @classmethod + def from_float(cls, mod): + scale, zero_point = mod.activation_post_process.calculate_qparams() + new_mod = cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) + return new_mod + + @classmethod + def from_reference(cls, mod, scale, zero_point): + return cls( + mod.num_features, mod.weight, mod.bias, float(scale), int(zero_point), + mod.eps, mod.affine) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7c24c0ca31dc7c1e0ed348667ad4f1cb69cbda31 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/utils.py @@ -0,0 +1,117 @@ +import abc +import torch +import itertools +import collections +from torch.nn.modules.module import _addindent + +__all__ = [ + "WeightedQuantizedModule", +] + +class WeightedQuantizedModule(torch.nn.Module, metaclass=abc.ABCMeta): + """Wrapper for quantized modules than can be lowered from reference modules.""" + @classmethod + @abc.abstractmethod + def from_reference(cls, ref_module, output_scale, output_zero_point): + raise NotImplementedError + +def _get_weight_observer(observer): + # FakeQuantize observer + if hasattr(observer, "activation_post_process"): + observer = observer.activation_post_process + # UniformQuantizationObserverBase observer + return observer + +def _needs_weight_clamping(observer, dtype): + observer = _get_weight_observer(observer) + if dtype in [torch.qint8, torch.quint8, torch.qint32]: + info = torch.iinfo(dtype) + return observer.quant_min > info.min or observer.quant_max < info.max + return False + +def _clamp_weights(qweight, observer, scale, zp): + if not _needs_weight_clamping(observer, qweight.dtype): + return qweight + + observer = _get_weight_observer(observer) + min_, max_ = observer.quant_min, observer.quant_max + + # Doing this because can't use torch.ops.quantized.clamp() with per_channel qscheme yet. + qw_int_max = torch.clone(qweight.int_repr()).fill_(max_) + qw_int_min = torch.clone(qweight.int_repr()).fill_(min_) + qw_int = torch.minimum(torch.maximum(qweight.int_repr(), qw_int_min), qw_int_max) + + if observer.qscheme in [torch.per_tensor_symmetric, + torch.per_tensor_affine]: + qweight = torch._make_per_tensor_quantized_tensor(qw_int, scale.item(), zp.item()) + elif observer.qscheme in [torch.per_channel_symmetric, + torch.per_channel_affine, + torch.per_channel_affine_float_qparams]: + qweight = torch._make_per_channel_quantized_tensor(qw_int, scale, zp, axis=observer.ch_axis) + else: + raise ValueError("Unexpected qscheme " + observer.qscheme) + return qweight + +def _quantize_weight(float_wt, observer): + wt_scale, wt_zp = observer.calculate_qparams() + if observer.qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]: + qweight = torch.quantize_per_tensor( + float_wt, + float(wt_scale), int(wt_zp), torch.qint8) + qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp) + elif observer.qscheme in [torch.per_channel_symmetric, torch.per_channel_affine]: + wt_axis = observer.ch_axis + qweight = torch.quantize_per_channel( + float_wt, + wt_scale.to(torch.double), wt_zp.to(torch.int64), wt_axis, torch.qint8) + qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp) + elif observer.qscheme in [torch.per_channel_affine_float_qparams]: + qweight = torch.quantize_per_channel( + float_wt, + wt_scale.to(torch.float), wt_zp.to(torch.float), observer.ch_axis, observer.dtype) + qweight = _clamp_weights(qweight, observer, wt_scale, wt_zp) + else: + raise ValueError("Unexpected qscheme " + observer.qscheme) + return qweight + +def _ntuple_from_first(n): + """Converts the argument to a tuple of size n + with the first element repeated.""" + def parse(x): + while isinstance(x, collections.abc.Sequence): + if len(x) == n: + break + x = x[0] + return tuple(itertools.repeat(x, n)) + return parse + +def _hide_packed_params_repr(self, params): + # We don't want to show `PackedParams` children, hence custom + # `__repr__`. This is the same as nn.Module.__repr__, except the check + # for the `params module`. + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + if isinstance(module, params): + continue + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + + main_str = self._get_name() + '(' + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + + main_str += ')' + return main_str + +_pair_from_first = _ntuple_from_first(2) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9fea7a8b70a7de658bb2930fbb7beca21f00132 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa38486ac7df2291e47cc1f02b31a014553941f7 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/conv.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08f4ade3a2de72bc34a03d68e3eb4446d03950fb Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/rnn.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..910223056fba9ca12333136c538765cf8643bc54 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/conv.py @@ -0,0 +1,318 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Optional, Dict, Any, List +from torch.nn.common_types import _size_1_t +from .utils import ReferenceQuantizedModule + +__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d'] + +class _ConvNd(torch.nn.modules.conv._ConvNd, ReferenceQuantizedModule): + """ A reference version of nn.quantized.Conv2d + we will not pack the parameters in this module, since weight packing is an + optimization for quantized backends supported in PyTorch (fbgemm/qnnpack), + this is useful when user want to use this module in other backends like Glow. + """ + __annotations__ = {"bias": Optional[torch.Tensor]} + _IS_REFERENCE = True + + @staticmethod + def from_float(cls, float_conv, weight_qparams): + qref_conv = cls( + float_conv.in_channels, + float_conv.out_channels, + float_conv.kernel_size, # type: ignore[arg-type] + float_conv.stride, # type: ignore[arg-type] + float_conv.padding, # type: ignore[arg-type] + float_conv.dilation, # type: ignore[arg-type] + float_conv.groups, + float_conv.bias is not None, # type: ignore[arg-type] + float_conv.padding_mode, + device=float_conv.weight.device, + dtype=float_conv.weight.dtype, + weight_qparams=weight_qparams) + qref_conv.weight = torch.nn.Parameter(float_conv.weight.detach()) + if float_conv.bias is not None: + qref_conv.bias = torch.nn.Parameter(float_conv.bias.detach()) + return qref_conv + +class Conv1d(_ConvNd, nn.Conv1d): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + nn.Conv1d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.conv1d --- + + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.conv1d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv1d + """ + weight_quant_dequant = self.get_weight() + result = F.conv1d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, self.dilation, self.groups) + return result + + def _get_name(self): + return "QuantizedConv1d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvNd.from_float(cls, float_conv, weight_qparams) + +class Conv2d(_ConvNd, nn.Conv2d): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, + padding_mode='zeros', + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + nn.Conv2d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.conv2d --- + + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.conv2d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv2d + """ + weight_quant_dequant = self.get_weight() + result = F.conv2d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, self.dilation, self.groups) + return result + + def _get_name(self): + return "QuantizedConv2d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvNd.from_float(cls, float_conv, weight_qparams) + +class Conv3d(_ConvNd, nn.Conv3d): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, + padding_mode="zeros", + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + nn.Conv3d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.conv3d --- + + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.conv3d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv3d + """ + weight_quant_dequant = self.get_weight() + result = F.conv3d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, self.dilation, self.groups) + return result + + def _get_name(self): + return "QuantizedConv3d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvNd.from_float(cls, float_conv, weight_qparams) + +class _ConvTransposeNd(_ConvNd, torch.nn.modules.conv._ConvTransposeNd): + """ A reference version of nn.quantized.ConvTranspose2d + we will not pack the parameters in this module, since weight packing is an + optimization for quantized backends supported in PyTorch (fbgemm/qnnpack), + this is useful when user want to use this module in other backends like Glow. + """ + @staticmethod + def from_float(cls, float_conv, weight_qparams): + qref_conv = cls( + float_conv.in_channels, + float_conv.out_channels, + float_conv.kernel_size, # type: ignore[arg-type] + float_conv.stride, # type: ignore[arg-type] + float_conv.padding, # type: ignore[arg-type] + float_conv.output_padding, # type: ignore[arg-type] + float_conv.groups, + float_conv.bias is not None, # type: ignore[arg-type] + float_conv.dilation, # type: ignore[arg-type] + float_conv.padding_mode, + device=float_conv.weight.device, + dtype=float_conv.weight.dtype, + weight_qparams=weight_qparams) + qref_conv.weight = torch.nn.Parameter(float_conv.weight.detach()) + if float_conv.bias is not None: + qref_conv.bias = torch.nn.Parameter(float_conv.bias.detach()) + return qref_conv + + +class ConvTranspose1d(_ConvTransposeNd, nn.ConvTranspose1d): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + padding: _size_1_t = 0, + output_padding: _size_1_t = 0, + groups: int = 1, + bias: bool = True, + dilation: _size_1_t = 1, + padding_mode: str = "zeros", + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + nn.ConvTranspose1d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.convTranspose1d --- + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.convTranspose1d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv1d + """ + + assert isinstance(self.padding, tuple) + # One cannot replace List by Tuple or Sequence in "_output_padding" because + # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`. + output_padding = self._output_padding( + input, output_size, self.stride, self.padding, self.kernel_size, self.dilation) # type: ignore[arg-type] + + weight_quant_dequant = self.get_weight() + result = F.conv_transpose1d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, output_padding, self.groups, self.dilation) + return result + + def _get_name(self): + return "QuantizedConvTranspose1d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams) + +class ConvTranspose2d(_ConvTransposeNd, nn.ConvTranspose2d): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, output_padding=0, + groups=1, bias=True, dilation=1, + padding_mode='zeros', + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + + nn.ConvTranspose2d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.convTranspose2d --- + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.convTranspose2d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv2d + """ + assert isinstance(self.padding, tuple) + # One cannot replace List by Tuple or Sequence in "_output_padding" because + # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`. + + output_padding = self._output_padding( + input, output_size, self.stride, self.padding, self.kernel_size, self.dilation) # type: ignore[arg-type] + + weight_quant_dequant = self.get_weight() + result = F.conv_transpose2d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, output_padding, self.groups, self.dilation) + + return result + + def _get_name(self): + return "QuantizedConvTranspose2d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams) + +class ConvTranspose3d(_ConvTransposeNd, nn.ConvTranspose3d): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, output_padding=0, + groups=1, bias=True, dilation=1, + padding_mode="zeros", + device=None, + dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None): + nn.ConvTranspose3d.__init__( + self, in_channels, out_channels, kernel_size, stride, padding, output_padding, + groups, bias, dilation, padding_mode, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def forward(self, x: torch.Tensor, output_size: Optional[List[int]] = None) -> torch.Tensor: + """ + we have: + w(float) -- quant - dequant \ + x(float) ------------- F.convTranspose3d --- + In the full model, we will see + w(float) -- quant - *dequant \ + x -- quant --- *dequant -- *F.convTranspose3d --- *quant - dequant + and the backend should be able to fuse the ops with `*` into a quantized conv3d + """ + + assert isinstance(self.padding, tuple) + # One cannot replace List by Tuple or Sequence in "_output_padding" because + # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`. + output_padding = self._output_padding( + input, output_size, self.stride, self.padding, self.kernel_size, self.dilation) # type: ignore[arg-type] + + weight_quant_dequant = self.get_weight() + result = F.conv_transpose3d( + x, weight_quant_dequant, self.bias, self.stride, + self.padding, output_padding, self.groups, self.dilation) + return result + + def _get_name(self): + return "QuantizedConvTranspose3d(Reference)" + + @classmethod + def from_float(cls, float_conv, weight_qparams): + return _ConvTransposeNd.from_float(cls, float_conv, weight_qparams) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py new file mode 100644 index 0000000000000000000000000000000000000000..4890402b875a5ffea8d636aae0b4e3d00992706b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/sparse.py @@ -0,0 +1,94 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from .utils import ReferenceQuantizedModule +from typing import Optional, Dict, Any + +__all__ = ['Embedding', 'EmbeddingBag'] + +class Embedding(nn.Embedding, ReferenceQuantizedModule): + """ A reference quantized Embedding module that fits into the + FX Graph Mode Quantization workflow, activation will be floating point Tensor, + we will store floating point weight as well in the module, but in forward we'll + quantize and dequantize the weight before running the floating point functional + embedding operator. + """ + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False, + sparse: bool = False, _weight: Optional[Tensor] = None, + device=None, dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None) -> None: + super().__init__(num_embeddings, embedding_dim, padding_idx, max_norm, + norm_type, scale_grad_by_freq, sparse, _weight, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def _get_name(self): + return "QuantizedEmbedding(Reference)" + + def forward(self, input: Tensor) -> Tensor: + weight_quant_dequant = self.get_weight() + return F.embedding( + input, weight_quant_dequant, self.padding_idx, self.max_norm, + self.norm_type, self.scale_grad_by_freq, self.sparse) + + @classmethod + def from_float(cls, mod, weight_qparams): + return cls( + mod.num_embeddings, + mod.embedding_dim, + mod.padding_idx, + mod.max_norm, + mod.norm_type, + mod.scale_grad_by_freq, + mod.sparse, + mod.weight, + mod.weight.device, + mod.weight.dtype, + weight_qparams) + +class EmbeddingBag(nn.EmbeddingBag, ReferenceQuantizedModule): + """ A reference quantized EmbeddingBag module that fits into the + FX Graph Mode Quantization workflow, activation will be floating point Tensor, + we will store floating point weight as well in the module, but in forward we'll + quantize and dequantize the weight before running the floating point functional + embedding operator. + """ + def __init__(self, num_embeddings: int, embedding_dim: int, + max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False, + mode: str = 'mean', sparse: bool = False, _weight: Optional[Tensor] = None, + include_last_offset: bool = False, padding_idx: Optional[int] = None, + device=None, dtype=None, + weight_qparams: Optional[Dict[str, Any]] = None) -> None: + super().__init__(num_embeddings, embedding_dim, max_norm, norm_type, + scale_grad_by_freq, mode, sparse, _weight, include_last_offset, + padding_idx, device, dtype) + self._init_weight_qparams(weight_qparams, device) + + def _get_name(self): + return "QuantizedEmbedding(Reference)" + + def forward(self, input: Tensor, offsets: Optional[Tensor] = None, per_sample_weights: Optional[Tensor] = None) -> Tensor: + weight_quant_dequant = self.get_weight() + return F.embedding_bag(input, weight_quant_dequant, offsets, + self.max_norm, self.norm_type, + self.scale_grad_by_freq, self.mode, self.sparse, + per_sample_weights, self.include_last_offset, + self.padding_idx) + + @classmethod + def from_float(cls, mod, weight_qparams): + return cls( + mod.num_embeddings, + mod.embedding_dim, + mod.max_norm, + mod.norm_type, + mod.scale_grad_by_freq, + mod.mode, + mod.sparse, + mod.weight, + mod.include_last_offset, + mod.padding_idx, + mod.weight.device, + mod.weight.dtype, + weight_qparams + ) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52ee46afd4be311668e5043caeee941aed205fc3 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..86596ba18cf1f08e979a9e4c0ae0485627c44845 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__init__.py @@ -0,0 +1,10 @@ +from torch.ao.nn.sparse.quantized import dynamic + +from .linear import Linear +from .linear import LinearPackedParams + +__all__ = [ + "dynamic", + "Linear", + "LinearPackedParams", +] diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edebd898a21d7197ae11b5f965ed4171340175c2 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/__init__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa4b0d32f4633aab11f198b1769eb5f174262428 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/data_norm_sparsifier.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6f017aa9e2e2c673909197d9424d95b196ff30c3 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__init__.py @@ -0,0 +1,8 @@ +from .base_structured_sparsifier import BaseStructuredSparsifier +from .parametrization import ( + FakeStructuredSparsity, + BiasHook, +) +from .saliency_pruner import SaliencyPruner +from .lstm_saliency_pruner import LSTMSaliencyPruner +from .FPGM_pruner import FPGMPruner diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..98f489904cc45340167cfae5f2362ee70ccb8fca --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/utils.py @@ -0,0 +1,136 @@ +from typing import Any, Dict, Optional, Type +from torch.nn.utils.parametrize import type_before_parametrizations, is_parametrized +from itertools import chain + +from torch import nn + +__all__ = [ + "module_contains_param", + "swap_module", + "module_to_fqn", + "fqn_to_module", + "get_arg_info_from_tensor_fqn", + "FakeSparsity", +] + + +def module_contains_param(module: nn.Module, parametrization: Type[nn.Module]) -> bool: + if is_parametrized(module): + # see if any of the module tensors have a parametriztion attached that matches the one passed in + return any( + any(isinstance(param, parametrization) for param in param_list) + for key, param_list in module.parametrizations.items() # type: ignore[union-attr,operator] + ) + return False + + +def swap_module( + mod: nn.Module, mapping: Dict[Type[nn.Module], Type[nn.Module]] +) -> nn.Module: + r"""Swaps the module using from_dense according to the mapping passed in. + Args: + mod: input module + mapping: a dictionary that maps from nn module to sparse nn module + Return: + The corresponding sparse module of `mod` according to mapping, created using from_dense + """ + if type_before_parametrizations(mod) in mapping: + sparse_mod = mapping[type_before_parametrizations(mod)] + + # TODO Fix this typing, as Type[Module] has no attribute "from_dense" + new_mod = sparse_mod.from_dense(mod) # type: ignore[attr-defined] + + # Preserve module's pre forward hooks. They'll be called on quantized input + for pre_hook_fn in mod._forward_pre_hooks.values(): + new_mod.register_forward_pre_hook(pre_hook_fn) + # Preserve module's post forward hooks except _observer_forward_hook + # After convert they'll work with quantized output + for hook_fn in mod._forward_hooks.values(): + new_mod.register_forward_hook(hook_fn) + + # respect device affinity when swapping modules + devices = {p.device for p in chain(mod.parameters(), mod.buffers())} + assert len(devices) <= 1, ( + f"swap_module only works with cpu or single-device CUDA modules, but got devices {devices}" + ) + device = next(iter(devices)) if len(devices) > 0 else None + if device: + new_mod.to(device) + + return new_mod + + else: + return mod + + +def module_to_fqn( + model: nn.Module, module: nn.Module, prefix: str = "" +) -> Optional[str]: + """ + Returns the fqn for a module or None if module not a descendent of model. + """ + if module is model: + return "" + for name, child in model.named_children(): + fqn = module_to_fqn(child, module, ".") + if isinstance(fqn, str): + return prefix + name + fqn + return None + + +def fqn_to_module(model: Optional[nn.Module], path: str) -> Optional[nn.Module]: + """ + Given an fqn, returns the corresponding module or tensor or None if the fqn given by `path` + doesn't correspond to anything. Similar to model.get_submodule(path) but works for tensors. + """ + if path != "": + for name in path.split("."): + model = getattr(model, name, None) + return model + + +def get_arg_info_from_tensor_fqn(model: nn.Module, tensor_fqn: str) -> Dict[str, Any]: + """ + Uses tensor_fqn to obtain a dict containing module_fqn, module and tensor_name + """ + # string manip to split tensor_fqn into module_fqn and tensor_name + # if tensor_fqn is 'weight' then module_fqn and tensor_name are '' and 'weight' + # if tensor_fqn is 'linear.weight' then module_fqn and tensor_name are 'linear' and 'weight' + tensor_name = tensor_fqn.split(".")[-1] + module_fqn = tensor_fqn[: -len(tensor_name) - ("." in tensor_fqn)] + + module = fqn_to_module(model, module_fqn) + + return { + "module_fqn": module_fqn, + "module": module, + "tensor_name": tensor_name, + "tensor_fqn": tensor_fqn, + } + + +# Parametrizations +class FakeSparsity(nn.Module): + r"""Parametrization for the weights. Should be attached to the 'weight' or + any other parameter that requires a mask applied to it. + + Note:: + + Once the mask is passed, the variable should not change the id. The + contents of the mask can change, but the mask reference itself should + not. + """ + + def __init__(self, mask): + super().__init__() + self.register_buffer("mask", mask) + + def forward(self, x): + assert self.mask.shape == x.shape + return self.mask * x + + def state_dict(self, *args, **kwargs): + # We don't want to let the parametrizations to save the mask. + # That way we make sure that the linear module doesn't store the masks + # alongside their parametrizations. + return {} diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py new file mode 100644 index 0000000000000000000000000000000000000000..1448a6270e2ba9ba49b13a9d4878180f56a5bba7 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quant_type.py @@ -0,0 +1,30 @@ +import enum + +__all__ = [ + "QuantType", +] + +# Quantization type (dynamic quantization, static quantization). +# Should match the c++ enum in quantization_type.h +class QuantType(enum.IntEnum): + DYNAMIC = 0 + STATIC = 1 + QAT = 2 + WEIGHT_ONLY = 3 + +_quant_type_to_str = { + QuantType.STATIC: "static", + QuantType.DYNAMIC: "dynamic", + QuantType.QAT: "qat", + QuantType.WEIGHT_ONLY: "weight_only", +} + +# TODO: make this private +def _get_quant_type_to_str(quant_type: QuantType) -> str: + return _quant_type_to_str[quant_type] + +def _quant_type_from_str(name: str) -> QuantType: + for quant_type, s in _quant_type_to_str.items(): + if name == s: + return quant_type + raise ValueError(f"Unknown QuantType name '{name}'") diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a424f63a260abf3dc76503072cdd1c598da2e26 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/__main__.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d400b5ea84b434df76d7998d165ede06994b106f Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/_bdist_wheel.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30e0ef60bf5e2da8a5d4743d543a8a926e474a32 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/util.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2edc4facd7ad8210bb6590be73a02d12c1ceaa68 Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/__pycache__/wheelfile.cpython-311.pyc differ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py new file mode 100644 index 0000000000000000000000000000000000000000..88973ebfb88f9521e5f0613b5cedf1204d10a8f1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/_bdist_wheel.py @@ -0,0 +1,613 @@ +""" +Create a wheel (.whl) distribution. + +A wheel is a built archive format. +""" + +from __future__ import annotations + +import os +import re +import shutil +import stat +import struct +import sys +import sysconfig +import warnings +from email.generator import BytesGenerator, Generator +from email.policy import EmailPolicy +from glob import iglob +from shutil import rmtree +from typing import TYPE_CHECKING, Callable, Iterable, Literal, Sequence, cast +from zipfile import ZIP_DEFLATED, ZIP_STORED + +import setuptools +from setuptools import Command + +from . import __version__ as wheel_version +from .metadata import pkginfo_to_metadata +from .util import log +from .vendored.packaging import tags +from .vendored.packaging import version as _packaging_version +from .wheelfile import WheelFile + +if TYPE_CHECKING: + import types + +# ensure Python logging is configured +try: + __import__("setuptools.logging") +except ImportError: + # setuptools < ?? + from . import _setuptools_logging + + _setuptools_logging.configure() + + +def safe_name(name: str) -> str: + """Convert an arbitrary string to a standard distribution name + Any runs of non-alphanumeric/. characters are replaced with a single '-'. + """ + return re.sub("[^A-Za-z0-9.]+", "-", name) + + +def safe_version(version: str) -> str: + """ + Convert an arbitrary string to a standard version string + """ + try: + # normalize the version + return str(_packaging_version.Version(version)) + except _packaging_version.InvalidVersion: + version = version.replace(" ", ".") + return re.sub("[^A-Za-z0-9.]+", "-", version) + + +setuptools_major_version = int(setuptools.__version__.split(".")[0]) + +PY_LIMITED_API_PATTERN = r"cp3\d" + + +def _is_32bit_interpreter() -> bool: + return struct.calcsize("P") == 4 + + +def python_tag() -> str: + return f"py{sys.version_info[0]}" + + +def get_platform(archive_root: str | None) -> str: + """Return our platform name 'win32', 'linux_x86_64'""" + result = sysconfig.get_platform() + if result.startswith("macosx") and archive_root is not None: + from .macosx_libfile import calculate_macosx_platform_tag + + result = calculate_macosx_platform_tag(archive_root, result) + elif _is_32bit_interpreter(): + if result == "linux-x86_64": + # pip pull request #3497 + result = "linux-i686" + elif result == "linux-aarch64": + # packaging pull request #234 + # TODO armv8l, packaging pull request #690 => this did not land + # in pip/packaging yet + result = "linux-armv7l" + + return result.replace("-", "_") + + +def get_flag( + var: str, fallback: bool, expected: bool = True, warn: bool = True +) -> bool: + """Use a fallback value for determining SOABI flags if the needed config + var is unset or unavailable.""" + val = sysconfig.get_config_var(var) + if val is None: + if warn: + warnings.warn( + f"Config variable '{var}' is unset, Python ABI tag may be incorrect", + RuntimeWarning, + stacklevel=2, + ) + return fallback + return val == expected + + +def get_abi_tag() -> str | None: + """Return the ABI tag based on SOABI (if available) or emulate SOABI (PyPy2).""" + soabi: str = sysconfig.get_config_var("SOABI") + impl = tags.interpreter_name() + if not soabi and impl in ("cp", "pp") and hasattr(sys, "maxunicode"): + d = "" + m = "" + u = "" + if get_flag("Py_DEBUG", hasattr(sys, "gettotalrefcount"), warn=(impl == "cp")): + d = "d" + + if get_flag( + "WITH_PYMALLOC", + impl == "cp", + warn=(impl == "cp" and sys.version_info < (3, 8)), + ) and sys.version_info < (3, 8): + m = "m" + + abi = f"{impl}{tags.interpreter_version()}{d}{m}{u}" + elif soabi and impl == "cp" and soabi.startswith("cpython"): + # non-Windows + abi = "cp" + soabi.split("-")[1] + elif soabi and impl == "cp" and soabi.startswith("cp"): + # Windows + abi = soabi.split("-")[0] + elif soabi and impl == "pp": + # we want something like pypy36-pp73 + abi = "-".join(soabi.split("-")[:2]) + abi = abi.replace(".", "_").replace("-", "_") + elif soabi and impl == "graalpy": + abi = "-".join(soabi.split("-")[:3]) + abi = abi.replace(".", "_").replace("-", "_") + elif soabi: + abi = soabi.replace(".", "_").replace("-", "_") + else: + abi = None + + return abi + + +def safer_name(name: str) -> str: + return safe_name(name).replace("-", "_") + + +def safer_version(version: str) -> str: + return safe_version(version).replace("-", "_") + + +def remove_readonly( + func: Callable[..., object], + path: str, + excinfo: tuple[type[Exception], Exception, types.TracebackType], +) -> None: + remove_readonly_exc(func, path, excinfo[1]) + + +def remove_readonly_exc(func: Callable[..., object], path: str, exc: Exception) -> None: + os.chmod(path, stat.S_IWRITE) + func(path) + + +class bdist_wheel(Command): + description = "create a wheel distribution" + + supported_compressions = { + "stored": ZIP_STORED, + "deflated": ZIP_DEFLATED, + } + + user_options = [ + ("bdist-dir=", "b", "temporary directory for creating the distribution"), + ( + "plat-name=", + "p", + "platform name to embed in generated filenames " + f"(default: {get_platform(None)})", + ), + ( + "keep-temp", + "k", + "keep the pseudo-installation tree around after " + "creating the distribution archive", + ), + ("dist-dir=", "d", "directory to put final built distributions in"), + ("skip-build", None, "skip rebuilding everything (for testing/debugging)"), + ( + "relative", + None, + "build the archive using relative paths (default: false)", + ), + ( + "owner=", + "u", + "Owner name used when creating a tar file [default: current user]", + ), + ( + "group=", + "g", + "Group name used when creating a tar file [default: current group]", + ), + ("universal", None, "make a universal wheel (default: false)"), + ( + "compression=", + None, + "zipfile compression (one of: {}) (default: 'deflated')".format( + ", ".join(supported_compressions) + ), + ), + ( + "python-tag=", + None, + f"Python implementation compatibility tag (default: '{python_tag()}')", + ), + ( + "build-number=", + None, + "Build number for this particular version. " + "As specified in PEP-0427, this must start with a digit. " + "[default: None]", + ), + ( + "py-limited-api=", + None, + "Python tag (cp32|cp33|cpNN) for abi3 wheel tag (default: false)", + ), + ] + + boolean_options = ["keep-temp", "skip-build", "relative", "universal"] + + def initialize_options(self): + self.bdist_dir: str = None + self.data_dir = None + self.plat_name: str | None = None + self.plat_tag = None + self.format = "zip" + self.keep_temp = False + self.dist_dir: str | None = None + self.egginfo_dir = None + self.root_is_pure: bool | None = None + self.skip_build = None + self.relative = False + self.owner = None + self.group = None + self.universal: bool = False + self.compression: str | int = "deflated" + self.python_tag: str = python_tag() + self.build_number: str | None = None + self.py_limited_api: str | Literal[False] = False + self.plat_name_supplied = False + + def finalize_options(self): + if self.bdist_dir is None: + bdist_base = self.get_finalized_command("bdist").bdist_base + self.bdist_dir = os.path.join(bdist_base, "wheel") + + egg_info = self.distribution.get_command_obj("egg_info") + egg_info.ensure_finalized() # needed for correct `wheel_dist_name` + + self.data_dir = self.wheel_dist_name + ".data" + self.plat_name_supplied = self.plat_name is not None + + try: + self.compression = self.supported_compressions[self.compression] + except KeyError: + raise ValueError(f"Unsupported compression: {self.compression}") from None + + need_options = ("dist_dir", "plat_name", "skip_build") + + self.set_undefined_options("bdist", *zip(need_options, need_options)) + + self.root_is_pure = not ( + self.distribution.has_ext_modules() or self.distribution.has_c_libraries() + ) + + if self.py_limited_api and not re.match( + PY_LIMITED_API_PATTERN, self.py_limited_api + ): + raise ValueError(f"py-limited-api must match '{PY_LIMITED_API_PATTERN}'") + + # Support legacy [wheel] section for setting universal + wheel = self.distribution.get_option_dict("wheel") + if "universal" in wheel: + # please don't define this in your global configs + log.warning( + "The [wheel] section is deprecated. Use [bdist_wheel] instead.", + ) + val = wheel["universal"][1].strip() + if val.lower() in ("1", "true", "yes"): + self.universal = True + + if self.build_number is not None and not self.build_number[:1].isdigit(): + raise ValueError("Build tag (build-number) must start with a digit.") + + @property + def wheel_dist_name(self): + """Return distribution full name with - replaced with _""" + components = ( + safer_name(self.distribution.get_name()), + safer_version(self.distribution.get_version()), + ) + if self.build_number: + components += (self.build_number,) + return "-".join(components) + + def get_tag(self) -> tuple[str, str, str]: + # bdist sets self.plat_name if unset, we should only use it for purepy + # wheels if the user supplied it. + if self.plat_name_supplied: + plat_name = cast(str, self.plat_name) + elif self.root_is_pure: + plat_name = "any" + else: + # macosx contains system version in platform name so need special handle + if self.plat_name and not self.plat_name.startswith("macosx"): + plat_name = self.plat_name + else: + # on macosx always limit the platform name to comply with any + # c-extension modules in bdist_dir, since the user can specify + # a higher MACOSX_DEPLOYMENT_TARGET via tools like CMake + + # on other platforms, and on macosx if there are no c-extension + # modules, use the default platform name. + plat_name = get_platform(self.bdist_dir) + + if _is_32bit_interpreter(): + if plat_name in ("linux-x86_64", "linux_x86_64"): + plat_name = "linux_i686" + if plat_name in ("linux-aarch64", "linux_aarch64"): + # TODO armv8l, packaging pull request #690 => this did not land + # in pip/packaging yet + plat_name = "linux_armv7l" + + plat_name = ( + plat_name.lower().replace("-", "_").replace(".", "_").replace(" ", "_") + ) + + if self.root_is_pure: + if self.universal: + impl = "py2.py3" + else: + impl = self.python_tag + tag = (impl, "none", plat_name) + else: + impl_name = tags.interpreter_name() + impl_ver = tags.interpreter_version() + impl = impl_name + impl_ver + # We don't work on CPython 3.1, 3.0. + if self.py_limited_api and (impl_name + impl_ver).startswith("cp3"): + impl = self.py_limited_api + abi_tag = "abi3" + else: + abi_tag = str(get_abi_tag()).lower() + tag = (impl, abi_tag, plat_name) + # issue gh-374: allow overriding plat_name + supported_tags = [ + (t.interpreter, t.abi, plat_name) for t in tags.sys_tags() + ] + assert ( + tag in supported_tags + ), f"would build wheel with unsupported tag {tag}" + return tag + + def run(self): + build_scripts = self.reinitialize_command("build_scripts") + build_scripts.executable = "python" + build_scripts.force = True + + build_ext = self.reinitialize_command("build_ext") + build_ext.inplace = False + + if not self.skip_build: + self.run_command("build") + + install = self.reinitialize_command("install", reinit_subcommands=True) + install.root = self.bdist_dir + install.compile = False + install.skip_build = self.skip_build + install.warn_dir = False + + # A wheel without setuptools scripts is more cross-platform. + # Use the (undocumented) `no_ep` option to setuptools' + # install_scripts command to avoid creating entry point scripts. + install_scripts = self.reinitialize_command("install_scripts") + install_scripts.no_ep = True + + # Use a custom scheme for the archive, because we have to decide + # at installation time which scheme to use. + for key in ("headers", "scripts", "data", "purelib", "platlib"): + setattr(install, "install_" + key, os.path.join(self.data_dir, key)) + + basedir_observed = "" + + if os.name == "nt": + # win32 barfs if any of these are ''; could be '.'? + # (distutils.command.install:change_roots bug) + basedir_observed = os.path.normpath(os.path.join(self.data_dir, "..")) + self.install_libbase = self.install_lib = basedir_observed + + setattr( + install, + "install_purelib" if self.root_is_pure else "install_platlib", + basedir_observed, + ) + + log.info(f"installing to {self.bdist_dir}") + + self.run_command("install") + + impl_tag, abi_tag, plat_tag = self.get_tag() + archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}" + if not self.relative: + archive_root = self.bdist_dir + else: + archive_root = os.path.join( + self.bdist_dir, self._ensure_relative(install.install_base) + ) + + self.set_undefined_options("install_egg_info", ("target", "egginfo_dir")) + distinfo_dirname = ( + f"{safer_name(self.distribution.get_name())}-" + f"{safer_version(self.distribution.get_version())}.dist-info" + ) + distinfo_dir = os.path.join(self.bdist_dir, distinfo_dirname) + self.egg2dist(self.egginfo_dir, distinfo_dir) + + self.write_wheelfile(distinfo_dir) + + # Make the archive + if not os.path.exists(self.dist_dir): + os.makedirs(self.dist_dir) + + wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl") + with WheelFile(wheel_path, "w", self.compression) as wf: + wf.write_files(archive_root) + + # Add to 'Distribution.dist_files' so that the "upload" command works + getattr(self.distribution, "dist_files", []).append( + ( + "bdist_wheel", + "{}.{}".format(*sys.version_info[:2]), # like 3.7 + wheel_path, + ) + ) + + if not self.keep_temp: + log.info(f"removing {self.bdist_dir}") + if not self.dry_run: + if sys.version_info < (3, 12): + rmtree(self.bdist_dir, onerror=remove_readonly) + else: + rmtree(self.bdist_dir, onexc=remove_readonly_exc) + + def write_wheelfile( + self, wheelfile_base: str, generator: str = f"bdist_wheel ({wheel_version})" + ): + from email.message import Message + + msg = Message() + msg["Wheel-Version"] = "1.0" # of the spec + msg["Generator"] = generator + msg["Root-Is-Purelib"] = str(self.root_is_pure).lower() + if self.build_number is not None: + msg["Build"] = self.build_number + + # Doesn't work for bdist_wininst + impl_tag, abi_tag, plat_tag = self.get_tag() + for impl in impl_tag.split("."): + for abi in abi_tag.split("."): + for plat in plat_tag.split("."): + msg["Tag"] = "-".join((impl, abi, plat)) + + wheelfile_path = os.path.join(wheelfile_base, "WHEEL") + log.info(f"creating {wheelfile_path}") + with open(wheelfile_path, "wb") as f: + BytesGenerator(f, maxheaderlen=0).flatten(msg) + + def _ensure_relative(self, path: str) -> str: + # copied from dir_util, deleted + drive, path = os.path.splitdrive(path) + if path[0:1] == os.sep: + path = drive + path[1:] + return path + + @property + def license_paths(self) -> Iterable[str]: + if setuptools_major_version >= 57: + # Setuptools has resolved any patterns to actual file names + return self.distribution.metadata.license_files or () + + files: set[str] = set() + metadata = self.distribution.get_option_dict("metadata") + if setuptools_major_version >= 42: + # Setuptools recognizes the license_files option but does not do globbing + patterns = cast(Sequence[str], self.distribution.metadata.license_files) + else: + # Prior to those, wheel is entirely responsible for handling license files + if "license_files" in metadata: + patterns = metadata["license_files"][1].split() + else: + patterns = () + + if "license_file" in metadata: + warnings.warn( + 'The "license_file" option is deprecated. Use "license_files" instead.', + DeprecationWarning, + stacklevel=2, + ) + files.add(metadata["license_file"][1]) + + if not files and not patterns and not isinstance(patterns, list): + patterns = ("LICEN[CS]E*", "COPYING*", "NOTICE*", "AUTHORS*") + + for pattern in patterns: + for path in iglob(pattern): + if path.endswith("~"): + log.debug( + f'ignoring license file "{path}" as it looks like a backup' + ) + continue + + if path not in files and os.path.isfile(path): + log.info( + f'adding license file "{path}" (matched pattern "{pattern}")' + ) + files.add(path) + + return files + + def egg2dist(self, egginfo_path: str, distinfo_path: str): + """Convert an .egg-info directory into a .dist-info directory""" + + def adios(p: str) -> None: + """Appropriately delete directory, file or link.""" + if os.path.exists(p) and not os.path.islink(p) and os.path.isdir(p): + shutil.rmtree(p) + elif os.path.exists(p): + os.unlink(p) + + adios(distinfo_path) + + if not os.path.exists(egginfo_path): + # There is no egg-info. This is probably because the egg-info + # file/directory is not named matching the distribution name used + # to name the archive file. Check for this case and report + # accordingly. + import glob + + pat = os.path.join(os.path.dirname(egginfo_path), "*.egg-info") + possible = glob.glob(pat) + err = f"Egg metadata expected at {egginfo_path} but not found" + if possible: + alt = os.path.basename(possible[0]) + err += f" ({alt} found - possible misnamed archive file?)" + + raise ValueError(err) + + if os.path.isfile(egginfo_path): + # .egg-info is a single file + pkg_info = pkginfo_to_metadata(egginfo_path, egginfo_path) + os.mkdir(distinfo_path) + else: + # .egg-info is a directory + pkginfo_path = os.path.join(egginfo_path, "PKG-INFO") + pkg_info = pkginfo_to_metadata(egginfo_path, pkginfo_path) + + # ignore common egg metadata that is useless to wheel + shutil.copytree( + egginfo_path, + distinfo_path, + ignore=lambda x, y: { + "PKG-INFO", + "requires.txt", + "SOURCES.txt", + "not-zip-safe", + }, + ) + + # delete dependency_links if it is only whitespace + dependency_links_path = os.path.join(distinfo_path, "dependency_links.txt") + with open(dependency_links_path, encoding="utf-8") as dependency_links_file: + dependency_links = dependency_links_file.read().strip() + if not dependency_links: + adios(dependency_links_path) + + pkg_info_path = os.path.join(distinfo_path, "METADATA") + serialization_policy = EmailPolicy( + utf8=True, + mangle_from_=False, + max_line_length=0, + ) + with open(pkg_info_path, "w", encoding="utf-8") as out: + Generator(out, policy=serialization_policy).flatten(pkg_info) + + for license_path in self.license_paths: + filename = os.path.basename(license_path) + shutil.copy(license_path, os.path.join(distinfo_path, filename)) + + adios(egginfo_path) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py new file mode 100644 index 0000000000000000000000000000000000000000..dd7b8629e5087fe35a2ea7ab6d3cbbb4cacf0031 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/bdist_wheel.py @@ -0,0 +1,26 @@ +from typing import TYPE_CHECKING +from warnings import warn + +warn( + "The 'wheel' package is no longer the canonical location of the 'bdist_wheel' " + "command, and will be removed in a future release. Please update to setuptools " + "v70.1 or later which contains an integrated version of this command.", + DeprecationWarning, + stacklevel=1, +) + +if TYPE_CHECKING: + from ._bdist_wheel import bdist_wheel as bdist_wheel +else: + try: + # Better integration/compatibility with setuptools: + # in the case new fixes or PEPs are implemented in setuptools + # there is no need to backport them to the deprecated code base. + # This is useful in the case of old packages in the ecosystem + # that are still used but have low maintenance. + from setuptools.command.bdist_wheel import bdist_wheel + except ImportError: + # Only used in the case of old setuptools versions. + # If the user wants to get the latest fixes/PEPs, + # they are encouraged to address the deprecation warning. + from ._bdist_wheel import bdist_wheel as bdist_wheel diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..b8098fa8599a1f142ee33ee551b7a874f5f98840 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/metadata.py @@ -0,0 +1,183 @@ +""" +Tools for converting old- to new-style metadata. +""" + +from __future__ import annotations + +import functools +import itertools +import os.path +import re +import textwrap +from email.message import Message +from email.parser import Parser +from typing import Generator, Iterable, Iterator, Literal + +from .vendored.packaging.requirements import Requirement + + +def _nonblank(str: str) -> bool | Literal[""]: + return str and not str.startswith("#") + + +@functools.singledispatch +def yield_lines(iterable: Iterable[str]) -> Iterator[str]: + r""" + Yield valid lines of a string or iterable. + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text: str) -> Iterator[str]: + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def split_sections( + s: str | Iterator[str], +) -> Generator[tuple[str | None, list[str]], None, None]: + """Split a string or iterable thereof into (section, content) pairs + Each ``section`` is a stripped version of the section header ("[section]") + and each ``content`` is a list of stripped lines excluding blank lines and + comment-only lines. If there are any such lines before the first section + header, they're returned in a first ``section`` of ``None``. + """ + section = None + content: list[str] = [] + for line in yield_lines(s): + if line.startswith("["): + if line.endswith("]"): + if section or content: + yield section, content + section = line[1:-1].strip() + content = [] + else: + raise ValueError("Invalid section heading", line) + else: + content.append(line) + + # wrap up last segment + yield section, content + + +def safe_extra(extra: str) -> str: + """Convert an arbitrary string to a standard 'extra' name + Any runs of non-alphanumeric characters are replaced with a single '_', + and the result is always lowercased. + """ + return re.sub("[^A-Za-z0-9.-]+", "_", extra).lower() + + +def safe_name(name: str) -> str: + """Convert an arbitrary string to a standard distribution name + Any runs of non-alphanumeric/. characters are replaced with a single '-'. + """ + return re.sub("[^A-Za-z0-9.]+", "-", name) + + +def requires_to_requires_dist(requirement: Requirement) -> str: + """Return the version specifier for a requirement in PEP 345/566 fashion.""" + if requirement.url: + return " @ " + requirement.url + + requires_dist: list[str] = [] + for spec in requirement.specifier: + requires_dist.append(spec.operator + spec.version) + + if requires_dist: + return " " + ",".join(sorted(requires_dist)) + else: + return "" + + +def convert_requirements(requirements: list[str]) -> Iterator[str]: + """Yield Requires-Dist: strings for parsed requirements strings.""" + for req in requirements: + parsed_requirement = Requirement(req) + spec = requires_to_requires_dist(parsed_requirement) + extras = ",".join(sorted(safe_extra(e) for e in parsed_requirement.extras)) + if extras: + extras = f"[{extras}]" + + yield safe_name(parsed_requirement.name) + extras + spec + + +def generate_requirements( + extras_require: dict[str | None, list[str]], +) -> Iterator[tuple[str, str]]: + """ + Convert requirements from a setup()-style dictionary to + ('Requires-Dist', 'requirement') and ('Provides-Extra', 'extra') tuples. + + extras_require is a dictionary of {extra: [requirements]} as passed to setup(), + using the empty extra {'': [requirements]} to hold install_requires. + """ + for extra, depends in extras_require.items(): + condition = "" + extra = extra or "" + if ":" in extra: # setuptools extra:condition syntax + extra, condition = extra.split(":", 1) + + extra = safe_extra(extra) + if extra: + yield "Provides-Extra", extra + if condition: + condition = "(" + condition + ") and " + condition += f"extra == '{extra}'" + + if condition: + condition = " ; " + condition + + for new_req in convert_requirements(depends): + canonical_req = str(Requirement(new_req + condition)) + yield "Requires-Dist", canonical_req + + +def pkginfo_to_metadata(egg_info_path: str, pkginfo_path: str) -> Message: + """ + Convert .egg-info directory with PKG-INFO to the Metadata 2.1 format + """ + with open(pkginfo_path, encoding="utf-8") as headers: + pkg_info = Parser().parse(headers) + + pkg_info.replace_header("Metadata-Version", "2.1") + # Those will be regenerated from `requires.txt`. + del pkg_info["Provides-Extra"] + del pkg_info["Requires-Dist"] + requires_path = os.path.join(egg_info_path, "requires.txt") + if os.path.exists(requires_path): + with open(requires_path, encoding="utf-8") as requires_file: + requires = requires_file.read() + + parsed_requirements = sorted(split_sections(requires), key=lambda x: x[0] or "") + for extra, reqs in parsed_requirements: + for key, value in generate_requirements({extra: reqs}): + if (key, value) not in pkg_info.items(): + pkg_info[key] = value + + description = pkg_info["Description"] + if description: + description_lines = pkg_info["Description"].splitlines() + dedented_description = "\n".join( + # if the first line of long_description is blank, + # the first line here will be indented. + ( + description_lines[0].lstrip(), + textwrap.dedent("\n".join(description_lines[1:])), + "\n", + ) + ) + pkg_info.set_payload(dedented_description) + del pkg_info["Description"] + + return pkg_info diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py new file mode 100644 index 0000000000000000000000000000000000000000..c928aa403b75e1ae392b4af5fe13147a095bbd30 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/util.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import base64 +import logging + +log = logging.getLogger("wheel") + + +def urlsafe_b64encode(data: bytes) -> bytes: + """urlsafe_b64encode without padding""" + return base64.urlsafe_b64encode(data).rstrip(b"=") + + +def urlsafe_b64decode(data: bytes) -> bytes: + """urlsafe_b64decode without padding""" + pad = b"=" * (4 - (len(data) & 3)) + return base64.urlsafe_b64decode(data + pad) diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py new file mode 100644 index 0000000000000000000000000000000000000000..0a0f4596c566420135991713ee75ca29c4cff3ed --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/wheel/wheelfile.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +import csv +import hashlib +import os.path +import re +import stat +import time +from io import StringIO, TextIOWrapper +from typing import IO, TYPE_CHECKING, Literal +from zipfile import ZIP_DEFLATED, ZipFile, ZipInfo + +from wheel.cli import WheelError +from wheel.util import log, urlsafe_b64decode, urlsafe_b64encode + +if TYPE_CHECKING: + from typing import Protocol, Sized, Union + + from typing_extensions import Buffer + + StrPath = Union[str, os.PathLike[str]] + + class SizedBuffer(Sized, Buffer, Protocol): ... + + +# Non-greedy matching of an optional build number may be too clever (more +# invalid wheel filenames will match). Separate regex for .dist-info? +WHEEL_INFO_RE = re.compile( + r"""^(?P(?P[^\s-]+?)-(?P[^\s-]+?))(-(?P\d[^\s-]*))? + -(?P[^\s-]+?)-(?P[^\s-]+?)-(?P\S+)\.whl$""", + re.VERBOSE, +) +MINIMUM_TIMESTAMP = 315532800 # 1980-01-01 00:00:00 UTC + + +def get_zipinfo_datetime(timestamp: float | None = None): + # Some applications need reproducible .whl files, but they can't do this without + # forcing the timestamp of the individual ZipInfo objects. See issue #143. + timestamp = int(os.environ.get("SOURCE_DATE_EPOCH", timestamp or time.time())) + timestamp = max(timestamp, MINIMUM_TIMESTAMP) + return time.gmtime(timestamp)[0:6] + + +class WheelFile(ZipFile): + """A ZipFile derivative class that also reads SHA-256 hashes from + .dist-info/RECORD and checks any read files against those. + """ + + _default_algorithm = hashlib.sha256 + + def __init__( + self, + file: StrPath, + mode: Literal["r", "w", "x", "a"] = "r", + compression: int = ZIP_DEFLATED, + ): + basename = os.path.basename(file) + self.parsed_filename = WHEEL_INFO_RE.match(basename) + if not basename.endswith(".whl") or self.parsed_filename is None: + raise WheelError(f"Bad wheel filename {basename!r}") + + ZipFile.__init__(self, file, mode, compression=compression, allowZip64=True) + + self.dist_info_path = "{}.dist-info".format( + self.parsed_filename.group("namever") + ) + self.record_path = self.dist_info_path + "/RECORD" + self._file_hashes: dict[str, tuple[None, None] | tuple[int, bytes]] = {} + self._file_sizes = {} + if mode == "r": + # Ignore RECORD and any embedded wheel signatures + self._file_hashes[self.record_path] = None, None + self._file_hashes[self.record_path + ".jws"] = None, None + self._file_hashes[self.record_path + ".p7s"] = None, None + + # Fill in the expected hashes by reading them from RECORD + try: + record = self.open(self.record_path) + except KeyError: + raise WheelError(f"Missing {self.record_path} file") from None + + with record: + for line in csv.reader( + TextIOWrapper(record, newline="", encoding="utf-8") + ): + path, hash_sum, size = line + if not hash_sum: + continue + + algorithm, hash_sum = hash_sum.split("=") + try: + hashlib.new(algorithm) + except ValueError: + raise WheelError( + f"Unsupported hash algorithm: {algorithm}" + ) from None + + if algorithm.lower() in {"md5", "sha1"}: + raise WheelError( + f"Weak hash algorithm ({algorithm}) is not permitted by " + f"PEP 427" + ) + + self._file_hashes[path] = ( + algorithm, + urlsafe_b64decode(hash_sum.encode("ascii")), + ) + + def open( + self, + name_or_info: str | ZipInfo, + mode: Literal["r", "w"] = "r", + pwd: bytes | None = None, + ) -> IO[bytes]: + def _update_crc(newdata: bytes) -> None: + eof = ef._eof + update_crc_orig(newdata) + running_hash.update(newdata) + if eof and running_hash.digest() != expected_hash: + raise WheelError(f"Hash mismatch for file '{ef_name}'") + + ef_name = ( + name_or_info.filename if isinstance(name_or_info, ZipInfo) else name_or_info + ) + if ( + mode == "r" + and not ef_name.endswith("/") + and ef_name not in self._file_hashes + ): + raise WheelError(f"No hash found for file '{ef_name}'") + + ef = ZipFile.open(self, name_or_info, mode, pwd) + if mode == "r" and not ef_name.endswith("/"): + algorithm, expected_hash = self._file_hashes[ef_name] + if expected_hash is not None: + # Monkey patch the _update_crc method to also check for the hash from + # RECORD + running_hash = hashlib.new(algorithm) + update_crc_orig, ef._update_crc = ef._update_crc, _update_crc + + return ef + + def write_files(self, base_dir: str): + log.info(f"creating '{self.filename}' and adding '{base_dir}' to it") + deferred: list[tuple[str, str]] = [] + for root, dirnames, filenames in os.walk(base_dir): + # Sort the directory names so that `os.walk` will walk them in a + # defined order on the next iteration. + dirnames.sort() + for name in sorted(filenames): + path = os.path.normpath(os.path.join(root, name)) + if os.path.isfile(path): + arcname = os.path.relpath(path, base_dir).replace(os.path.sep, "/") + if arcname == self.record_path: + pass + elif root.endswith(".dist-info"): + deferred.append((path, arcname)) + else: + self.write(path, arcname) + + deferred.sort() + for path, arcname in deferred: + self.write(path, arcname) + + def write( + self, + filename: str, + arcname: str | None = None, + compress_type: int | None = None, + ) -> None: + with open(filename, "rb") as f: + st = os.fstat(f.fileno()) + data = f.read() + + zinfo = ZipInfo( + arcname or filename, date_time=get_zipinfo_datetime(st.st_mtime) + ) + zinfo.external_attr = (stat.S_IMODE(st.st_mode) | stat.S_IFMT(st.st_mode)) << 16 + zinfo.compress_type = compress_type or self.compression + self.writestr(zinfo, data, compress_type) + + def writestr( + self, + zinfo_or_arcname: str | ZipInfo, + data: SizedBuffer | str, + compress_type: int | None = None, + ): + if isinstance(zinfo_or_arcname, str): + zinfo_or_arcname = ZipInfo( + zinfo_or_arcname, date_time=get_zipinfo_datetime() + ) + zinfo_or_arcname.compress_type = self.compression + zinfo_or_arcname.external_attr = (0o664 | stat.S_IFREG) << 16 + + if isinstance(data, str): + data = data.encode("utf-8") + + ZipFile.writestr(self, zinfo_or_arcname, data, compress_type) + fname = ( + zinfo_or_arcname.filename + if isinstance(zinfo_or_arcname, ZipInfo) + else zinfo_or_arcname + ) + log.info(f"adding '{fname}'") + if fname != self.record_path: + hash_ = self._default_algorithm(data) + self._file_hashes[fname] = ( + hash_.name, + urlsafe_b64encode(hash_.digest()).decode("ascii"), + ) + self._file_sizes[fname] = len(data) + + def close(self): + # Write RECORD + if self.fp is not None and self.mode == "w" and self._file_hashes: + data = StringIO() + writer = csv.writer(data, delimiter=",", quotechar='"', lineterminator="\n") + writer.writerows( + ( + (fname, algorithm + "=" + hash_, self._file_sizes[fname]) + for fname, (algorithm, hash_) in self._file_hashes.items() + ) + ) + writer.writerow((format(self.record_path), "", "")) + self.writestr(self.record_path, data.getvalue()) + + ZipFile.close(self)